summaryrefslogtreecommitdiffstats
path: root/contrib/nvi/vi/v_word.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/nvi/vi/v_word.c')
-rw-r--r--contrib/nvi/vi/v_word.c547
1 files changed, 547 insertions, 0 deletions
diff --git a/contrib/nvi/vi/v_word.c b/contrib/nvi/vi/v_word.c
new file mode 100644
index 0000000..e6e4a63
--- /dev/null
+++ b/contrib/nvi/vi/v_word.c
@@ -0,0 +1,547 @@
+/*-
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1992, 1993, 1994, 1995, 1996
+ * Keith Bostic. All rights reserved.
+ *
+ * See the LICENSE file for redistribution information.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)v_word.c 10.5 (Berkeley) 3/6/96";
+#endif /* not lint */
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+
+#include <bitstring.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "../common/common.h"
+#include "vi.h"
+
+/*
+ * There are two types of "words". Bigwords are easy -- groups of anything
+ * delimited by whitespace. Normal words are trickier. They are either a
+ * group of characters, numbers and underscores, or a group of anything but,
+ * delimited by whitespace. When for a word, if you're in whitespace, it's
+ * easy, just remove the whitespace and go to the beginning or end of the
+ * word. Otherwise, figure out if the next character is in a different group.
+ * If it is, go to the beginning or end of that group, otherwise, go to the
+ * beginning or end of the current group. The historic version of vi didn't
+ * get this right, so, for example, there were cases where "4e" was not the
+ * same as "eeee" -- in particular, single character words, and commands that
+ * began in whitespace were almost always handled incorrectly. To get it right
+ * you have to resolve the cursor after each search so that the look-ahead to
+ * figure out what type of "word" the cursor is in will be correct.
+ *
+ * Empty lines, and lines that consist of only white-space characters count
+ * as a single word, and the beginning and end of the file counts as an
+ * infinite number of words.
+ *
+ * Movements associated with commands are different than movement commands.
+ * For example, in "abc def", with the cursor on the 'a', "cw" is from
+ * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
+ * space is discarded from the change movement. Another example is that,
+ * in the same string, a "cw" on any white space character replaces that
+ * single character, and nothing else. Ain't nothin' in here that's easy.
+ *
+ * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
+ * would treat groups of empty lines as individual words, i.e. the command
+ * would move the cursor to each new empty line. The 'e' and 'E' commands
+ * would treat groups of empty lines as a single word, i.e. the first use
+ * would move past the group of lines. The 'b' command would just beep at
+ * you, or, if you did it from the start of the line as part of a motion
+ * command, go absolutely nuts. If the lines contained only white-space
+ * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
+ * 'b', 'E' and 'e' commands would treat the group as a single word, and
+ * the 'B' and 'b' commands will treat the lines as individual words. This
+ * implementation treats all of these cases as a single white-space word.
+ */
+
+enum which {BIGWORD, LITTLEWORD};
+
+static int bword __P((SCR *, VICMD *, enum which));
+static int eword __P((SCR *, VICMD *, enum which));
+static int fword __P((SCR *, VICMD *, enum which));
+
+/*
+ * v_wordW -- [count]W
+ * Move forward a bigword at a time.
+ *
+ * PUBLIC: int v_wordW __P((SCR *, VICMD *));
+ */
+int
+v_wordW(sp, vp)
+ SCR *sp;
+ VICMD *vp;
+{
+ return (fword(sp, vp, BIGWORD));
+}
+
+/*
+ * v_wordw -- [count]w
+ * Move forward a word at a time.
+ *
+ * PUBLIC: int v_wordw __P((SCR *, VICMD *));
+ */
+int
+v_wordw(sp, vp)
+ SCR *sp;
+ VICMD *vp;
+{
+ return (fword(sp, vp, LITTLEWORD));
+}
+
+/*
+ * fword --
+ * Move forward by words.
+ */
+static int
+fword(sp, vp, type)
+ SCR *sp;
+ VICMD *vp;
+ enum which type;
+{
+ enum { INWORD, NOTWORD } state;
+ VCS cs;
+ u_long cnt;
+
+ cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
+ cs.cs_lno = vp->m_start.lno;
+ cs.cs_cno = vp->m_start.cno;
+ if (cs_init(sp, &cs))
+ return (1);
+
+ /*
+ * If in white-space:
+ * If the count is 1, and it's a change command, we're done.
+ * Else, move to the first non-white-space character, which
+ * counts as a single word move. If it's a motion command,
+ * don't move off the end of the line.
+ */
+ if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) {
+ if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
+ if (ISCMD(vp->rkp, 'c'))
+ return (0);
+ if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
+ if (cs_fspace(sp, &cs))
+ return (1);
+ goto ret;
+ }
+ }
+ if (cs_fblank(sp, &cs))
+ return (1);
+ --cnt;
+ }
+
+ /*
+ * Cyclically move to the next word -- this involves skipping
+ * over word characters and then any trailing non-word characters.
+ * Note, for the 'w' command, the definition of a word keeps
+ * switching.
+ */
+ if (type == BIGWORD)
+ while (cnt--) {
+ for (;;) {
+ if (cs_next(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_EOF)
+ goto ret;
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ break;
+ }
+ /*
+ * If a motion command and we're at the end of the
+ * last word, we're done. Delete and yank eat any
+ * trailing blanks, but we don't move off the end
+ * of the line regardless.
+ */
+ if (cnt == 0 && ISMOTION(vp)) {
+ if ((ISCMD(vp->rkp, 'd') ||
+ ISCMD(vp->rkp, 'y')) &&
+ cs_fspace(sp, &cs))
+ return (1);
+ break;
+ }
+
+ /* Eat whitespace characters. */
+ if (cs_fblank(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_EOF)
+ goto ret;
+ }
+ else
+ while (cnt--) {
+ state = cs.cs_flags == 0 &&
+ inword(cs.cs_ch) ? INWORD : NOTWORD;
+ for (;;) {
+ if (cs_next(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_EOF)
+ goto ret;
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ break;
+ if (state == INWORD) {
+ if (!inword(cs.cs_ch))
+ break;
+ } else
+ if (inword(cs.cs_ch))
+ break;
+ }
+ /* See comment above. */
+ if (cnt == 0 && ISMOTION(vp)) {
+ if ((ISCMD(vp->rkp, 'd') ||
+ ISCMD(vp->rkp, 'y')) &&
+ cs_fspace(sp, &cs))
+ return (1);
+ break;
+ }
+
+ /* Eat whitespace characters. */
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ if (cs_fblank(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_EOF)
+ goto ret;
+ }
+
+ /*
+ * If we didn't move, we must be at EOF.
+ *
+ * !!!
+ * That's okay for motion commands, however.
+ */
+ret: if (!ISMOTION(vp) &&
+ cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
+ v_eof(sp, &vp->m_start);
+ return (1);
+ }
+
+ /* Adjust the end of the range for motion commands. */
+ vp->m_stop.lno = cs.cs_lno;
+ vp->m_stop.cno = cs.cs_cno;
+ if (ISMOTION(vp) && cs.cs_flags == 0)
+ --vp->m_stop.cno;
+
+ /*
+ * Non-motion commands move to the end of the range. Delete
+ * and yank stay at the start, ignore others.
+ */
+ vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
+ return (0);
+}
+
+/*
+ * v_wordE -- [count]E
+ * Move forward to the end of the bigword.
+ *
+ * PUBLIC: int v_wordE __P((SCR *, VICMD *));
+ */
+int
+v_wordE(sp, vp)
+ SCR *sp;
+ VICMD *vp;
+{
+ return (eword(sp, vp, BIGWORD));
+}
+
+/*
+ * v_worde -- [count]e
+ * Move forward to the end of the word.
+ *
+ * PUBLIC: int v_worde __P((SCR *, VICMD *));
+ */
+int
+v_worde(sp, vp)
+ SCR *sp;
+ VICMD *vp;
+{
+ return (eword(sp, vp, LITTLEWORD));
+}
+
+/*
+ * eword --
+ * Move forward to the end of the word.
+ */
+static int
+eword(sp, vp, type)
+ SCR *sp;
+ VICMD *vp;
+ enum which type;
+{
+ enum { INWORD, NOTWORD } state;
+ VCS cs;
+ u_long cnt;
+
+ cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
+ cs.cs_lno = vp->m_start.lno;
+ cs.cs_cno = vp->m_start.cno;
+ if (cs_init(sp, &cs))
+ return (1);
+
+ /*
+ * !!!
+ * If in whitespace, or the next character is whitespace, move past
+ * it. (This doesn't count as a word move.) Stay at the character
+ * past the current one, it sets word "state" for the 'e' command.
+ */
+ if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
+ if (cs_next(sp, &cs))
+ return (1);
+ if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
+ goto start;
+ }
+ if (cs_fblank(sp, &cs))
+ return (1);
+
+ /*
+ * Cyclically move to the next word -- this involves skipping
+ * over word characters and then any trailing non-word characters.
+ * Note, for the 'e' command, the definition of a word keeps
+ * switching.
+ */
+start: if (type == BIGWORD)
+ while (cnt--) {
+ for (;;) {
+ if (cs_next(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_EOF)
+ goto ret;
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ break;
+ }
+ /*
+ * When we reach the start of the word after the last
+ * word, we're done. If we changed state, back up one
+ * to the end of the previous word.
+ */
+ if (cnt == 0) {
+ if (cs.cs_flags == 0 && cs_prev(sp, &cs))
+ return (1);
+ break;
+ }
+
+ /* Eat whitespace characters. */
+ if (cs_fblank(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_EOF)
+ goto ret;
+ }
+ else
+ while (cnt--) {
+ state = cs.cs_flags == 0 &&
+ inword(cs.cs_ch) ? INWORD : NOTWORD;
+ for (;;) {
+ if (cs_next(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_EOF)
+ goto ret;
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ break;
+ if (state == INWORD) {
+ if (!inword(cs.cs_ch))
+ break;
+ } else
+ if (inword(cs.cs_ch))
+ break;
+ }
+ /* See comment above. */
+ if (cnt == 0) {
+ if (cs.cs_flags == 0 && cs_prev(sp, &cs))
+ return (1);
+ break;
+ }
+
+ /* Eat whitespace characters. */
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ if (cs_fblank(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_EOF)
+ goto ret;
+ }
+
+ /*
+ * If we didn't move, we must be at EOF.
+ *
+ * !!!
+ * That's okay for motion commands, however.
+ */
+ret: if (!ISMOTION(vp) &&
+ cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
+ v_eof(sp, &vp->m_start);
+ return (1);
+ }
+
+ /* Set the end of the range for motion commands. */
+ vp->m_stop.lno = cs.cs_lno;
+ vp->m_stop.cno = cs.cs_cno;
+
+ /*
+ * Non-motion commands move to the end of the range.
+ * Delete and yank stay at the start, ignore others.
+ */
+ vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
+ return (0);
+}
+
+/*
+ * v_WordB -- [count]B
+ * Move backward a bigword at a time.
+ *
+ * PUBLIC: int v_wordB __P((SCR *, VICMD *));
+ */
+int
+v_wordB(sp, vp)
+ SCR *sp;
+ VICMD *vp;
+{
+ return (bword(sp, vp, BIGWORD));
+}
+
+/*
+ * v_wordb -- [count]b
+ * Move backward a word at a time.
+ *
+ * PUBLIC: int v_wordb __P((SCR *, VICMD *));
+ */
+int
+v_wordb(sp, vp)
+ SCR *sp;
+ VICMD *vp;
+{
+ return (bword(sp, vp, LITTLEWORD));
+}
+
+/*
+ * bword --
+ * Move backward by words.
+ */
+static int
+bword(sp, vp, type)
+ SCR *sp;
+ VICMD *vp;
+ enum which type;
+{
+ enum { INWORD, NOTWORD } state;
+ VCS cs;
+ u_long cnt;
+
+ cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
+ cs.cs_lno = vp->m_start.lno;
+ cs.cs_cno = vp->m_start.cno;
+ if (cs_init(sp, &cs))
+ return (1);
+
+ /*
+ * !!!
+ * If in whitespace, or the previous character is whitespace, move
+ * past it. (This doesn't count as a word move.) Stay at the
+ * character before the current one, it sets word "state" for the
+ * 'b' command.
+ */
+ if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
+ if (cs_prev(sp, &cs))
+ return (1);
+ if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
+ goto start;
+ }
+ if (cs_bblank(sp, &cs))
+ return (1);
+
+ /*
+ * Cyclically move to the beginning of the previous word -- this
+ * involves skipping over word characters and then any trailing
+ * non-word characters. Note, for the 'b' command, the definition
+ * of a word keeps switching.
+ */
+start: if (type == BIGWORD)
+ while (cnt--) {
+ for (;;) {
+ if (cs_prev(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_SOF)
+ goto ret;
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ break;
+ }
+ /*
+ * When we reach the end of the word before the last
+ * word, we're done. If we changed state, move forward
+ * one to the end of the next word.
+ */
+ if (cnt == 0) {
+ if (cs.cs_flags == 0 && cs_next(sp, &cs))
+ return (1);
+ break;
+ }
+
+ /* Eat whitespace characters. */
+ if (cs_bblank(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_SOF)
+ goto ret;
+ }
+ else
+ while (cnt--) {
+ state = cs.cs_flags == 0 &&
+ inword(cs.cs_ch) ? INWORD : NOTWORD;
+ for (;;) {
+ if (cs_prev(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_SOF)
+ goto ret;
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ break;
+ if (state == INWORD) {
+ if (!inword(cs.cs_ch))
+ break;
+ } else
+ if (inword(cs.cs_ch))
+ break;
+ }
+ /* See comment above. */
+ if (cnt == 0) {
+ if (cs.cs_flags == 0 && cs_next(sp, &cs))
+ return (1);
+ break;
+ }
+
+ /* Eat whitespace characters. */
+ if (cs.cs_flags != 0 || isblank(cs.cs_ch))
+ if (cs_bblank(sp, &cs))
+ return (1);
+ if (cs.cs_flags == CS_SOF)
+ goto ret;
+ }
+
+ /* If we didn't move, we must be at SOF. */
+ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
+ v_sof(sp, &vp->m_start);
+ return (1);
+ }
+
+ /* Set the end of the range for motion commands. */
+ vp->m_stop.lno = cs.cs_lno;
+ vp->m_stop.cno = cs.cs_cno;
+
+ /*
+ * All commands move to the end of the range. Motion commands
+ * adjust the starting point to the character before the current
+ * one.
+ *
+ * !!!
+ * The historic vi didn't get this right -- the `yb' command yanked
+ * the right stuff and even updated the cursor value, but the cursor
+ * was not actually updated on the screen.
+ */
+ vp->m_final = vp->m_stop;
+ if (ISMOTION(vp))
+ --vp->m_start.cno;
+ return (0);
+}
OpenPOWER on IntegriCloud