summary refs log tree commit diff stats
path: root/usr.bin/fmt
diff options
context:
space:
mode:
author tjr <tjr@FreeBSD.org> 2004-08-02 11:10:20 +0000
committer tjr <tjr@FreeBSD.org> 2004-08-02 11:10:20 +0000
commit 9811ba2d677c22722fb78ac077b643a6290b6f96 (patch)
tree 553a9fa70acc7061e7643551a123973d12840303 /usr.bin/fmt
parent 43af5e5ee20c758bb8c64bcb0ea91c2c26ddab47 (diff)
download FreeBSD-src-9811ba2d677c22722fb78ac077b643a6290b6f96.zip
FreeBSD-src-9811ba2d677c22722fb78ac077b643a6290b6f96.tar.gz
Add support for multibyte characters.
Diffstat (limited to 'usr.bin/fmt')
-rw-r--r-- usr.bin/fmt/fmt.1 13
-rw-r--r-- usr.bin/fmt/fmt.c 138
2 files changed, 94 insertions, 57 deletions
diff --git a/usr.bin/fmt/fmt.1 b/usr.bin/fmt/fmt.1
index 1dde958..617f495 100644
--- a/usr.bin/fmt/fmt.1
+++ b/usr.bin/fmt/fmt.1
@@ -34,7 +34,7 @@
.\"
.\" Modified by Gareth McCaughan to describe the new version of `fmt'
.\" rather than the old one.
-.Dd July 17, 2004
+.Dd August 2, 2004
.Dt FMT 1
.Os
.Sh NAME
@@ -154,6 +154,15 @@ the command
.Pp
will reformat a paragraph,
evening the lines.
+.Sh ENVIRONMENT
+The
+.Ev LANG , LC_ALL
+and
+.Ev LC_CTYPE
+environment variables affect the execution of
+.Nm
+as described in
+.Xr environ 7 .
.Sh SEE ALSO
.Xr mail 1 ,
.Xr nroff 1
@@ -184,5 +193,3 @@ The
.Nm
utility is not infallible in guessing what lines are mail headers and what
lines are not.
-.Pp
-Multibyte characters are not handled correctly.
diff --git a/usr.bin/fmt/fmt.c b/usr.bin/fmt/fmt.c
index 8a763b5..33f92b1 100644
--- a/usr.bin/fmt/fmt.c
+++ b/usr.bin/fmt/fmt.c
@@ -175,7 +175,6 @@ static const char copyright[] =
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <ctype.h>
#include <err.h>
#include <locale.h>
#include <stdio.h>
@@ -183,6 +182,8 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
/* Something that, we hope, will never be a genuine line length,
* indentation etc.
@@ -222,14 +223,15 @@ static int coalesce_spaces_P=0; /* Coalesce multiple whitespace -> ' ' ? */
static int allow_indented_paragraphs=0; /* Can first line have diff. ind.? */
static int tab_width=8; /* Number of spaces per tab stop */
static size_t output_tab_width=8; /* Ditto, when squashing leading spaces */
-static const char *sentence_enders=".?!"; /* Double-space after these */
+static const wchar_t *sentence_enders=L".?!"; /* Double-space after these */
static int grok_mail_headers=0; /* treat embedded mail headers magically? */
static int format_troff=0; /* Format troff? */
static int n_errors=0; /* Number of failed files. Return on exit. */
-static char *output_buffer=0; /* Output line will be built here */
+static wchar_t *output_buffer=0; /* Output line will be built here */
static size_t x; /* Horizontal position in output line */
static size_t x0; /* Ditto, ignoring leading whitespace */
+static size_t output_buffer_length = 0;
static size_t pending_spaces; /* Spaces to add before next word */
static int output_in_paragraph=0; /* Any of current para written out yet? */
@@ -237,13 +239,14 @@ static int output_in_paragraph=0; /* Any of current para written out yet? */
static void process_named_file (const char *);
static void process_stream (FILE *, const char *);
-static size_t indent_length (const char *, size_t);
-static int might_be_header (const unsigned char *);
+static size_t indent_length (const wchar_t *, size_t);
+static int might_be_header (const wchar_t *);
static void new_paragraph (size_t, size_t);
-static void output_word (size_t, size_t, const char *, size_t, size_t);
+static void output_word (size_t, size_t, const wchar_t *, size_t,
+ size_t);
static void output_indent (size_t);
static void center_stream (FILE *, const char *);
-static char * get_line (FILE *, size_t *);
+static wchar_t * get_line (FILE *, size_t *);
static void * xrealloc (void *, size_t);
#define XMALLOC(x) xrealloc(0,x)
@@ -254,7 +257,9 @@ static void * xrealloc (void *, size_t);
int
main(int argc, char *argv[]) {
int ch; /* used for |getopt| processing */
-
+ wchar_t *tmp;
+ size_t len;
+ const char *src;
(void) setlocale(LC_CTYPE, "");
@@ -267,7 +272,13 @@ main(int argc, char *argv[]) {
format_troff = 1;
continue;
case 'd':
- sentence_enders = optarg;
+ src = optarg;
+ len = mbsrtowcs(NULL, &src, 0, NULL);
+ if (len == (size_t)-1)
+ err(EX_USAGE, "bad sentence-ending character set");
+ tmp = XMALLOC((len + 1) * sizeof(wchar_t));
+ mbsrtowcs(tmp, &src, len + 1, NULL);
+ sentence_enders = tmp;
continue;
case 'l':
output_tab_width
@@ -340,7 +351,8 @@ main(int argc, char *argv[]) {
}
if (goal_length==0) goal_length = 65;
if (max_length==0) max_length = goal_length+10;
- output_buffer = XMALLOC(max_length+1); /* really needn't be longer */
+ /* really needn't be longer */
+ output_buffer = XMALLOC((max_length+1) * sizeof(wchar_t));
/* 2. Process files. */
@@ -365,6 +377,7 @@ process_named_file(const char *name) {
if (!f) { warn("%s", name); ++n_errors; }
else {
process_stream(f, name);
+ if (ferror(f)) { warn("%s", name); ++n_errors; }
fclose(f);
}
}
@@ -388,7 +401,7 @@ process_stream(FILE *stream, const char *name) {
size_t first_indent=SILLY; /* indentation of line 0 of paragraph */
HdrType prev_header_type=hdr_ParagraphStart;
/* ^-- header_type of previous line; -1 at para start */
- char *line;
+ wchar_t *line;
size_t length;
if (centerP) { center_stream(stream, name); return; }
@@ -424,9 +437,9 @@ process_stream(FILE *stream, const char *name) {
if (header_type==hdr_Header) last_indent=2; /* for cont. lines */
if (length==0 || (line[0]=='.' && !format_troff)) {
if (length==0)
- putchar('\n');
+ putwchar('\n');
else
- printf("%.*s\n", (int)length, line);
+ wprintf(L"%.*ls\n", (int)length, line);
prev_header_type=hdr_ParagraphStart;
continue;
}
@@ -465,7 +478,7 @@ process_stream(FILE *stream, const char *name) {
/* How long is the indent on this line?
*/
static size_t
-indent_length(const char *line, size_t length) {
+indent_length(const wchar_t *line, size_t length) {
size_t n=0;
while (n<length && *line++ == ' ') ++n;
return n;
@@ -478,22 +491,21 @@ indent_length(const char *line, size_t length) {
* conservative to avoid mangling ordinary civilised text.
*/
static int
-might_be_header(const unsigned char *line) {
- if (!isupper(*line++)) return 0;
- while (*line && (isalnum(*line) || *line=='-')) ++line;
- return (*line==':' && isspace(line[1]));
+might_be_header(const wchar_t *line) {
+ if (!iswupper(*line++)) return 0;
+ while (*line && (iswalnum(*line) || *line=='-')) ++line;
+ return (*line==':' && iswspace(line[1]));
}
/* Begin a new paragraph with an indent of |indent| spaces.
*/
static void
new_paragraph(size_t old_indent, size_t indent) {
- if (x0) {
+ if (output_buffer_length) {
if (old_indent>0) output_indent(old_indent);
- fwrite(output_buffer, 1, x0, stdout);
- putchar('\n');
+ wprintf(L"%.*ls\n", (int)output_buffer_length, output_buffer);
}
- x=indent; x0=0; pending_spaces=0;
+ x=indent; x0=0; output_buffer_length=0; pending_spaces=0;
output_in_paragraph = 0;
}
@@ -503,11 +515,11 @@ static void
output_indent(size_t n_spaces) {
if (output_tab_width) {
while (n_spaces >= output_tab_width) {
- putchar('\t');
+ putwchar('\t');
n_spaces -= output_tab_width;
}
}
- while (n_spaces-- > 0) putchar(' ');
+ while (n_spaces-- > 0) putwchar(' ');
}
/* Output a single word, or add it to the buffer.
@@ -515,9 +527,17 @@ output_indent(size_t n_spaces) {
* lines of a paragraph. They'll often be the same, of course.
*/
static void
-output_word(size_t indent0, size_t indent1, const char *word, size_t length, size_t spaces) {
- size_t new_x = x+pending_spaces+length;
+output_word(size_t indent0, size_t indent1, const wchar_t *word, size_t length, size_t spaces) {
+ size_t new_x;
size_t indent = output_in_paragraph ? indent1 : indent0;
+ size_t width;
+ const wchar_t *p;
+ int cwidth;
+
+ for (p = word, width = 0; p < &word[length]; p++)
+ width += (cwidth = wcwidth(*p)) > 0 ? cwidth : 1;
+
+ new_x = x + pending_spaces + width;
/* If either |spaces==0| (at end of line) or |coalesce_spaces_P|
* (squashing internal whitespace), then add just one space;
@@ -525,16 +545,17 @@ output_word(size_t indent0, size_t indent1, const char *word, size_t length, siz
* actually add two spaces.
*/
if (coalesce_spaces_P || spaces==0)
- spaces = strchr(sentence_enders, word[length-1]) ? 2 : 1;
+ spaces = wcschr(sentence_enders, word[length-1]) ? 2 : 1;
if (new_x<=goal_length) {
/* After adding the word we still aren't at the goal length,
* so clearly we add it to the buffer rather than outputing it.
*/
- memset(output_buffer+x0, ' ', pending_spaces);
+ wmemset(output_buffer+output_buffer_length, L' ', pending_spaces);
x0 += pending_spaces; x += pending_spaces;
- memcpy(output_buffer+x0, word, length);
- x0 += length; x += length;
+ output_buffer_length += pending_spaces;
+ wmemcpy(output_buffer+output_buffer_length, word, length);
+ x0 += width; x += width; output_buffer_length += length;
pending_spaces = spaces;
}
else {
@@ -545,28 +566,30 @@ output_word(size_t indent0, size_t indent1, const char *word, size_t length, siz
* In case (3) we put a newline in between.
*/
if (indent>0) output_indent(indent);
- fwrite(output_buffer, 1, x0, stdout);
+ wprintf(L"%.*ls", (int)output_buffer_length, output_buffer);
if (x0==0 || (new_x <= max_length && new_x-goal_length <= goal_length-x)) {
- printf("%*s", (int)pending_spaces, "");
+ wprintf(L"%*ls", (int)pending_spaces, L"");
goto write_out_word;
}
else {
/* If the word takes us over the limit on its own, just
* spit it out and don't bother buffering it.
*/
- if (indent+length > max_length) {
- putchar('\n');
+ if (indent+width > max_length) {
+ putwchar('\n');
if (indent>0) output_indent(indent);
write_out_word:
- fwrite(word, 1, length, stdout);
+ wprintf(L"%.*ls", (int)length, word);
x0 = 0; x = indent1; pending_spaces = 0;
+ output_buffer_length = 0;
}
else {
- memcpy(output_buffer, word, length);
- x0 = length; x = length+indent1; pending_spaces = spaces;
+ wmemcpy(output_buffer, word, length);
+ x0 = width; x = width+indent1; pending_spaces = spaces;
+ output_buffer_length = length;
}
}
- putchar('\n');
+ putwchar('\n');
output_in_paragraph = 1;
}
}
@@ -576,15 +599,19 @@ write_out_word:
*/
static void
center_stream(FILE *stream, const char *name) {
- char *line;
+ wchar_t *line, *p;
size_t length;
+ size_t width;
+ int cwidth;
while ((line=get_line(stream, &length)) != 0) {
size_t l=length;
- while (l>0 && isspace((unsigned char)*line)) { ++line; --l; }
+ while (l>0 && iswspace(*line)) { ++line; --l; }
length=l;
- while (l<goal_length) { putchar(' '); l+=2; }
- fwrite(line, 1, length, stdout);
- putchar('\n');
+ for (p = line, width = 0; p < &line[length]; p++)
+ width += (cwidth = wcwidth(*p)) > 0 ? cwidth : 1;
+ l = width;
+ while (l<goal_length) { putwchar(' '); l+=2; }
+ wprintf(L"%.*ls\n", (int)length, line);
}
if (ferror(stream)) { warn("%s", name); ++n_errors; }
}
@@ -600,32 +627,35 @@ center_stream(FILE *stream, const char *name) {
* Don't confuse |spaces_pending| here with the global
* |pending_spaces|.
*/
-static char *
+static wchar_t *
get_line(FILE *stream, size_t *lengthp) {
- static char *buf=NULL;
+ static wchar_t *buf=NULL;
static size_t length=0;
size_t len=0;
- int ch;
+ wint_t ch;
size_t spaces_pending=0;
int troff=0;
+ size_t col=0;
+ int cwidth;
- if (buf==NULL) { length=100; buf=XMALLOC(length); }
- while ((ch=getc(stream)) != '\n' && ch != EOF) {
+ if (buf==NULL) { length=100; buf=XMALLOC(length * sizeof(wchar_t)); }
+ while ((ch=getwc(stream)) != '\n' && ch != WEOF) {
if (len+spaces_pending==0 && ch=='.' && !format_troff) troff=1;
if (ch==' ') ++spaces_pending;
- else if (troff || isprint(ch)) {
+ else if (troff || iswprint(ch)) {
while (len+spaces_pending >= length) {
- length*=2; buf=xrealloc(buf, length);
+ length*=2; buf=xrealloc(buf, length * sizeof(wchar_t));
}
- while (spaces_pending > 0) { --spaces_pending; buf[len++]=' '; }
+ while (spaces_pending > 0) { --spaces_pending; buf[len++]=' '; col++; }
buf[len++] = ch;
+ col += (cwidth = wcwidth(ch)) > 0 ? cwidth : 1;
}
else if (ch=='\t')
- spaces_pending += tab_width - (len+spaces_pending)%tab_width;
- else if (ch=='\b') { if (len) --len; }
+ spaces_pending += tab_width - (col+spaces_pending)%tab_width;
+ else if (ch=='\b') { if (len) --len; if (col) --col; }
}
*lengthp=len;
- return (len>0 || ch!=EOF) ? buf : 0;
+ return (len>0 || ch!=WEOF) ? buf : 0;
}
/* (Re)allocate some memory, exiting with an error if we can't.
OpenPOWER on IntegriCloud