ref: c879fbf013b5314c27fa236c987ea56a521420e6
parent: 0e1bebcc09e647ab3f7f6bd8fba34ee4a64efc48
author: Arnold D. Robbins <arnold@skeeve.com>
date: Fri Nov 8 09:40:18 EST 2019
From Ori Bernstein, ori@eigenstate.org, for FS="" in multibyte locale.
--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,12 @@
This file lists all bug fixes, changes, etc., made since the AWK book
was sent to the printers in August, 1987.
+November 8, 2019:
+ Fix from Ori Bernstein to get UTF-8 characters instead of
+ bytes when FS = "". This is currently the only bit of
+ the One True Awk that understands multibyte characters.
+ From Arnold Robbins, apply some cleanups in the test suite.
+
October 25, 2019:
More fixes and cleanups from NetBSD, courtesy of Christos
Zoulas. Merges PRs 54 and 55.
--- a/lib.c
+++ b/lib.c
@@ -332,15 +332,19 @@
}
*fr = 0;
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
- for (i = 0; *r != 0; r++) {
- char buf[2];
+ for (i = 0; *r != '\0'; r += n) {
+ char buf[MB_CUR_MAX + 1];
+
i++;
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
- buf[0] = *r;
- buf[1] = 0;
+ n = mblen(r, MB_CUR_MAX);
+ if (n < 0)
+ n = 1;
+ memcpy(buf, r, n);
+ buf[n] = '\0';
fldtab[i]->sval = tostring(buf);
fldtab[i]->tval = FLD | STR;
}
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@
THIS SOFTWARE.
****************************************************************/
-const char *version = "version 20191025";
+const char *version = "version 20191108";
#define DEBUG
#include <stdio.h>
--
⑨