shithub: trueawk

Download patch

ref: c879fbf013b5314c27fa236c987ea56a521420e6
parent: 0e1bebcc09e647ab3f7f6bd8fba34ee4a64efc48
author: Arnold D. Robbins <arnold@skeeve.com>
date: Fri Nov 8 09:40:18 EST 2019

From Ori Bernstein, ori@eigenstate.org, for FS="" in multibyte locale.

--- a/FIXES
+++ b/FIXES
@@ -25,6 +25,12 @@
 This file lists all bug fixes, changes, etc., made since the AWK book
 was sent to the printers in August, 1987.
 
+November 8, 2019:
+	Fix from Ori Bernstein to get UTF-8 characters instead of
+	bytes when FS = "".  This is currently the only bit of
+	the One True Awk that understands multibyte characters.
+	From Arnold Robbins, apply some cleanups in the test suite.
+
 October 25, 2019:
 	More fixes and cleanups from NetBSD, courtesy of Christos
 	Zoulas. Merges PRs 54 and 55.
--- a/lib.c
+++ b/lib.c
@@ -332,15 +332,19 @@
 		}
 		*fr = 0;
 	} else if ((sep = *inputFS) == 0) {		/* new: FS="" => 1 char/field */
-		for (i = 0; *r != 0; r++) {
-			char buf[2];
+		for (i = 0; *r != '\0'; r += n) {
+			char buf[MB_CUR_MAX + 1];
+
 			i++;
 			if (i > nfields)
 				growfldtab(i);
 			if (freeable(fldtab[i]))
 				xfree(fldtab[i]->sval);
-			buf[0] = *r;
-			buf[1] = 0;
+			n = mblen(r, MB_CUR_MAX);
+			if (n < 0)
+				n = 1;
+			memcpy(buf, r, n);
+			buf[n] = '\0';
 			fldtab[i]->sval = tostring(buf);
 			fldtab[i]->tval = FLD | STR;
 		}
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@
 THIS SOFTWARE.
 ****************************************************************/
 
-const char	*version = "version 20191025";
+const char	*version = "version 20191108";
 
 #define DEBUG
 #include <stdio.h>
--