shithub: trueawk

Download patch

ref: 44c8f22312adc76b9ae02d636c967baaa7e7661a
parent: c1126ba66854b4b7a47deda563faafac60704d08
author: Brian Kernighan <fakeuser@fake.com>
date: Fri Oct 14 11:17:22 EDT 2022

fixes for printf from arnold (for which many thanks)

--- a/b.c
+++ b/b.c
@@ -400,12 +400,11 @@
 {
 	int i, c, c2;
 	int n;
-	const uschar *op, *p = (const uschar *) argp;
+	const uschar *p = (const uschar *) argp;
 	int *bp, *retp;
 	static int *buf = NULL;
 	static int bufsz = 100;
 
-	op = p;
 	if (buf == NULL && (buf = (int *) calloc(bufsz, sizeof(int))) == NULL)
 		FATAL("out of space for character class [%.10s...] 1", p);
 	bp = buf;
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@
 THIS SOFTWARE.
 ****************************************************************/
 
-const char	*version = "One True Awk 20221002";
+const char	*version = "One True Awk 20221014";
 
 #define DEBUG
 #include <stdio.h>
--- a/run.c
+++ b/run.c
@@ -582,7 +582,7 @@
 
 /* ======== utf-8 code ========== */
 
-/* 
+/*
  * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
  * or utf-8.  u8_isutf tests whether a string starts with a valid
  * utf-8 sequence, and returns 0 if not (e.g., high bit set).
@@ -1145,13 +1145,19 @@
 
 			/* get here if string has utf-8 chars and fmt is not plain %s */
 			/* "%-w.ps", where -, w and .p are all optional */
+			/* '0' before the w is a flag character */
 			/* fmt points at % */
 			int ljust = 0, wid = 0, prec = n, pad = 0;
+			char padchar = ' ';
 			char *f = fmt+1;
 			if (f[0] == '-') {
 				ljust = 1;
 				f++;
 			}
+			if (f[0] == '0') {	/* '0' is a flag, pad with zeroes, even %s */
+				padchar = '0';
+				f++;
+			}
 			if (isdigit(f[0])) { /* there is a wid */
 				wid = strtol(f, &f, 10);
 			}
@@ -1171,12 +1177,12 @@
 				}
 				for (i = 0; i < pad; i++) {
 					//printf(" ");
-					*p++ = ' ';
+					*p++ = padchar;
 				}
 			} else { // print pad blanks, then prec chars from t
 				for (i = 0; i < pad; i++) {
 					//printf(" ");
-					*p++ = ' ';
+					*p++ = padchar;
 				}
 				n = u8_char2byte(t, prec);
 				for (k = 0; k < n; k++) {
@@ -1188,7 +1194,18 @@
 			break;
 		}
 
-		case 'c':
+               case 'c': {
+                       /*
+                        * FIXME: Once upon a time, if a numeric value was given,
+                        * awk just turned it into a character and printed it:
+                        *      BEGIN { printf("%c\n", 65) }
+                        * printed "A".
+                        *
+                        * But nowadays, what if the numeric value is > 256 and
+                        * represents a valid Unicode code point?!?
+                        *
+                        * We're punting on this for the moment...
+                        */
 			if (isnum(x)) {
 				if ((int)getfval(x)) {
 					snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
@@ -1204,11 +1221,48 @@
 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
 				break;
 			}
-			// utf8 character
-			for (int i = 0; i < n; i++)
-				*p++ = t[i];
+
+                       // utf8 character, almost same song and dance as for %s
+                       int ljust = 0, wid = 0, prec = n, pad = 0;
+                       char padchar = ' ';
+                       char *f = fmt+1;
+                       if (f[0] == '-') {
+                               ljust = 1;
+                               f++;
+                       }
+                       if (f[0] == '0') {      /* '0' is a flag, pad with zeroes, even %s */
+                               padchar = '0';
+                               f++;
+                       }
+                       if (isdigit(f[0])) { /* there is a wid */
+                               wid = strtol(f, &f, 10);
+                       }
+                       if (f[0] == '.') { /* there is a .prec */
+                               prec = strtol(++f, &f, 10);
+                       }
+                       if (prec > 1)           // %c --> only one character
+                               prec = 1;
+                       pad = wid>prec ? wid - prec : 0;  // has to be >= 0
+                       int i;
+
+                       if (ljust) { // print one char from t, then pad blanks
+                               for (int i = 0; i < n; i++)
+                                       *p++ = t[i];
+                               for (i = 0; i < pad; i++) {
+                                       //printf(" ");
+                                       *p++ = padchar;
+                               }
+                       } else { // print pad blanks, then prec chars from t
+                               for (i = 0; i < pad; i++) {
+                                       //printf(" ");
+                                       *p++ = padchar;
+                               }
+                               for (int i = 0; i < n; i++)
+                                       *p++ = t[i];
+                       }
 			*p = 0;
 			break;
+		}
 		default:
 			FATAL("can't happen: bad conversion %c in format()", flag);
 		}
@@ -1630,7 +1684,7 @@
 	} else if (sep == ',') {	/* CSV processing.  no error handling */
 		char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
 		for (;;) {
-			char *fr = newt; 
+			char *fr = newt;
 			n++;
 			if (*s == '"' ) { /* start of "..." */
 				for (s++ ; *s != '\0'; ) {
--