shithub: trueawk

Download patch

ref: c6a474b1744122b5631c020b70616845353f8576
parent: e3c63b9e62e404ad7aa2ac3780503e0a6ace6d2f
parent: 144b14f99420a9ac9a63d177c11e99bec8b45b64
author: ozan yigit <ozan.yigit@gmail.com>
date: Wed Nov 15 11:21:43 EST 2023

miguel's consolidation of sub/gsub.

--- a/maketab.c
+++ b/maketab.c
@@ -52,8 +52,8 @@
 	{ ARRAY, "array", NULL },
 	{ INDIRECT, "indirect", "$(" },
 	{ SUBSTR, "substr", "substr" },
-	{ SUB, "sub", "sub" },
-	{ GSUB, "gsub", "gsub" },
+	{ SUB, "dosub", "sub" },
+	{ GSUB, "dosub", "gsub" },
 	{ INDEX, "sindex", "sindex" },
 	{ SPRINTF, "awksprintf", "sprintf " },
 	{ ADD, "arith", " + " },
--- a/proto.h
+++ b/proto.h
@@ -196,8 +196,7 @@
 extern	const char	*filename(FILE *);
 extern	Cell	*closefile(Node **, int);
 extern	void	closeall(void);
-extern	Cell	*sub(Node **, int);
-extern	Cell	*gsub(Node **, int);
+extern	Cell	*dosub(Node **, int);
 
 extern	FILE	*popen(const char *, const char *);
 extern	int	pclose(FILE *);
--- a/run.c
+++ b/run.c
@@ -2397,169 +2397,143 @@
 
 void backsub(char **pb_ptr, const char **sptr_ptr);
 
-Cell *sub(Node **a, int nnn)	/* substitute command */
+Cell *dosub(Node **a, int subop)        /* sub() and gsub(): subop is SUB or GSUB; returns the match count */
 {
-	const char *sptr, *q;
-	Cell *x, *y, *result;
-	char *t, *buf, *pb;
 	fa *pfa;
+	int tempstat = 0;                /* saved pfa->initstat, restored after the loop */
+	char *repl;
+	Cell *x;
+
+	char *buf = NULL;                /* output buffer, allocated on first match */
+	char *pb = NULL;                 /* write position within buf */
 	int bufsz = recsize;
 
-	if ((buf = (char *) malloc(bufsz)) == NULL)
-		FATAL("out of memory in sub");
-	x = execute(a[3]);	/* target string */
-	t = getsval(x);
-	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
-		pfa = (fa *) a[1];	/* regular expression */
-	else {
-		y = execute(a[1]);
-		pfa = makedfa(getsval(y), 1);
-		tempfree(y);
+	const char *r, *s;
+	const char *start;
+	const char *noempty = NULL;      /* empty match disallowed here */
+	size_t m = 0;                    /* match count */
+	size_t whichm = 0;               /* which match to select, 0 = global */
+	int mtype;                       /* match type */
+
+	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
+		pfa = (fa *) a[1];
+	} else {
+		x = execute(a[1]);
+		pfa = makedfa(getsval(x), 1);
+		tempfree(x);
 	}
-	y = execute(a[2]);	/* replacement string */
-	result = False;
-	if (pmatch(pfa, t)) {
-		sptr = t;
-		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
-		pb = buf;
-		while (sptr < patbeg)
-			*pb++ = *sptr++;
-		sptr = getsval(y);
-		while (*sptr != '\0') {
-			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
-			if (*sptr == '\\') {
-				backsub(&pb, &sptr);
-			} else if (*sptr == '&') {
-				sptr++;
-				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
-				for (q = patbeg; q < patbeg+patlen; )
-					*pb++ = *q++;
-			} else
-				*pb++ = *sptr++;
+
+	x = execute(a[2]);	/* replacement string */
+	repl = tostring(getsval(x));	/* released by xfree(repl) after the loop */
+	tempfree(x);
+
+	switch (subop) {
+	case SUB:
+		whichm = 1;           /* replace the first match only */
+		x = execute(a[3]);    /* source string */
+		break;
+	case GSUB:
+		whichm = 0;           /* replace every match */
+		x = execute(a[3]);    /* source string */
+		break;
+	default:
+		FATAL("dosub: unrecognized subop: %d", subop);
+	}
+
+	start = getsval(x);
+	while (pmatch(pfa, start)) {
+		if (buf == NULL) {	/* first match: set up the output buffer */
+			if ((pb = buf = malloc(bufsz)) == NULL)
+				FATAL("out of memory in dosub");
+			tempstat = pfa->initstat;
+			pfa->initstat = 2;
 		}
-		*pb = '\0';
-		if (pb > buf + bufsz)
-			FATAL("sub result1 %.30s too big; can't happen", buf);
-		sptr = patbeg + patlen;
-		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
-			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
-			while ((*pb++ = *sptr++) != '\0')
-				continue;
+
+		/* match types */
+		#define	MT_IGNORE  0  /* unselected or invalid */
+		#define MT_INSERT  1  /* selected, empty */
+		#define MT_REPLACE 2  /* selected, not empty */
+
+		/* an empty match just after replacement is invalid */
+
+		if (patbeg == noempty && patlen == 0) {
+			mtype = MT_IGNORE;    /* invalid, not counted */
+		} else if (whichm == ++m || whichm == 0) {
+			mtype = patlen ? MT_REPLACE : MT_INSERT;
+		} else {
+			mtype = MT_IGNORE;    /* unselected, but counted */
 		}
-		if (pb > buf + bufsz)
-			FATAL("sub result2 %.30s too big; can't happen", buf);
-		setsval(x, buf);	/* BUG: should be able to avoid copy */
-		result = True;
-	}
-	tempfree(x);
-	tempfree(y);
-	free(buf);
-	return result;
-}
 
-Cell *gsub(Node **a, int nnn)	/* global substitute */
-{
-	Cell *x, *y;
-	char *rptr, *pb;
-	const char *q, *t, *sptr;
-	char *buf;
-	fa *pfa;
-	int mflag, tempstat, num;
-	int bufsz = recsize;
-	int charlen = 0;
+		/* leading text: copy the unmatched bytes before this match */
+		if (patbeg > start) {
+			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
+				recsize, &pb, "dosub");
+			s = start;
+			while (s < patbeg)
+				*pb++ = *s++;
+		}
 
-	if ((buf = (char *) malloc(bufsz)) == NULL)
-		FATAL("out of memory in gsub");
-	mflag = 0;	/* if mflag == 0, can replace empty string */
-	num = 0;
-	x = execute(a[3]);	/* target string */
-	t = getsval(x);
-	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
-		pfa = (fa *) a[1];	/* regular expression */
-	else {
-		y = execute(a[1]);
-		pfa = makedfa(getsval(y), 1);
-		tempfree(y);
-	}
-	y = execute(a[2]);	/* replacement string */
-	if (pmatch(pfa, t)) {
-		tempstat = pfa->initstat;
-		pfa->initstat = 2;
-		pb = buf;
-		rptr = getsval(y);
-		do {
-			if (patlen == 0 && *patbeg != '\0') {	/* matched empty string */
-				if (mflag == 0) {	/* can replace empty */
-					num++;
-					sptr = rptr;
-					while (*sptr != '\0') {
-						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
-						if (*sptr == '\\') {
-							backsub(&pb, &sptr);
-						} else if (*sptr == '&') {
-							sptr++;
-							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
-							for (q = patbeg; q < patbeg+patlen; )
-								*pb++ = *q++;
-						} else
-							*pb++ = *sptr++;
-					}
-				}
-				if (*t == '\0')	/* at end */
-					goto done;
-				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
-				charlen = u8_nextlen(t);
-				while (charlen-- > 0)
-					*pb++ = *t++;
-				if (pb > buf + bufsz)	/* BUG: not sure of this test */
-					FATAL("gsub result0 %.30s too big; can't happen", buf);
-				mflag = 0;
+		if (mtype == MT_IGNORE)
+			goto matching_text;  /* skip replacement text */
+
+		r = repl;	/* expand the replacement: backslash escapes and '&' */
+		while (*r != 0) {
+			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
+			if (*r == '\\') {
+				backsub(&pb, &r);
+			} else if (*r == '&') {	/* '&' stands for the matched text */
+				r++;
+				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
+					&pb, "dosub");
+				for (s = patbeg; s < patbeg+patlen; )
+					*pb++ = *s++;
+			} else {
+				*pb++ = *r++;
 			}
-			else {	/* matched nonempty string */
-				num++;
-				sptr = t;
-				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
-				while (sptr < patbeg)
-					*pb++ = *sptr++;
-				sptr = rptr;
-				while (*sptr != '\0') {
-					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
-					if (*sptr == '\\') {
-						backsub(&pb, &sptr);
-					} else if (*sptr == '&') {
-						sptr++;
-						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
-						for (q = patbeg; q < patbeg+patlen; )
-							*pb++ = *q++;
-					} else
-						*pb++ = *sptr++;
-				}
-				t = patbeg + patlen;
-				if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
-					goto done;
-				if (pb > buf + bufsz)
-					FATAL("gsub result1 %.30s too big; can't happen", buf);
-				mflag = 1;
-			}
-		} while (pmatch(pfa,t));
-		sptr = t;
-		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
-		while ((*pb++ = *sptr++) != '\0')
-			continue;
-	done:	if (pb < buf + bufsz)
-			*pb = '\0';
-		else if (*(pb-1) != '\0')
-			FATAL("gsub result2 %.30s truncated; can't happen", buf);
-		setsval(x, buf);	/* BUG: should be able to avoid copy + free */
+		}
+
+matching_text:
+		if (mtype == MT_REPLACE || *patbeg == '\0')
+			goto next_search;  /* skip matching text */
+		
+		if (patlen == 0)	/* empty match: pass one character through so the search advances */
+			patlen = u8_nextlen(patbeg);
+		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
+		s = patbeg;
+		while (s < patbeg + patlen)
+			*pb++ = *s++;
+
+next_search:
+		start = patbeg + patlen;
+		if (m == whichm || *patbeg == '\0')	/* selected match handled, or matched at end of string */
+			break;
+		if (mtype == MT_REPLACE)
+			noempty = start;	/* forbid an empty match right after this replacement */
+
+		#undef MT_IGNORE
+		#undef MT_INSERT
+		#undef MT_REPLACE
+	}
+
+	xfree(repl);
+
+	if (buf != NULL) {	/* at least one match was found */
 		pfa->initstat = tempstat;
+
+		/* trailing text */
+		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
+		while ((*pb++ = *start++) != '\0')
+			;
+
+		setsval(x, buf);	/* store the result back into the source cell */
+		free(buf);
 	}
+
 	tempfree(x);
-	tempfree(y);
 	x = gettemp();
 	x->tval = NUM;
-	x->fval = num;
-	free(buf);
-	return(x);
+	x->fval = m;
+	return x;
 }
 
 void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
--