shithub: trueawk

Download patch

ref: e7ad51dc32aa7b61dfce903c82c82f75b93bfae7
parent: 328f0e4eddc575d7072c17e6082d91bb9851c878
author: Arnold D. Robbins <arnold@skeeve.com>
date: Tue Oct 10 17:05:18 EDT 2023

Sort the gototab entries and binary search them.

--- a/b.c
+++ b/b.c
@@ -96,9 +96,8 @@
    mechanism of the goto table used 8-bit byte indices into the
    gototab entries to compute the next state.  Unicode is a lot
    bigger, so the gototab entries are now structs with a character
-   and a next state, and there is a linear search of the characters
-   to find the state.  (Yes, this is slower, by a significant
-   amount.  Tough.)
+   and a next state. These are sorted by code point and binary
+   searched.
 
    Throughout the RE mechanism in b.c, utf-8 characters are
    converted to their utf-32 value.  This mostly shows up in
@@ -113,6 +112,7 @@
 
  */
 
+static int entry_cmp(const void *l, const void *r);
 static int get_gototab(fa*, int, int);
 static int set_gototab(fa*, int, int, int);
 extern int u8_rune(int *, const uschar *);
@@ -597,6 +597,7 @@
 
 static int get_gototab(fa *f, int state, int ch) /* hide gototab inplementation */
 {
+#if 0
 	int i;
 	for (i = 0; i < f->gototab[state].allocated; i++) {
 		if (f->gototab[state].entries[i].ch == 0)
@@ -605,8 +606,34 @@
 			return f->gototab[state].entries[i].state;
 	}
 	return 0;
+#else
+
+	gtte key;
+	gtte *item;
+
+	key.ch = ch;
+	key.state = 0;	/* irrelevant */
+	item = bsearch(& key, f->gototab[state].entries,
+			f->gototab[state].allocated, sizeof(gtte),
+			entry_cmp);
+
+	if (item == NULL)
+		return 0;
+	else
+		return item->state;
+#endif
 }
 
+static int entry_cmp(const void *l, const void *r)
+{
+	const gtte *left, *right;
+
+	left = (const gtte *) l;
+	right = (const gtte *) r;
+
+	return left->ch - right->ch;
+}
+
 static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab inplementation */
 {
 	int i;
@@ -625,6 +652,8 @@
 	f->gototab[state].entries = p;
 	f->gototab[state].entries[i].ch = ch;
 	f->gototab[state].entries[i].state = val;
+
+	qsort(p, f->gototab[state].allocated, sizeof(gtte), entry_cmp);
 
 	return val; /* not used anywhere at the moment */
 }
--