shithub: furgit

--- /dev/null

+++ b/diff/lines/diff.go

@@ -1,0 +1,223 @@

+// Package lines provides routines to perform line-based diffs.

+package lines

+import "bytes"

+// Diff performs a line-based diff of oldB against newB.
+//
+// Lines are bytes up to and including '\n' (final line may lack '\n').
+// The result lists, in order, maximal runs of unchanged, deleted and added
+// lines. A chunk's Data is a sub-slice of oldB (unchanged and deleted
+// chunks) or of newB (added chunks), so callers must not modify the inputs
+// while the chunks are still in use.
+//
+// Diff returns (nil, nil) when both inputs are empty. The returned error is
+// currently always nil.
+func Diff(oldB, newB []byte) ([]Chunk, error) {
+	oldLines := splitLines(oldB)
+	newLines := splitLines(newB)
+	n, m := len(oldLines), len(newLines)
+	if n == 0 && m == 0 {
+		return nil, nil
+	}
+	// Map every distinct line to a small integer so that the search compares
+	// ints instead of byte slices. Both inputs share one table.
+	ids := make(map[string]int)
+	oldIDs := internLines(ids, oldB, oldLines)
+	newIDs := internLines(ids, newB, newLines)
+	trace := editTrace(oldIDs, newIDs)
+	// Walk the trace backwards from (n, m) to (0, 0), recording one edit per
+	// line. Edits are therefore collected in reverse order.
+	type edit struct {
+		kind ChunkKind
+		line int // index into oldLines, or into newLines for added lines
+	}
+	rev := make([]edit, 0, n+m)
+	x, y := n, m
+	for d := len(trace) - 1; d >= 0; d-- {
+		k := x - y
+		prevX, prevY := 0, 0
+		if d > 0 {
+			// Same choice as the forward pass: which neighbouring diagonal
+			// of row d-1 did this diagonal extend from?
+			prev := trace[d-1]
+			prevK := k - 1
+			if k == -d || (k != d && prev[k+d-2] < prev[k+d]) {
+				prevK = k + 1
+			}
+			prevX = prev[prevK+d-1]
+			prevY = prevX - prevK
+		}
+		// Undo the run of matching lines that followed the edit.
+		for x > prevX && y > prevY {
+			x--
+			y--
+			rev = append(rev, edit{kind: ChunkKindUnchanged, line: x})
+		}
+		if d == 0 {
+			break
+		}
+		if x == prevX {
+			y--
+			rev = append(rev, edit{kind: ChunkKindAdded, line: y})
+		} else {
+			x--
+			rev = append(rev, edit{kind: ChunkKindDeleted, line: x})
+		}
+	}
+	// Coalesce neighbouring edits of the same kind into chunks. Edits of one
+	// kind always come from the same buffer, so adjacency is decided purely
+	// by byte offsets; the buffers themselves are never compared.
+	var out []Chunk
+	// runStart and runEnd delimit the bytes of the chunk being grown within
+	// its source buffer. runEnd is -1 once that chunk owns a private copy.
+	runStart, runEnd := 0, 0
+	for i := len(rev) - 1; i >= 0; i-- {
+		e := rev[i]
+		src, spans := oldB, oldLines
+		if e.kind == ChunkKindAdded {
+			src, spans = newB, newLines
+		}
+		ln := spans[e.line]
+		if len(out) == 0 || out[len(out)-1].Kind != e.kind {
+			out = append(out, Chunk{Kind: e.kind, Data: src[ln.start:ln.end]})
+			runStart, runEnd = ln.start, ln.end
+			continue
+		}
+		last := &out[len(out)-1]
+		if runEnd == ln.start {
+			// Adjacent in the source buffer: widen the existing sub-slice.
+			runEnd = ln.end
+			last.Data = src[runStart:runEnd]
+			continue
+		}
+		// Defensive path, not expected with the edit order produced above:
+		// build a private copy rather than appending into the caller's
+		// buffer, which would overwrite the bytes after the chunk.
+		merged := make([]byte, 0, len(last.Data)+ln.end-ln.start)
+		merged = append(merged, last.Data...)
+		merged = append(merged, src[ln.start:ln.end]...)
+		last.Data = merged
+		runEnd = -1
+	}
+	return out, nil
+}
+
+// lineSpan is the half-open byte range [start, end) of one line within its
+// source buffer.
+type lineSpan struct {
+	start int
+	end   int
+}
+
+// splitLines returns the byte range of every line in b. A line ends after
+// its '\n'; a final line without '\n' is included as is. Empty input yields
+// nil.
+func splitLines(b []byte) []lineSpan {
+	var spans []lineSpan
+	for start := 0; start < len(b); {
+		end := len(b)
+		if i := bytes.IndexByte(b[start:], '\n'); i >= 0 {
+			end = start + i + 1
+		}
+		spans = append(spans, lineSpan{start: start, end: end})
+		start = end
+	}
+	return spans
+}
+
+// internLines maps each line of buf to an integer ID, assigning the next
+// free ID to contents not yet present in ids. Equal lines get equal IDs.
+func internLines(ids map[string]int, buf []byte, spans []lineSpan) []int {
+	res := make([]int, len(spans))
+	for i, sp := range spans {
+		line := buf[sp.start:sp.end]
+		id, ok := ids[string(line)]
+		if !ok {
+			id = len(ids)
+			ids[string(line)] = id
+		}
+		res[i] = id
+	}
+	return res
+}
+
+// editTrace runs the forward pass of a Myers-style greedy shortest edit
+// search over two sequences of line IDs.
+//
+// Row d of the result stores, for each diagonal k = x - y in [-d, d], the
+// furthest x reached on k using d edits, at index k+d. Only diagonals with
+// the same parity as d are written and only those are ever read back, so
+// rows need just 2d+1 entries and no sentinel fill. The last row is the one
+// in which the end of both sequences was reached.
+func editTrace(oldIDs, newIDs []int) [][]int {
+	n, m := len(oldIDs), len(newIDs)
+	// Row 0: follow the common prefix without spending any edit.
+	x0 := 0
+	for x0 < n && x0 < m && oldIDs[x0] == newIDs[x0] {
+		x0++
+	}
+	trace := [][]int{{x0}}
+	found := x0 >= n && x0 >= m
+	maxD := n + m
+	for d := 1; d <= maxD && !found; d++ {
+		prev := trace[d-1]
+		cur := make([]int, 2*d+1)
+		for k := -d; k <= d; k += 2 {
+			// prev[k+d] is diagonal k+1 and prev[k+d-2] is diagonal k-1 of
+			// the previous row.
+			var x int
+			if k == -d || (k != d && prev[k+d-2] < prev[k+d]) {
+				x = prev[k+d] // step down: take a line from the new input
+			} else {
+				x = prev[k+d-2] + 1 // step right: drop a line from the old input
+			}
+			y := x - k
+			for x < n && y < m && oldIDs[x] == newIDs[y] {
+				x++
+				y++
+			}
+			cur[k+d] = x
+			if x >= n && y >= m {
+				found = true
+				break
+			}
+		}
+		trace = append(trace, cur)
+	}
+	return trace
+}

+// Chunk represents a contiguous region of lines that Diff has categorized

+// as unchanged, deleted, or added. Data holds the region's lines back to back.

+type Chunk struct {

+	Kind ChunkKind // category of every line in this region

+	Data []byte // raw line bytes; Diff normally sub-slices its input here rather than copying

+}

+// ChunkKind enumerates the categories a diff Chunk can belong to.

+type ChunkKind int

+const (

+	// ChunkKindUnchanged marks a chunk whose lines appear in both inputs.

+	ChunkKindUnchanged ChunkKind = iota // zero value of ChunkKind

+	// ChunkKindDeleted marks a chunk whose lines come from the old input only.

+	ChunkKindDeleted

+	// ChunkKindAdded marks a chunk whose lines come from the new input only.

+	ChunkKindAdded

+)

--- /dev/null

+++ b/diff/lines/diff_test.go

@@ -1,0 +1,326 @@

+package lines

+import (

+	"bytes"

+	"strconv"

+	"strings"

+	"testing"

+)

+// TestDiff checks Diff against a table of old/new inputs and the exact
+// sequence of chunks expected for each pair.
+func TestDiff(t *testing.T) {
+	t.Parallel()
+	// Shorthand constructors keep the expectation table compact.
+	same := func(s string) Chunk { return Chunk{Kind: ChunkKindUnchanged, Data: []byte(s)} }
+	del := func(s string) Chunk { return Chunk{Kind: ChunkKindDeleted, Data: []byte(s)} }
+	add := func(s string) Chunk { return Chunk{Kind: ChunkKindAdded, Data: []byte(s)} }
+	cases := []struct {
+		name   string
+		before string
+		after  string
+		want   []Chunk
+	}{
+		{
+			name:   "empty inputs produce no chunks",
+			before: "",
+			after:  "",
+			want:   nil,
+		},
+		{
+			name:   "only additions",
+			before: "",
+			after:  "alpha\nbeta\n",
+			want:   []Chunk{add("alpha\nbeta\n")},
+		},
+		{
+			name:   "only deletions",
+			before: "alpha\nbeta\n",
+			after:  "",
+			want:   []Chunk{del("alpha\nbeta\n")},
+		},
+		{
+			name:   "unchanged content is grouped",
+			before: "same\nlines\n",
+			after:  "same\nlines\n",
+			want:   []Chunk{same("same\nlines\n")},
+		},
+		{
+			name:   "insertion in the middle",
+			before: "a\nb\nc\n",
+			after:  "a\nb\nX\nc\n",
+			want:   []Chunk{same("a\nb\n"), add("X\n"), same("c\n")},
+		},
+		{
+			name:   "replacement without trailing newline",
+			before: "first\nsecond",
+			after:  "first\nsecond\n",
+			want:   []Chunk{same("first\n"), del("second"), add("second\n")},
+		},
+		{
+			name:   "line replacement",
+			before: "a\nb\nc\n",
+			after:  "a\nB\nc\n",
+			want:   []Chunk{same("a\n"), del("b\n"), add("B\n"), same("c\n")},
+		},
+		{
+			name:   "swap adjacent lines",
+			before: "A\nB\n",
+			after:  "B\nA\n",
+			want:   []Chunk{del("A\n"), same("B\n"), add("A\n")},
+		},
+		{
+			name:   "indentation change is a full line replacement",
+			before: "func main() {\n\treturn\n}\n",
+			after:  "func main() {\n    return\n}\n",
+			want: []Chunk{
+				same("func main() {\n"),
+				del("\treturn\n"),
+				add("    return\n"),
+				same("}\n"),
+			},
+		},
+		{
+			name:   "commenting out lines",
+			before: "code\n",
+			after:  "// code\n",
+			want:   []Chunk{del("code\n"), add("// code\n")},
+		},
+		{
+			name:   "reducing repeating lines",
+			before: "log\nlog\nlog\n",
+			after:  "log\n",
+			want:   []Chunk{same("log\n"), del("log\nlog\n")},
+		},
+		{
+			name:   "expanding repeating lines",
+			before: "tick\n",
+			after:  "tick\ntick\ntick\n",
+			want:   []Chunk{same("tick\n"), add("tick\ntick\n")},
+		},
+		{
+			name:   "interleaved modifications",
+			before: "keep\nchange\nkeep\nchange\n",
+			after:  "keep\nfixed\nkeep\nfixed\n",
+			want: []Chunk{
+				same("keep\n"),
+				del("change\n"),
+				add("fixed\n"),
+				same("keep\n"),
+				del("change\n"),
+				add("fixed\n"),
+			},
+		},
+		{
+			name:   "large common header and footer",
+			before: "header\nheader\nheader\nOLD\nfooter\nfooter\n",
+			after:  "header\nheader\nheader\nNEW\nfooter\nfooter\n",
+			want: []Chunk{
+				same("header\nheader\nheader\n"),
+				del("OLD\n"),
+				add("NEW\n"),
+				same("footer\nfooter\n"),
+			},
+		},
+		{
+			name:   "completely different content",
+			before: "apple\nbanana\n",
+			after:  "cherry\ndate\n",
+			want:   []Chunk{del("apple\nbanana\n"), add("cherry\ndate\n")},
+		},
+		{
+			name:   "unicode and emoji changes",
+			before: "Hello 🌍\nYay\n",
+			after:  "Hello 🌎\nYay\n",
+			want:   []Chunk{del("Hello 🌍\n"), add("Hello 🌎\n"), same("Yay\n")},
+		},
+		{
+			name:   "binary data with embedded newlines",
+			before: "\x00\x01\n\x02\x03\n",
+			after:  "\x00\x01\n\x02\xFF\n",
+			want:   []Chunk{same("\x00\x01\n"), del("\x02\x03\n"), add("\x02\xFF\n")},
+		},
+		{
+			name:   "adding trailing newline to last line",
+			before: "Line 1\nLine 2",
+			after:  "Line 1\nLine 2\n",
+			want:   []Chunk{same("Line 1\n"), del("Line 2"), add("Line 2\n")},
+		},
+		{
+			name:   "removing trailing newline",
+			before: "A\nB\n",
+			after:  "A\nB",
+			want:   []Chunk{same("A\n"), del("B\n"), add("B")},
+		},
+		{
+			name:   "inserting blank lines",
+			before: "A\nB\n",
+			after:  "A\n\n\nB\n",
+			want:   []Chunk{same("A\n"), add("\n\n"), same("B\n")},
+		},
+		{
+			name:   "collapsing blank lines",
+			before: "A\n\n\n\nB\n",
+			after:  "A\nB\n",
+			want:   []Chunk{same("A\n"), del("\n\n\n"), same("B\n")},
+		},
+		{
+			name:   "case sensitivity check",
+			before: "FOO\nbar\n",
+			after:  "foo\nbar\n",
+			want:   []Chunk{del("FOO\n"), add("foo\n"), same("bar\n")},
+		},
+		{
+			name:   "partial line match is full mismatch",
+			before: "The quick brown fox\n",
+			after:  "The quick brown fox jumps\n",
+			want:   []Chunk{del("The quick brown fox\n"), add("The quick brown fox jumps\n")},
+		},
+		{
+			name:   "inserting middle content",
+			before: "Top\nBottom\n",
+			after:  "Top\nMiddle\nBottom\n",
+			want:   []Chunk{same("Top\n"), add("Middle\n"), same("Bottom\n")},
+		},
+		{
+			name:   "block move simulated",
+			before: "BlockA\nBlockB\nBlockC\n",
+			after:  "BlockA\nBlockC\nBlockB\n",
+			want: []Chunk{
+				same("BlockA\n"),
+				del("BlockB\n"),
+				same("BlockC\n"),
+				add("BlockB\n"),
+			},
+		},
+		{
+			name:   "alternating additions",
+			before: "A\nB\nC\n",
+			after:  "A\n1\nB\n2\nC\n",
+			want: []Chunk{
+				same("A\n"),
+				add("1\n"),
+				same("B\n"),
+				add("2\n"),
+				same("C\n"),
+			},
+		},
+	}
+	for i := range cases {
+		// Take a per-iteration copy: the subtests run in parallel, after this
+		// loop has advanced, and must not share one loop variable under
+		// pre-Go 1.22 loop semantics.
+		tc := cases[i]
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			got, err := Diff([]byte(tc.before), []byte(tc.after))
+			if err != nil {
+				t.Fatalf("Diff returned error: %v", err)
+			}
+			if len(got) != len(tc.want) {
+				t.Fatalf("expected %d chunks, got %d: %s", len(tc.want), len(got), formatChunks(got))
+			}
+			for j, want := range tc.want {
+				if got[j].Kind != want.Kind {
+					t.Fatalf("chunk %d kind mismatch: got %v, want %v; chunks: %s", j, got[j].Kind, want.Kind, formatChunks(got))
+				}
+				if !bytes.Equal(got[j].Data, want.Data) {
+					t.Fatalf("chunk %d data mismatch: got %q, want %q; chunks: %s", j, string(got[j].Data), string(want.Data), formatChunks(got))
+				}
+			}
+		})
+	}
+}

+func formatChunks(chunks []Chunk) string {

+	var b strings.Builder

+	b.WriteByte('[')

+	for i, chunk := range chunks {

+		if i > 0 {

+			b.WriteString(", ")

+		}

+		b.WriteString(chunkKindName(chunk.Kind))

+		b.WriteByte(':')

+		b.WriteString(strconv.Quote(string(chunk.Data)))

+	}

+	b.WriteByte(']')

+	return b.String()

+}

+// chunkKindName returns the one-letter tag used by formatChunks for kind:
+// "U" for unchanged, "D" for deleted, "A" for added, and "?" otherwise.
+func chunkKindName(kind ChunkKind) string {
+	tags := map[ChunkKind]string{
+		ChunkKindUnchanged: "U",
+		ChunkKindDeleted:   "D",
+		ChunkKindAdded:     "A",
+	}
+	if tag, known := tags[kind]; known {
+		return tag
+	}
+	return "?"
+}

--

⑨