ref: b01bc1a344c47ded15342f7872832daa1bf5cfce
parent: d8b384519a05136bb6f311c18cd05c2e4c494416
author: Runxi Yu <me@runxiyu.org>
date: Sat Feb 21 08:25:21 EST 2026
diff/lines: Line-based diffs via Myers
--- /dev/null
+++ b/diff/lines/diff.go
@@ -1,0 +1,223 @@
+// Package lines provides routines to perform line-based diffs.
+package lines
+
+import "bytes"
+
+// Diff performs a line-based diff between oldB and newB using Myers'
+// greedy shortest-edit-script algorithm.
+//
+// A line is a run of bytes up to and including '\n'; the final line of an
+// input may lack the trailing '\n'. Lines are compared byte for byte, so a
+// final line without '\n' differs from the same text followed by '\n'.
+//
+// The result lists, in order, maximal runs of unchanged, deleted and added
+// lines. Concatenating the unchanged and deleted chunks reproduces oldB;
+// concatenating the unchanged and added chunks reproduces newB. If both
+// inputs are empty the result is nil.
+//
+// Chunk.Data aliases the input buffers (oldB for unchanged and deleted
+// chunks, newB for added chunks); no line data is copied. Each Data slice
+// has its capacity clipped to its length, so appending to it reallocates
+// instead of overwriting the caller's input.
+//
+// The returned error is always nil in the current implementation.
+func Diff(oldB, newB []byte) ([]Chunk, error) {
+	oldLines := splitLines(oldB)
+	newLines := splitLines(newB)
+	if len(oldLines) == 0 && len(newLines) == 0 {
+		return nil, nil
+	}
+
+	// Replace every distinct line by a small integer so that the inner
+	// loops of the search compare ints instead of byte slices.
+	ids := make(map[string]int, len(oldLines)+len(newLines))
+	oldIDs := internLines(ids, oldB, oldLines)
+	newIDs := internLines(ids, newB, newLines)
+
+	trace := shortestEditTrace(oldIDs, newIDs)
+	rev := backtrackEdits(trace, len(oldIDs), len(newIDs))
+
+	// rev holds the edit script back to front, so walk it from the end and
+	// fold each run of equal kinds into a single chunk. Consecutive edits of
+	// one kind always refer to consecutive lines of the same input, hence a
+	// run is one contiguous byte range of that input.
+	var chunks []Chunk
+	for i := len(rev) - 1; i >= 0; {
+		kind := rev[i].kind
+		buf, lines := oldB, oldLines
+		if kind == ChunkKindAdded {
+			buf, lines = newB, newLines
+		}
+
+		first := rev[i].line
+		last := first
+		for i--; i >= 0 && rev[i].kind == kind; i-- {
+			last = rev[i].line
+		}
+
+		lo, hi := lines[first].start, lines[last].end
+		// The three-index slice clips the capacity so that callers cannot
+		// write into the input buffer through append.
+		chunks = append(chunks, Chunk{Kind: kind, Data: buf[lo:hi:hi]})
+	}
+
+	return chunks, nil
+}
+
+// lineSpan is the half-open byte range [start, end) of one line.
+type lineSpan struct {
+	start int
+	end   int
+}
+
+// lineEdit is one step of an edit script. line indexes the old lines for
+// unchanged and deleted steps and the new lines for added steps.
+type lineEdit struct {
+	kind ChunkKind
+	line int
+}
+
+// splitLines returns the byte range of every line in b. A line ends after
+// its '\n'; a trailing fragment without '\n' forms a final line.
+func splitLines(b []byte) []lineSpan {
+	if len(b) == 0 {
+		return nil
+	}
+	spans := make([]lineSpan, 0, bytes.Count(b, []byte{'\n'})+1)
+	for start := 0; start < len(b); {
+		end := len(b)
+		if i := bytes.IndexByte(b[start:], '\n'); i >= 0 {
+			end = start + i + 1
+		}
+		spans = append(spans, lineSpan{start: start, end: end})
+		start = end
+	}
+	return spans
+}
+
+// internLines maps each line of buf to an integer ID, assigning fresh IDs in
+// ids for lines not seen before. Equal lines receive equal IDs.
+func internLines(ids map[string]int, buf []byte, lines []lineSpan) []int {
+	out := make([]int, len(lines))
+	for i, ln := range lines {
+		line := buf[ln.start:ln.end]
+		id, ok := ids[string(line)]
+		if !ok {
+			id = len(ids)
+			ids[string(line)] = id
+		}
+		out[i] = id
+	}
+	return out
+}
+
+// shortestEditTrace runs the forward pass of Myers' algorithm and returns one
+// row per round d = 0, 1, ... up to and including the round that reaches the
+// end of both inputs. Row d covers diagonals k = -d..d (k = x - y) and stores
+// the furthest x reached on diagonal k at index k+d; only diagonals with the
+// parity of d are written and read.
+func shortestEditTrace(oldIDs, newIDs []int) [][]int {
+	n, m := len(oldIDs), len(newIDs)
+	maxD := n + m
+
+	// Round 0: follow the common prefix along diagonal 0.
+	x := 0
+	for x < n && x < m && oldIDs[x] == newIDs[x] {
+		x++
+	}
+	trace := [][]int{{x}}
+	if x >= n && x >= m {
+		return trace
+	}
+
+	for d := 1; d <= maxD; d++ {
+		prev := trace[d-1] // diagonal k lives at prev[k+d-1]
+		cur := make([]int, 2*d+1)
+		trace = append(trace, cur)
+
+		for k := -d; k <= d; k += 2 {
+			var x int
+			if k == -d || (k != d && prev[k+d-2] < prev[k+d]) {
+				// Step down from diagonal k+1: an insertion.
+				x = prev[k+d]
+			} else {
+				// Step right from diagonal k-1: a deletion.
+				x = prev[k+d-2] + 1
+			}
+			y := x - k
+
+			// Follow the run of equal lines as far as it goes.
+			for x < n && y < m && oldIDs[x] == newIDs[y] {
+				x++
+				y++
+			}
+			cur[k+d] = x
+
+			if x >= n && y >= m {
+				return trace
+			}
+		}
+	}
+	return trace
+}
+
+// backtrackEdits walks trace from the end point (n, m) back to the origin and
+// returns the edit script in reverse order (last edit first).
+func backtrackEdits(trace [][]int, n, m int) []lineEdit {
+	rev := make([]lineEdit, 0, n+m)
+	x, y := n, m
+
+	for d := len(trace) - 1; d > 0; d-- {
+		prev := trace[d-1]
+		k := x - y
+
+		// Same choice as in the forward pass, so prevK is the diagonal
+		// this round was entered from.
+		prevK := k - 1
+		if k == -d || (k != d && prev[k+d-2] < prev[k+d]) {
+			prevK = k + 1
+		}
+		prevX := prev[prevK+d-1]
+		prevY := prevX - prevK
+
+		// Undo the run of equal lines that followed the edit.
+		for x > prevX && y > prevY {
+			x--
+			y--
+			rev = append(rev, lineEdit{kind: ChunkKindUnchanged, line: x})
+		}
+
+		// Undo the edit itself.
+		if x == prevX {
+			y--
+			rev = append(rev, lineEdit{kind: ChunkKindAdded, line: y})
+		} else {
+			x--
+			rev = append(rev, lineEdit{kind: ChunkKindDeleted, line: x})
+		}
+	}
+
+	// Round 0 consists only of the common prefix.
+	for x > 0 && y > 0 {
+		x--
+		y--
+		rev = append(rev, lineEdit{kind: ChunkKindUnchanged, line: x})
+	}
+	return rev
+}
+
+// Chunk represents a contiguous region of lines categorized
+// as unchanged, deleted, or added.
+type Chunk struct {
+	// Kind tells whether the lines are unchanged, deleted or added.
+	Kind ChunkKind
+	// Data holds the raw bytes of the lines, including line terminators.
+	Data []byte
+}
+
+// ChunkKind enumerates the type of diff chunk.
+type ChunkKind int
+
+const (
+	// ChunkKindUnchanged represents an unchanged diff chunk.
+	ChunkKindUnchanged ChunkKind = iota
+	// ChunkKindDeleted represents a deleted diff chunk.
+	ChunkKindDeleted
+	// ChunkKindAdded represents an added diff chunk.
+	ChunkKindAdded
+)
--- /dev/null
+++ b/diff/lines/diff_test.go
@@ -1,0 +1,326 @@
+package lines
+
+import (
+ "bytes"
+ "strconv"
+ "strings"
+ "testing"
+)
+
+// TestDiff feeds pairs of inputs to Diff and checks the number, kind and
+// contents of the returned chunks against a table of expectations. Every
+// case runs as its own parallel sub-test.
+func TestDiff(t *testing.T) {
+	t.Parallel()
+
+	// Shorthand constructors keep the expectation table compact.
+	same := func(s string) Chunk { return Chunk{Kind: ChunkKindUnchanged, Data: []byte(s)} }
+	del := func(s string) Chunk { return Chunk{Kind: ChunkKindDeleted, Data: []byte(s)} }
+	add := func(s string) Chunk { return Chunk{Kind: ChunkKindAdded, Data: []byte(s)} }
+
+	type testCase struct {
+		name   string
+		before string
+		after  string
+		want   []Chunk
+	}
+
+	cases := []testCase{
+		{
+			name:   "empty inputs produce no chunks",
+			before: "",
+			after:  "",
+			want:   []Chunk{},
+		},
+		{
+			name:   "only additions",
+			before: "",
+			after:  "alpha\nbeta\n",
+			want:   []Chunk{add("alpha\nbeta\n")},
+		},
+		{
+			name:   "only deletions",
+			before: "alpha\nbeta\n",
+			after:  "",
+			want:   []Chunk{del("alpha\nbeta\n")},
+		},
+		{
+			name:   "unchanged content is grouped",
+			before: "same\nlines\n",
+			after:  "same\nlines\n",
+			want:   []Chunk{same("same\nlines\n")},
+		},
+		{
+			name:   "insertion in the middle",
+			before: "a\nb\nc\n",
+			after:  "a\nb\nX\nc\n",
+			want:   []Chunk{same("a\nb\n"), add("X\n"), same("c\n")},
+		},
+		{
+			name:   "replacement without trailing newline",
+			before: "first\nsecond",
+			after:  "first\nsecond\n",
+			want:   []Chunk{same("first\n"), del("second"), add("second\n")},
+		},
+		{
+			name:   "line replacement",
+			before: "a\nb\nc\n",
+			after:  "a\nB\nc\n",
+			want:   []Chunk{same("a\n"), del("b\n"), add("B\n"), same("c\n")},
+		},
+		{
+			name:   "swap adjacent lines",
+			before: "A\nB\n",
+			after:  "B\nA\n",
+			want:   []Chunk{del("A\n"), same("B\n"), add("A\n")},
+		},
+		{
+			name:   "indentation change is a full line replacement",
+			before: "func main() {\n\treturn\n}\n",
+			after:  "func main() {\n return\n}\n",
+			want: []Chunk{
+				same("func main() {\n"),
+				del("\treturn\n"),
+				add(" return\n"),
+				same("}\n"),
+			},
+		},
+		{
+			name:   "commenting out lines",
+			before: "code\n",
+			after:  "// code\n",
+			want:   []Chunk{del("code\n"), add("// code\n")},
+		},
+		{
+			name:   "reducing repeating lines",
+			before: "log\nlog\nlog\n",
+			after:  "log\n",
+			want:   []Chunk{same("log\n"), del("log\nlog\n")},
+		},
+		{
+			name:   "expanding repeating lines",
+			before: "tick\n",
+			after:  "tick\ntick\ntick\n",
+			want:   []Chunk{same("tick\n"), add("tick\ntick\n")},
+		},
+		{
+			name:   "interleaved modifications",
+			before: "keep\nchange\nkeep\nchange\n",
+			after:  "keep\nfixed\nkeep\nfixed\n",
+			want: []Chunk{
+				same("keep\n"),
+				del("change\n"),
+				add("fixed\n"),
+				same("keep\n"),
+				del("change\n"),
+				add("fixed\n"),
+			},
+		},
+		{
+			name:   "large common header and footer",
+			before: "header\nheader\nheader\nOLD\nfooter\nfooter\n",
+			after:  "header\nheader\nheader\nNEW\nfooter\nfooter\n",
+			want: []Chunk{
+				same("header\nheader\nheader\n"),
+				del("OLD\n"),
+				add("NEW\n"),
+				same("footer\nfooter\n"),
+			},
+		},
+		{
+			name:   "completely different content",
+			before: "apple\nbanana\n",
+			after:  "cherry\ndate\n",
+			want:   []Chunk{del("apple\nbanana\n"), add("cherry\ndate\n")},
+		},
+		{
+			name:   "unicode and emoji changes",
+			before: "Hello 🌍\nYay\n",
+			after:  "Hello 🌎\nYay\n",
+			want:   []Chunk{del("Hello 🌍\n"), add("Hello 🌎\n"), same("Yay\n")},
+		},
+		{
+			name:   "binary data with embedded newlines",
+			before: "\x00\x01\n\x02\x03\n",
+			after:  "\x00\x01\n\x02\xFF\n",
+			want:   []Chunk{same("\x00\x01\n"), del("\x02\x03\n"), add("\x02\xFF\n")},
+		},
+		{
+			name:   "adding trailing newline to last line",
+			before: "Line 1\nLine 2",
+			after:  "Line 1\nLine 2\n",
+			want:   []Chunk{same("Line 1\n"), del("Line 2"), add("Line 2\n")},
+		},
+		{
+			name:   "removing trailing newline",
+			before: "A\nB\n",
+			after:  "A\nB",
+			want:   []Chunk{same("A\n"), del("B\n"), add("B")},
+		},
+		{
+			name:   "inserting blank lines",
+			before: "A\nB\n",
+			after:  "A\n\n\nB\n",
+			want:   []Chunk{same("A\n"), add("\n\n"), same("B\n")},
+		},
+		{
+			name:   "collapsing blank lines",
+			before: "A\n\n\n\nB\n",
+			after:  "A\nB\n",
+			want:   []Chunk{same("A\n"), del("\n\n\n"), same("B\n")},
+		},
+		{
+			name:   "case sensitivity check",
+			before: "FOO\nbar\n",
+			after:  "foo\nbar\n",
+			want:   []Chunk{del("FOO\n"), add("foo\n"), same("bar\n")},
+		},
+		{
+			name:   "partial line match is full mismatch",
+			before: "The quick brown fox\n",
+			after:  "The quick brown fox jumps\n",
+			want:   []Chunk{del("The quick brown fox\n"), add("The quick brown fox jumps\n")},
+		},
+		{
+			name:   "inserting middle content",
+			before: "Top\nBottom\n",
+			after:  "Top\nMiddle\nBottom\n",
+			want:   []Chunk{same("Top\n"), add("Middle\n"), same("Bottom\n")},
+		},
+		{
+			name:   "block move simulated",
+			before: "BlockA\nBlockB\nBlockC\n",
+			after:  "BlockA\nBlockC\nBlockB\n",
+			want:   []Chunk{same("BlockA\n"), del("BlockB\n"), same("BlockC\n"), add("BlockB\n")},
+		},
+		{
+			name:   "alternating additions",
+			before: "A\nB\nC\n",
+			after:  "A\n1\nB\n2\nC\n",
+			want:   []Chunk{same("A\n"), add("1\n"), same("B\n"), add("2\n"), same("C\n")},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			got, err := Diff([]byte(tc.before), []byte(tc.after))
+			if err != nil {
+				t.Fatalf("Diff returned error: %v", err)
+			}
+
+			// Compare lengths first so the indexing below cannot go out of range.
+			if len(got) != len(tc.want) {
+				t.Fatalf("expected %d chunks, got %d: %s", len(tc.want), len(got), formatChunks(got))
+			}
+
+			for i, want := range tc.want {
+				if got[i].Kind != want.Kind {
+					t.Fatalf("chunk %d kind mismatch: got %v, want %v; chunks: %s", i, got[i].Kind, want.Kind, formatChunks(got))
+				}
+				if !bytes.Equal(got[i].Data, want.Data) {
+					t.Fatalf("chunk %d data mismatch: got %q, want %q; chunks: %s", i, string(got[i].Data), string(want.Data), formatChunks(got))
+				}
+			}
+		})
+	}
+}
+
+// formatChunks renders chunks as a bracketed, comma-separated list of
+// "<kind letter>:<quoted data>" entries for use in test failure messages.
+func formatChunks(chunks []Chunk) string {
+	parts := make([]string, len(chunks))
+	for i := range chunks {
+		parts[i] = chunkKindName(chunks[i].Kind) + ":" + strconv.Quote(string(chunks[i].Data))
+	}
+	return "[" + strings.Join(parts, ", ") + "]"
+}
+
+// chunkKindName returns the one-letter tag for kind: "U" for unchanged,
+// "D" for deleted, "A" for added, and "?" for any other value.
+func chunkKindName(kind ChunkKind) string {
+	names := map[ChunkKind]string{
+		ChunkKindUnchanged: "U",
+		ChunkKindDeleted:   "D",
+		ChunkKindAdded:     "A",
+	}
+	if name, ok := names[kind]; ok {
+		return name
+	}
+	return "?"
+}
--
⑨