v2 / vlib / arrays / diff / diff.v
295 lines · 269 sloc · 6.94 KB · 35af6a8d12b70bf59e1df5d4fb517d04a6892b49
Raw
1module diff
2
3import strings
4
5// DiffChange contains one or more deletions or inserts at one position in two arrays.
6pub struct DiffChange {
7pub mut:
8 a int // position in input a []T
9 b int // position in input b []T
10 del int // delete Del elements from input a
11 ins int // insert Ins elements from input b
12}
13
14@[flag]
15enum DiffContextFlag {
16 delete
17 insert
18}
19
20pub struct DiffContext[T] {
21mut:
22 a []T
23 b []T
24 flags []DiffContextFlag
25 max int
26 // forward and reverse d-path endpoint x components
27 forward []int
28 reverse []int
29pub mut:
30 changes []DiffChange
31}
32
33// diff returns the difference of two arrays.
34pub fn diff[T](a []T, b []T) &DiffContext[T] {
35 mut c := &DiffContext[T]{
36 a: a
37 b: b
38 }
39 c.flags = if a.len > b.len {
40 []DiffContextFlag{len: a.len}
41 } else {
42 []DiffContextFlag{len: b.len}
43 }
44 c.max = a.len + b.len + 1
45 c.forward = []int{len: 2 * c.max}
46 c.reverse = []int{len: 2 * c.max}
47 c.compare(0, 0, a.len, b.len)
48 c.changes = c.result(a.len, b.len)
49 return c
50}
51
52// A directly conversion from https://github.com/covrom/diff
53// Fast diff library for Myers algorithm.
54// The algorithm is described in "An O(ND) Difference Algorithm and its Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, pp. 251-266
55@[direct_array_access]
56fn (mut c DiffContext[T]) compare(mut_aoffset int, mut_boffset int, mut_alimit int, mut_blimit int) {
57 mut aoffset := mut_aoffset
58 mut boffset := mut_boffset
59 mut alimit := mut_alimit
60 mut blimit := mut_blimit
61 // eat common prefix
62 for aoffset < alimit && boffset < blimit && c.a[aoffset] == c.b[boffset] {
63 aoffset++
64 boffset++
65 }
66 // eat common suffix
67 for alimit > aoffset && blimit > boffset && c.a[alimit - 1] == c.b[blimit - 1] {
68 alimit--
69 blimit--
70 }
71 // both equal or b inserts
72 if aoffset == alimit {
73 for boffset < blimit {
74 c.flags[boffset].set(.insert)
75 boffset++
76 }
77 return
78 }
79 // a deletes
80 if boffset == blimit {
81 for aoffset < alimit {
82 c.flags[aoffset].set(.delete)
83 aoffset++
84 }
85 return
86 }
87 x, y := c.find_middle_snake(aoffset, boffset, alimit, blimit)
88 c.compare(aoffset, boffset, x, y)
89 c.compare(x, y, alimit, blimit)
90}
91
92@[direct_array_access]
93fn (mut c DiffContext[T]) find_middle_snake(aoffset int, boffset int, alimit int, blimit int) (int, int) {
94 // midpoints
95 fmid := aoffset - boffset
96 rmid := alimit - blimit
97 // correct offset in d-path slices
98 foff := c.max - fmid
99 roff := c.max - rmid
100 isodd := (rmid - fmid) & 1 != 0
101 maxd := (alimit - aoffset + blimit - boffset + 2) / 2
102 c.forward[c.max + 1] = aoffset
103 c.reverse[c.max - 1] = alimit
104 mut x, mut y := 0, 0
105 for d := 0; d <= maxd; d++ {
106 // forward search
107 for k := fmid - d; k <= fmid + d; k += 2 {
108 if k == fmid - d || (k != fmid + d && c.forward[foff + k + 1] > c.forward[foff + k - 1]) {
109 x = c.forward[foff + k + 1] // down
110 } else {
111 x = c.forward[foff + k - 1] + 1 // right
112 }
113 y = x - k
114 for x < alimit && y < blimit && c.a[x] == c.b[y] {
115 x++
116 y++
117 }
118 c.forward[foff + k] = x
119 if isodd && k > rmid - d && k < rmid + d {
120 if c.reverse[roff + k] <= c.forward[foff + k] {
121 return x, x - k
122 }
123 }
124 }
125 // reverse search x,y correspond to u,v
126 for k := rmid - d; k <= rmid + d; k += 2 {
127 if k == rmid + d || (k != rmid - d && c.reverse[roff + k - 1] < c.reverse[roff + k + 1]) {
128 x = c.reverse[roff + k - 1] // up
129 } else {
130 x = c.reverse[roff + k + 1] - 1 // left
131 }
132 y = x - k
133 for x > aoffset && y > boffset && c.a[x - 1] == c.b[y - 1] {
134 x--
135 y--
136 }
137 c.reverse[roff + k] = x
138 if !isodd && k >= fmid - d && k <= fmid + d {
139 if c.reverse[roff + k] <= c.forward[foff + k] {
140 // lookup opposite end
141 x = c.forward[foff + k]
142 return x, x - k
143 }
144 }
145 }
146 }
147 panic('diff.find_middle_snake: should never be reached')
148}
149
150@[direct_array_access]
151fn (c DiffContext[T]) result(n int, m int) []DiffChange {
152 mut x, mut y := 0, 0
153 mut res := []DiffChange{}
154 for x < n || y < m {
155 if x < n && y < m && !c.flags[x].has(.delete) && !c.flags[y].has(.insert) {
156 x++
157 y++
158 } else {
159 mut a := x
160 mut b := y
161 for x < n && (y >= m || c.flags[x].has(.delete)) {
162 x++
163 }
164 for y < m && (x >= n || c.flags[y].has(.insert)) {
165 y++
166 }
167 if a < x || b < y {
168 res << DiffChange{a, b, x - a, y - b}
169 }
170 }
171 }
172 return res
173}
174
175// merge_changes merges neighboring changes smaller than the specified context_lines.
176// The changes must be ordered by ascending positions.
177@[direct_array_access]
178fn (mut c DiffContext[T]) merge_changes(context_lines int) {
179 if c.changes.len == 0 {
180 return
181 }
182
183 mut merged := []DiffChange{}
184 mut current := c.changes[0]
185
186 for i in 1 .. c.changes.len {
187 next := c.changes[i]
188 if next.a <= current.a + current.del + context_lines {
189 current = DiffChange{
190 a: current.a
191 b: current.b
192 del: next.a + next.del - current.a
193 ins: next.b + next.ins - current.b
194 }
195 } else {
196 merged << current
197 current = next
198 }
199 }
200 merged << current
201 c.changes = merged
202}
203
204@[params]
205pub struct DiffGenStrParam {
206pub mut:
207 colorful bool
208 unified int = 3 // how many context lines before/after diff block
209 block_header bool // output `@@ -3,4 +3,5 @@` or not
210}
211
212// generate_patch generate a diff string of two arrays.
213@[direct_array_access]
214pub fn (mut c DiffContext[T]) generate_patch(param DiffGenStrParam) string {
215 mut sb := strings.new_builder(100)
216 defer { unsafe { sb.free() } }
217
218 mut unified := if param.unified < 0 { 0 } else { param.unified }
219
220 c.merge_changes(unified)
221 if c.changes.len == 0 {
222 return ''
223 }
224
225 mut prev_a_end := 0
226 mut prev_b_end := 0
227
228 for change in c.changes {
229 ctx_start_a := int_max(prev_a_end, change.a - unified)
230 ctx_end_a := change.a + change.del + unified
231 ctx_start_b := int_max(prev_b_end, change.b - unified)
232 ctx_end_b := change.b + change.ins + unified
233
234 if param.block_header {
235 if param.colorful {
236 sb.write_string('\033[36m')
237 }
238 sb.writeln('@@ -${ctx_start_a + 1},${ctx_end_a - ctx_start_a} +${ctx_start_b + 1},${ctx_end_b - ctx_start_b} @@')
239 if param.colorful {
240 sb.write_string('\033[0m')
241 }
242 }
243
244 c.write_context(mut sb, ctx_start_b, change.b, param)
245 c.write_change(mut sb, change, param)
246 c.write_context(mut sb, change.b + change.ins, ctx_end_b, param)
247
248 prev_a_end = ctx_end_a
249 prev_b_end = ctx_end_b
250 }
251
252 return sb.str()
253}
254
255@[direct_array_access]
256fn (c DiffContext[T]) write_context(mut sb strings.Builder,
257 start int, end int,
258 param DiffGenStrParam) {
259 for i in start .. end {
260 if i >= c.b.len {
261 break
262 }
263
264 line := c.b[i].str()
265
266 if param.colorful {
267 sb.writeln('\033[37m${line}\033[0m')
268 } else {
269 sb.writeln(line)
270 }
271 }
272}
273
274@[direct_array_access]
275fn (c DiffContext[T]) write_change(mut sb strings.Builder,
276 change DiffChange,
277 param DiffGenStrParam) {
278 for i in change.a .. change.a + change.del {
279 line := c.a[i].str()
280 if param.colorful {
281 sb.writeln('\033[31m-${line}\033[0m')
282 } else {
283 sb.writeln('-${line}')
284 }
285 }
286
287 for i in change.b .. change.b + change.ins {
288 line := c.b[i].str()
289 if param.colorful {
290 sb.writeln('\033[32m+${line}\033[0m')
291 } else {
292 sb.writeln('+${line}')
293 }
294 }
295}
296