Gitly


1 module tar
2 
// Untar parses the contents of a unix tar file block by block and reports
// every parsed block to a Reader implementation.
// One fixed array of 512 bytes is reused to parse each TAR block.
@[heap]
pub struct Untar {
mut:
    reader     Reader  // receives the dir, file, data and other block callbacks
    max_blocks int     // number of 512-byte blocks in the last array given to read_all_blocks
    buffer     [512]u8 // copy of the block being parsed
    read       Read    // last read to send/receive to/from reader implementation

    state State // what the next block holds: a header, entry data or a long path
    size  int   // remaining payload size of the current entry, in bytes

    long_path &LongPath = unsafe { nil } // not nil while a long file name is held

    blank_block int = -1 // number of the last all-zeros block seen in header state
}
20 
// State selects how read_single_block interprets the next 512-byte block.
enum State {
    header    // block is parsed as an entry header (or detected as blank)
    data      // block is handed to read_data as payload of the current entry
    long_path // block is handed to read_long_path as part of a long name
}
26 
// new_untar creates an Untar that reports every parsed block to `reader`.
// All other fields start with their default values.
pub fn new_untar(reader Reader) &Untar {
    untar := &Untar{
        reader: reader
    }
    return untar
}
33 
// str describes the untar state as text: the max_blocks counter followed by
// the last read.
pub fn (u Untar) str() string {
    blocks_part := 'max_blocks:${u.max_blocks}'
    read_part := 'last_read:${u.read}'
    return '${blocks_part} ${read_part}'
}
38 
// read_all_blocks parses all the blocks of an uncompressed tar byte array,
// for example the decompressed content of a *.tar.gz file.
// The array length must be a multiple of 512, otherwise an error is returned.
// Parsing stops at the first block whose result is not `.continue` and that
// result is returned; `.end_of_file` is returned when every block continued.
pub fn (mut u Untar) read_all_blocks(blocks []u8) !ReadResult {
    total := blocks.len / 512
    if total * 512 != blocks.len {
        return error('data_blocks size is not a multiple of 512')
    }
    u.max_blocks = total
    for n in 0 .. total {
        start := n * 512
        outcome := u.read_single_block(blocks[start..start + 512])!
        if outcome == .continue {
            continue
        }
        return outcome
    }
    return .end_of_file
}
54 
// read_single_block parses one block at a time.
// The block length must be 512, otherwise an error is returned.
// A block of 512 zeroes found where a header is expected is a blank block:
// the first one returns `.continue`, a second consecutive one returns
// `.end_archive`. Any other block returns `.stop_early` when the last read
// has its stop_early flag set, else `.continue`.
pub fn (mut u Untar) read_single_block(block []u8) !ReadResult {
    if block.len != 512 {
        return error('data_block size is not 512')
    }
    u.read.block_number++ // 1,2,3...

    // Copy the block into the parse buffer and note if any byte is non-zero.
    mut has_content := false
    for pos, b in block {
        u.buffer[pos] = b
        if b != 0 {
            has_content = true
        }
    }

    match u.state {
        .data {
            u.read_data()
        }
        .long_path {
            u.read_long_path()
        }
        .header {
            if has_content {
                u.read_header()!
            } else {
                // Blank block: it ends the archive only when the block right
                // before it was blank too.
                follows_blank := u.blank_block == u.read.block_number - 1
                mut outcome := ReadResult.continue
                if follows_blank {
                    u.read.special = .blank_2
                    outcome = ReadResult.end_archive
                } else {
                    u.read.special = .blank_1
                }
                u.read.path_len = 0
                u.reader.other_block(mut u.read, '${outcome}')
                u.blank_block = u.read.block_number
                return outcome
            }
        }
    }

    if u.read.stop_early {
        return .stop_early
    }
    return .continue
}
106 
// read_header parses the header block currently held in the buffer.
// It reads the size field (offset 124, 12 bytes) and the type flag (offset 156),
// reports the entry to the reader with the callback matching its type and
// selects the state used for the next block.
// Returns an error when a directory or file header fails its checksum, or when
// an entry of a known type declares a size that does not fit the `int` counter.
fn (mut u Untar) read_header() ! {
    declared_size := u.extract_octal(124, 12)
    u.size = int(declared_size)
    header := u.buffer[156] // pos 0x9c
    block_header := BlockHeader.from(header) or {
        u.read.special = .unknown
        u.read.path_len = 0
        u.reader.other_block(mut u.read, 'size:${u.size}')
        return
    }
    // The size field can hold values above the 32-bit `int` range. The cast
    // above would then wrap, and the payload blocks would be misread as headers.
    if declared_size > u64(2147483647) {
        return error('Entry size ${declared_size} is not supported reading:${u.read}')
    }
    match block_header {
        .dir {
            if !u.checksum_ok() {
                return error('Checksum error: directory reading:${u.read}')
            }
            u.read.special = .no
            // NOTE(review): a pending long name is not applied to directories,
            // only the short path is reported - confirm this is intended.
            u.read.set_short_path(u.buffer, false)
            u.reader.dir_block(mut u.read, u64(u.size))
        }
        .file {
            if !u.checksum_ok() {
                return error('Checksum error file reading:${u.read}')
            }
            u.read.special = .no
            if u.long_path != unsafe { nil } {
                u.read.set_long_path(u.long_path)
            } else {
                u.read.set_short_path(u.buffer, true)
            }
            if u.size > 0 {
                u.state = .data
            }
            u.reader.file_block(mut u.read, u64(u.size))
        }
        .long_name {
            u.read.special = .long_name
            u.reader.other_block(mut u.read, 'size:${u.size}')
            if u.size > 0 {
                u.state = .long_path
                u.long_path = new_long_path(u.size)
            }
        }
        .hard_link, .sym_link, .char_dev, .block_dev, .fifo {
            u.read.special = .ignore
            u.reader.other_block(mut u.read, block_header.str())
        }
        .global {
            u.read.special = .global
            u.read.set_short_path(u.buffer, false)
            u.reader.other_block(mut u.read, 'size:${u.size}')
            if u.size > 0 {
                u.state = .data
            }
        }
    }
    // A long name belongs only to the entry right after it. When that entry
    // has no data blocks, read_data never runs to drop the name, so drop it
    // here; otherwise it would be applied to the next file header.
    if block_header != .long_name && u.state == .header {
        u.long_path = unsafe { nil }
        u.read.long_path = unsafe { nil }
    }
}
166 
// read_data hands the current block to Reader.data_block so the implementor
// can collect the data parts as content. The part holds at most 512 bytes and
// is shorter for the last block; the bytes still pending are passed along.
// Once no bytes remain, the held long path is dropped and the next block is
// parsed as a header.
fn (mut u Untar) read_data() {
    if u.size > 0 {
        mut chunk_len := 512
        if u.size <= 512 {
            chunk_len = u.size
        }
        u.size -= 512
        // The counter goes negative on the last block; report zero pending.
        mut remaining := u.size
        if remaining < 0 {
            remaining = 0
        }
        chunk := u.buffer[0..chunk_len]
        u.reader.data_block(mut u.read, chunk, remaining)
    }
    if u.size > 0 {
        return
    }
    u.long_path = unsafe { nil }
    u.read.long_path = unsafe { nil } // real clear
    u.state = .header
}
182 
// read_long_path appends the current block, at most 512 bytes and fewer for
// the last block, to the held long path and reports the part length to
// Reader.other_block. Once no bytes remain the next block is parsed as a header.
fn (mut u Untar) read_long_path() {
    if u.size > 0 {
        mut chunk_len := 512
        if u.size <= 512 {
            chunk_len = u.size
        }
        u.size -= 512
        name_part := u.buffer[0..chunk_len]
        if u.long_path != unsafe { nil } {
            // the long path field collects the data parts as file long name
            u.long_path.append(name_part)
            u.reader.other_block(mut u.read, 'data_part:${name_part.len}')
        }
    }
    if u.size > 0 {
        return
    }
    u.state = .header
}
198 
// extract_path returns the block path for directories and files, read from
// the name field at the start of the header block.
// The name field is 100 bytes long and has no terminating zero when the name
// fills it, so the scan stops at the first zero byte or at the end of the
// field, whichever comes first, instead of running into the next fields.
fn (mut u Untar) extract_path() string {
    name_field_len := 100
    mut end := 0
    for end < name_field_len && u.buffer[end] != 0 {
        end++
    }
    return u.buffer[0..end].bytestr()
}
216 
// checksum_ok verifies the validity for dir and files blocks.
// It adds up the 512 bytes of the block, counting each of the 8 checksum
// bytes (positions 148 to 155) as 0x20, and compares the total with the
// octal number stored in the checksum field.
fn (mut u Untar) checksum_ok() bool {
    mut total := u64(8 * 0x20) // the checksum field itself, taken as 0x20 bytes
    for pos, b in u.buffer {
        if pos >= 148 && pos <= 155 {
            continue
        }
        total += b
    }
    return total == u.extract_octal(148, 8)
}
230 
// extract_octal reads an octal number at block position `pos` with a given number of `digits`.
// Bytes that are not octal digits at the start of the field are skipped, then
// digits are accumulated until the first non-octal byte or the end of the field.
// Returns 0 when the field holds no octal digit.
// The field limit is checked before each byte is read, so a field that ends
// at the last byte of the block is never read out of bounds.
fn (mut u Untar) extract_octal(pos int, digits int) u64 {
    mut end := pos + digits
    if end > u.buffer.len {
        end = u.buffer.len
    }
    mut p := pos
    // skip the leading padding (any byte outside `0`..`7`)
    for p < end && (u.buffer[p] < `0` || u.buffer[p] > `7`) {
        p++
    }
    mut value := u64(0)
    for p < end && u.buffer[p] >= `0` && u.buffer[p] <= `7` {
        value = value * 8 + u64(u.buffer[p] - `0`)
        p++
    }
    return value
}
256 
// LongPath collects the bytes of a long name that arrives in several parts.
@[heap]
struct LongPath {
mut:
    name     []u8 // zero-filled buffer that receives the name bytes
    last_pos int  // position in `name` where the next part is written
}

// new_long_path builds a LongPath whose name buffer has exactly `size` bytes.
fn new_long_path(size int) &LongPath {
    long_path := &LongPath{
        name: []u8{len: size}
    }
    return long_path
}

// append copies `data` into the name buffer right after the bytes stored so
// far. When `data` does not fit in the remaining space nothing is copied.
fn (mut l LongPath) append(data []u8) {
    end := l.last_pos + data.len
    if end > l.name.len {
        return
    }
    for offset in 0 .. data.len {
        l.name[l.last_pos + offset] = data[offset]
    }
    l.last_pos = end
}

// get_path returns the name as a string, read like a C string: the bytes up
// to the first zero byte, or the whole buffer when it holds no zero byte.
fn (l LongPath) get_path() string {
    mut end := 0
    for end < l.name.len && l.name[end] != 0 {
        end++
    }
    return l.name[..end].bytestr()
}
292

1	module tar
2
// Untar parses the contents of a unix tar file block by block and reports
// every parsed block to a Reader implementation.
// One fixed array of 512 bytes is reused to parse each TAR block.
@[heap]
pub struct Untar {
mut:
    reader     Reader  // receives the dir, file, data and other block callbacks
    max_blocks int     // number of 512-byte blocks in the last array given to read_all_blocks
    buffer     [512]u8 // copy of the block being parsed
    read       Read    // last read to send/receive to/from reader implementation

    state State // what the next block holds: a header, entry data or a long path
    size  int   // remaining payload size of the current entry, in bytes

    long_path &LongPath = unsafe { nil } // not nil while a long file name is held

    blank_block int = -1 // number of the last all-zeros block seen in header state
}
20
// State selects how read_single_block interprets the next 512-byte block.
enum State {
    header    // block is parsed as an entry header (or detected as blank)
    data      // block is handed to read_data as payload of the current entry
    long_path // block is handed to read_long_path as part of a long name
}
26
// new_untar creates an Untar that reports every parsed block to `reader`.
// All other fields start with their default values.
pub fn new_untar(reader Reader) &Untar {
    untar := &Untar{
        reader: reader
    }
    return untar
}
33
// str describes the untar state as text: the max_blocks counter followed by
// the last read.
pub fn (u Untar) str() string {
    blocks_part := 'max_blocks:${u.max_blocks}'
    read_part := 'last_read:${u.read}'
    return '${blocks_part} ${read_part}'
}
38
// read_all_blocks parses all the blocks of an uncompressed tar byte array,
// for example the decompressed content of a *.tar.gz file.
// The array length must be a multiple of 512, otherwise an error is returned.
// Parsing stops at the first block whose result is not `.continue` and that
// result is returned; `.end_of_file` is returned when every block continued.
pub fn (mut u Untar) read_all_blocks(blocks []u8) !ReadResult {
    total := blocks.len / 512
    if total * 512 != blocks.len {
        return error('data_blocks size is not a multiple of 512')
    }
    u.max_blocks = total
    for n in 0 .. total {
        start := n * 512
        outcome := u.read_single_block(blocks[start..start + 512])!
        if outcome == .continue {
            continue
        }
        return outcome
    }
    return .end_of_file
}
54
// read_single_block parses one block at a time.
// The block length must be 512, otherwise an error is returned.
// A block of 512 zeroes found where a header is expected is a blank block:
// the first one returns `.continue`, a second consecutive one returns
// `.end_archive`. Any other block returns `.stop_early` when the last read
// has its stop_early flag set, else `.continue`.
pub fn (mut u Untar) read_single_block(block []u8) !ReadResult {
    if block.len != 512 {
        return error('data_block size is not 512')
    }
    u.read.block_number++ // 1,2,3...

    // Copy the block into the parse buffer and note if any byte is non-zero.
    mut has_content := false
    for pos, b in block {
        u.buffer[pos] = b
        if b != 0 {
            has_content = true
        }
    }

    match u.state {
        .data {
            u.read_data()
        }
        .long_path {
            u.read_long_path()
        }
        .header {
            if has_content {
                u.read_header()!
            } else {
                // Blank block: it ends the archive only when the block right
                // before it was blank too.
                follows_blank := u.blank_block == u.read.block_number - 1
                mut outcome := ReadResult.continue
                if follows_blank {
                    u.read.special = .blank_2
                    outcome = ReadResult.end_archive
                } else {
                    u.read.special = .blank_1
                }
                u.read.path_len = 0
                u.reader.other_block(mut u.read, '${outcome}')
                u.blank_block = u.read.block_number
                return outcome
            }
        }
    }

    if u.read.stop_early {
        return .stop_early
    }
    return .continue
}
106
// read_header parses the header block currently held in the buffer.
// It reads the size field (offset 124, 12 bytes) and the type flag (offset 156),
// reports the entry to the reader with the callback matching its type and
// selects the state used for the next block.
// Returns an error when a directory or file header fails its checksum, or when
// an entry of a known type declares a size that does not fit the `int` counter.
fn (mut u Untar) read_header() ! {
    declared_size := u.extract_octal(124, 12)
    u.size = int(declared_size)
    header := u.buffer[156] // pos 0x9c
    block_header := BlockHeader.from(header) or {
        u.read.special = .unknown
        u.read.path_len = 0
        u.reader.other_block(mut u.read, 'size:${u.size}')
        return
    }
    // The size field can hold values above the 32-bit `int` range. The cast
    // above would then wrap, and the payload blocks would be misread as headers.
    if declared_size > u64(2147483647) {
        return error('Entry size ${declared_size} is not supported reading:${u.read}')
    }
    match block_header {
        .dir {
            if !u.checksum_ok() {
                return error('Checksum error: directory reading:${u.read}')
            }
            u.read.special = .no
            // NOTE(review): a pending long name is not applied to directories,
            // only the short path is reported - confirm this is intended.
            u.read.set_short_path(u.buffer, false)
            u.reader.dir_block(mut u.read, u64(u.size))
        }
        .file {
            if !u.checksum_ok() {
                return error('Checksum error file reading:${u.read}')
            }
            u.read.special = .no
            if u.long_path != unsafe { nil } {
                u.read.set_long_path(u.long_path)
            } else {
                u.read.set_short_path(u.buffer, true)
            }
            if u.size > 0 {
                u.state = .data
            }
            u.reader.file_block(mut u.read, u64(u.size))
        }
        .long_name {
            u.read.special = .long_name
            u.reader.other_block(mut u.read, 'size:${u.size}')
            if u.size > 0 {
                u.state = .long_path
                u.long_path = new_long_path(u.size)
            }
        }
        .hard_link, .sym_link, .char_dev, .block_dev, .fifo {
            u.read.special = .ignore
            u.reader.other_block(mut u.read, block_header.str())
        }
        .global {
            u.read.special = .global
            u.read.set_short_path(u.buffer, false)
            u.reader.other_block(mut u.read, 'size:${u.size}')
            if u.size > 0 {
                u.state = .data
            }
        }
    }
    // A long name belongs only to the entry right after it. When that entry
    // has no data blocks, read_data never runs to drop the name, so drop it
    // here; otherwise it would be applied to the next file header.
    if block_header != .long_name && u.state == .header {
        u.long_path = unsafe { nil }
        u.read.long_path = unsafe { nil }
    }
}
166
// read_data hands the current block to Reader.data_block so the implementor
// can collect the data parts as content. The part holds at most 512 bytes and
// is shorter for the last block; the bytes still pending are passed along.
// Once no bytes remain, the held long path is dropped and the next block is
// parsed as a header.
fn (mut u Untar) read_data() {
    if u.size > 0 {
        mut chunk_len := 512
        if u.size <= 512 {
            chunk_len = u.size
        }
        u.size -= 512
        // The counter goes negative on the last block; report zero pending.
        mut remaining := u.size
        if remaining < 0 {
            remaining = 0
        }
        chunk := u.buffer[0..chunk_len]
        u.reader.data_block(mut u.read, chunk, remaining)
    }
    if u.size > 0 {
        return
    }
    u.long_path = unsafe { nil }
    u.read.long_path = unsafe { nil } // real clear
    u.state = .header
}
182
// read_long_path appends the current block, at most 512 bytes and fewer for
// the last block, to the held long path and reports the part length to
// Reader.other_block. Once no bytes remain the next block is parsed as a header.
fn (mut u Untar) read_long_path() {
    if u.size > 0 {
        mut chunk_len := 512
        if u.size <= 512 {
            chunk_len = u.size
        }
        u.size -= 512
        name_part := u.buffer[0..chunk_len]
        if u.long_path != unsafe { nil } {
            // the long path field collects the data parts as file long name
            u.long_path.append(name_part)
            u.reader.other_block(mut u.read, 'data_part:${name_part.len}')
        }
    }
    if u.size > 0 {
        return
    }
    u.state = .header
}
198
// extract_path returns the block path for directories and files, read from
// the name field at the start of the header block.
// The name field is 100 bytes long and has no terminating zero when the name
// fills it, so the scan stops at the first zero byte or at the end of the
// field, whichever comes first, instead of running into the next fields.
fn (mut u Untar) extract_path() string {
    name_field_len := 100
    mut end := 0
    for end < name_field_len && u.buffer[end] != 0 {
        end++
    }
    return u.buffer[0..end].bytestr()
}
216
// checksum_ok verifies the validity for dir and files blocks.
// It adds up the 512 bytes of the block, counting each of the 8 checksum
// bytes (positions 148 to 155) as 0x20, and compares the total with the
// octal number stored in the checksum field.
fn (mut u Untar) checksum_ok() bool {
    mut total := u64(8 * 0x20) // the checksum field itself, taken as 0x20 bytes
    for pos, b in u.buffer {
        if pos >= 148 && pos <= 155 {
            continue
        }
        total += b
    }
    return total == u.extract_octal(148, 8)
}
230
// extract_octal reads an octal number at block position `pos` with a given number of `digits`.
// Bytes that are not octal digits at the start of the field are skipped, then
// digits are accumulated until the first non-octal byte or the end of the field.
// Returns 0 when the field holds no octal digit.
// The field limit is checked before each byte is read, so a field that ends
// at the last byte of the block is never read out of bounds.
fn (mut u Untar) extract_octal(pos int, digits int) u64 {
    mut end := pos + digits
    if end > u.buffer.len {
        end = u.buffer.len
    }
    mut p := pos
    // skip the leading padding (any byte outside `0`..`7`)
    for p < end && (u.buffer[p] < `0` || u.buffer[p] > `7`) {
        p++
    }
    mut value := u64(0)
    for p < end && u.buffer[p] >= `0` && u.buffer[p] <= `7` {
        value = value * 8 + u64(u.buffer[p] - `0`)
        p++
    }
    return value
}
256
// LongPath collects the bytes of a long name that arrives in several parts.
@[heap]
struct LongPath {
mut:
    name     []u8 // zero-filled buffer that receives the name bytes
    last_pos int  // position in `name` where the next part is written
}

// new_long_path builds a LongPath whose name buffer has exactly `size` bytes.
fn new_long_path(size int) &LongPath {
    long_path := &LongPath{
        name: []u8{len: size}
    }
    return long_path
}

// append copies `data` into the name buffer right after the bytes stored so
// far. When `data` does not fit in the remaining space nothing is copied.
fn (mut l LongPath) append(data []u8) {
    end := l.last_pos + data.len
    if end > l.name.len {
        return
    }
    for offset in 0 .. data.len {
        l.name[l.last_pos + offset] = data[offset]
    }
    l.last_pos = end
}

// get_path returns the name as a string, read like a C string: the bytes up
// to the first zero byte, or the whole buffer when it holds no zero byte.
fn (l LongPath) get_path() string {
    mut end := 0
    for end < l.name.len && l.name[end] != 0 {
        end++
    }
    return l.name[..end].bytestr()
}
292