v2 / vlib / archive / tar / untar.v
291 lines · 270 sloc · 6.41 KB · 8e35f4d9848f7ad35d857a187dddbfd2eca5e19d
Raw
1module tar
2
// Untar parses a unix tar archive one 512 byte block at a time and reports
// what it finds to a Reader. The same fixed array of 512 bytes is reused
// to hold every block while it is parsed.
@[heap]
pub struct Untar {
mut:
	reader     Reader  // receives the callbacks issued while blocks are parsed
	max_blocks int     // number of blocks given to the last read_all_blocks call
	buffer     [512]u8 // copy of the block that is currently parsed
	read       Read    // last read, handed mutably to every reader callback

	state State // how the next block is interpreted: header, data or long path
	size  int   // size of the last header; reduced by 512 for every payload block

	long_path &LongPath = unsafe { nil } // not nil while a long name is collected or pending

	blank_block int = -1 // number of the last all-zero block seen while expecting a header
}
20
// State selects how Untar interprets the next block it receives.
enum State {
	header    // the block is parsed as a header (or recognised as a blank block)
	data      // the block is payload that is forwarded to the reader
	long_path // the block is payload that belongs to a long name
}
26
// new_untar creates a heap allocated Untar that reports to `reader`.
// Every other field keeps its default value.
pub fn new_untar(reader Reader) &Untar {
	untar := &Untar{
		reader: reader
	}
	return untar
}
33
// str returns a short description made of max_blocks and the last read.
pub fn (u Untar) str() string {
	blocks := u.max_blocks
	last := u.read
	return 'max_blocks:${blocks} last_read:${last}'
}
38
// read_all_blocks feeds every 512 byte block of `blocks`, in order, to
// read_single_block. It returns an error when `blocks.len` is not a multiple
// of 512 and propagates any error of read_single_block.
// The first result other than .continue is returned at once;
// .end_of_file is returned after the last block was consumed.
pub fn (mut u Untar) read_all_blocks(blocks []u8) !ReadResult {
	if blocks.len % 512 != 0 {
		return error('data_blocks size is not a multiple of 512')
	}
	total := blocks.len / 512
	u.max_blocks = total
	for n in 0 .. total {
		start := n * 512
		result := u.read_single_block(blocks[start..start + 512])!
		if result != .continue {
			return result
		}
	}
	return .end_of_file
}
54
// read_single_block parses exactly one block of 512 bytes.
// It returns an error when `block.len` is not 512 or when parsing a header fails.
// While a header is expected, an all-zero block is reported to the reader as
// a blank block: the first one yields .continue, a second consecutive one
// yields .end_archive. In every other case the result is .stop_early when
// the last read has stop_early set, and .continue otherwise.
pub fn (mut u Untar) read_single_block(block []u8) !ReadResult {
	if block.len != 512 {
		return error('data_block size is not 512')
	}
	u.read.block_number++ // 1,2,3...

	// keep a copy of the block and find out whether it holds only zeroes
	mut has_content := false
	for pos, b in block {
		u.buffer[pos] = b
		if b != 0 {
			has_content = true
		}
	}

	match u.state {
		.header {
			if has_content {
				u.read_header()!
			} else {
				// a blank block where a header was expected
				mut result := ReadResult.continue
				if u.blank_block == u.read.block_number - 1 {
					// the previous block was blank too
					u.read.special = .blank_2
					result = ReadResult.end_archive
				} else {
					u.read.special = .blank_1
				}
				u.read.path_len = 0
				u.reader.other_block(mut u.read, '${result}')
				u.blank_block = u.read.block_number
				return result
			}
		}
		.data {
			u.read_data()
		}
		.long_path {
			u.read_long_path()
		}
	}

	if u.read.stop_early {
		return .stop_early
	}
	return .continue
}
106
// read_header parses the block in u.buffer as a tar header. It reads the
// octal size field at offset 124 and the type flag at offset 156 (0x9c),
// notifies the reader according to the type and selects the state used for
// the following blocks.
// It returns an error when the checksum of a directory or file header does
// not match.
fn (mut u Untar) read_header() ! {
	u.size = int(u.extract_octal(124, 12))
	header := u.buffer[156] // pos 0x9c
	block_header := BlockHeader.from(header) or {
		// the type flag has no BlockHeader value: report it and keep expecting headers
		u.read.special = .unknown
		u.read.path_len = 0
		u.reader.other_block(mut u.read, 'size:${u.size}')
		return
	}
	match block_header {
		.dir {
			if !u.checksum_ok() {
				return error('Checksum error: directory reading:${u.read}')
			}
			u.read.special = .no
			// NOTE(review): a pending long name is not applied to directories here;
			// confirm whether Read.set_long_path is suitable for them.
			u.read.set_short_path(u.buffer, false)
			u.reader.dir_block(mut u.read, u64(u.size))
		}
		.file {
			if !u.checksum_ok() {
				return error('Checksum error file reading:${u.read}')
			}
			u.read.special = .no
			if u.long_path != unsafe { nil } {
				// a long name collected from the preceding blocks replaces the short one
				u.read.set_long_path(u.long_path)
			} else {
				u.read.set_short_path(u.buffer, true)
			}
			if u.size > 0 {
				u.state = .data
			}
			u.reader.file_block(mut u.read, u64(u.size))
		}
		.long_name {
			u.read.special = .long_name
			u.reader.other_block(mut u.read, 'size:${u.size}')
			if u.size > 0 {
				// the following blocks hold the name itself
				u.state = .long_path
				u.long_path = new_long_path(u.size)
			}
		}
		.hard_link, .sym_link, .char_dev, .block_dev, .fifo {
			u.read.special = .ignore
			u.reader.other_block(mut u.read, block_header.str())
		}
		.global {
			u.read.special = .global
			u.read.set_short_path(u.buffer, false)
			u.reader.other_block(mut u.read, 'size:${u.size}')
			if u.size > 0 {
				u.state = .data
			}
		}
	}
	// A pending long name belongs only to the entry that directly follows it.
	// When payload blocks follow, read_data drops it after the last one.
	// Without payload (empty file, directory, link, ...) it is dropped here,
	// otherwise the next file header would be reported with this stale name.
	if block_header != .long_name && u.state == .header {
		u.long_path = unsafe { nil }
		u.read.long_path = unsafe { nil }
	}
}
166
// read_data hands the payload held in u.buffer to Reader.data_block, so that
// the implementor can collect the parts of the content. At most 512 bytes are
// passed per call, together with the number of bytes still pending.
// After the last part the long path is cleared and headers are expected again.
fn (mut u Untar) read_data() {
	if u.size > 0 {
		chunk_len := if u.size <= 512 { u.size } else { 512 }
		u.size -= 512
		remaining := if u.size <= 0 { 0 } else { u.size }
		chunk := u.buffer[0..chunk_len]
		u.reader.data_block(mut u.read, chunk, remaining)
	}
	if u.size > 0 {
		// more payload blocks follow
		return
	}
	u.long_path = unsafe { nil }
	u.read.long_path = unsafe { nil } // real clear
	u.state = .header
}
182
// read_long_path appends the payload held in u.buffer (at most 512 bytes)
// to the long path that is being collected and reports the part length to
// the reader. After the last part, headers are expected again.
fn (mut u Untar) read_long_path() {
	if u.size > 0 {
		chunk_len := if u.size <= 512 { u.size } else { 512 }
		u.size -= 512
		if u.long_path != unsafe { nil } {
			// the long path collects the parts that make up the long name
			chunk := u.buffer[0..chunk_len]
			u.long_path.append(chunk)
			u.reader.other_block(mut u.read, 'data_part:${chunk.len}')
		}
	}
	if u.size <= 0 {
		u.state = .header
	}
}
198
// extract_path returns the path stored in the name field at the start of
// the block in u.buffer, for directories and files.
// The name field of a tar header is 100 bytes long and has no terminating
// zero byte when the name fills it completely. The scan therefore stops at
// the first zero byte or after 100 bytes, whichever comes first, so that it
// never runs into the fields that follow the name.
fn (mut u Untar) extract_path() string {
	name_field_len := 100
	mut end := 0
	for end < name_field_len && u.buffer[end] != 0 {
		end++
	}
	return u.buffer[0..end].bytestr()
}
216
// checksum_ok reports whether the sum of all 512 bytes of the block in
// u.buffer equals the octal number stored in the checksum field
// (offsets 148 to 155). For the sum, the 8 bytes of the checksum field
// itself each count as a space (0x20).
fn (mut u Untar) checksum_ok() bool {
	mut sum := u64(8 * 0x20) // the checksum field counted as 8 spaces
	for pos, b in u.buffer {
		if pos >= 148 && pos <= 155 {
			continue
		}
		sum += b
	}
	expected := u.extract_octal(148, 8)
	return sum == expected
}
230
// extract_octal reads an octal number at block position `pos` from a field
// that is `digits` bytes wide. Leading bytes that are not octal digits are
// skipped; the number ends at the first non octal byte after them or at the
// end of the field. It returns 0 when the field holds no octal digit or when
// `pos` lies outside of the block.
fn (mut u Untar) extract_octal(pos int, digits int) u64 {
	if pos < 0 {
		return 0
	}
	mut value := u64(0)
	mut p := pos
	mut n := digits
	// The remaining width and the block end are checked before the byte is
	// read, so a field that ends at the block end never indexes past the buffer.
	for n > 0 && p < u.buffer.len && (u.buffer[p] < `0` || u.buffer[p] > `7`) {
		p++
		n--
	}
	for n > 0 && p < u.buffer.len && u.buffer[p] >= `0` && u.buffer[p] <= `7` {
		value *= 8
		value += u8(u.buffer[p] - `0`)
		p++
		n--
	}
	return value
}
256
// LongPath collects the bytes of a long name that arrives in several parts.
@[heap]
struct LongPath {
mut:
	name     []u8 // name bytes; allocated once with its final length, zero filled
	last_pos int  // number of bytes copied into name so far
}

// new_long_path builds a LongPath whose name holds exactly `size` bytes.
fn new_long_path(size int) &LongPath {
	long_path := &LongPath{
		name: []u8{len: size}
	}
	return long_path
}

// append copies `data` into name, right behind the bytes appended before.
// Data that does not fit completely into the remaining space is ignored.
fn (mut l LongPath) append(data []u8) {
	if l.last_pos + data.len > l.name.len {
		return
	}
	for offset, b in data {
		l.name[l.last_pos + offset] = b
	}
	l.last_pos += data.len
}

// get_path returns name as a string that ends before the first zero byte,
// like a C string. The whole name is returned when it holds no zero byte.
fn (l LongPath) get_path() string {
	mut end := 0
	for end < l.name.len && l.name[end] != 0 {
		end++
	}
	return l.name[0..end].bytestr()
}
292