v2 / vlib / archive / tar / reader.v
292 lines · 256 sloc · 8.92 KB · 3d60410b605d001e54f280070d5f952da9de1112
Raw
1module tar
2
3import compress.gzip
4import os
5
// read_tar_file loads the whole local .tar file found at `path` into memory
// and hands its bytes to `read_tar_blocks` so the given `reader` is called for
// the blocks. Errors from reading the file or from parsing are propagated.
pub fn read_tar_file(path string, reader Reader) ! {
	read_tar_blocks(os.read_bytes(path)!, reader)!
}
12
// read_tar_gz_file loads the local gzip compressed file found at `path`,
// decompresses it completely in memory and passes the resulting tar bytes to
// `read_tar_blocks` so the given `reader` is called for the blocks.
// Errors from reading, decompressing or parsing are propagated.
pub fn read_tar_gz_file(path string, reader Reader) ! {
	compressed := os.read_bytes(path)!
	read_tar_blocks(gzip.decompress(compressed)!, reader)!
}
20
// read_tar_blocks creates an Untar bound to `reader` and lets it read
// `all_blocks` in one go. Only errors are propagated; the value returned by
// `Untar.read_all_blocks` is not inspected here.
fn read_tar_blocks(all_blocks []u8, reader Reader) ! {
	mut parser := Untar{
		reader: reader
	}
	parser.read_all_blocks(all_blocks)!
}
27
// Read is the value that Untar passes to the methods implemented by a Reader.
// The implementor can query it with `get_block_number()` and `get_path()`,
// and can set the public field `stop_early` to true to suspend the reading.
pub struct Read {
mut:
	// consecutive number of the block, see `get_block_number()`
	block_number int
	// special type of the block, see `get_special()`
	special BlockSpecial
	// number of valid bytes in `prefix_buf`, filled by `set_short_path()`
	prefix_len int
	prefix_buf [131]u8
	// when true `get_path()` inserts a `/` between a non empty prefix and the path
	separator bool
	// number of valid bytes in `path_buf`, filled by `set_short_path()`
	path_len int
	path_buf [100]u8

	// when not nil `get_path()` returns the path of this reference instead of
	// the one built from `prefix_buf` and `path_buf`
	long_path &LongPath = unsafe { nil }
pub mut:
	// set to true by a Reader implementor to suspend the reading
	stop_early bool
}
45
// set_short_path copies the tar block C strings `prefix` and `path` found in
// `buffer` into this Read and remembers `separator_after_prefix`.
// The `prefix` C string is read from byte 345 with a max length of 131, but
// most of the time its length is 0. The `path` C string is read from byte 0
// with a max length of 100. Both are kept as raw bytes and are not converted
// to a V string until it is needed, see `get_path()`.
fn (mut b Read) set_short_path(buffer [512]u8, separator_after_prefix bool) {
	// The prefix (0 to 131 chars) will be other than '' when the TAR block
	// filepath len is > 100. Copy bytes up to the first 0, which terminates
	// the C string.
	b.prefix_len = 0
	for offset in 0 .. 131 {
		ch := buffer[345 + offset]
		if ch == 0 {
			break
		}
		b.prefix_buf[offset] = ch
		b.prefix_len = offset + 1
	}

	b.separator = separator_after_prefix

	// Most of the time there is a path, e.g. for dirs and regular files.
	// Again the first 0 found means the C string is complete.
	b.path_len = 0
	for offset in 0 .. 100 {
		ch := buffer[offset]
		if ch == 0 {
			break
		}
		b.path_buf[offset] = ch
		b.path_len = offset + 1
	}
}
76
// set_long_path stores the given long path reference in this Read. While the
// reference is not nil, `get_path()` returns the path it provides.
fn (mut b Read) set_long_path(long_path &LongPath) {
	b.long_path = unsafe { long_path }
}
81
// get_path returns the path of this read. The path is valid for blocks of types
// directory, file and file data. When a long path reference is set, its path is
// returned. Otherwise the path is built as `prefix`, an optional `/` separator
// (only after a non empty prefix) and `path`.
pub fn (b Read) get_path() string {
	if b.long_path != unsafe { nil } {
		return b.long_path.get_path()
	}

	// room for prefix + separator + path
	mut bytes := []u8{cap: b.prefix_len + b.path_len + 1}
	if b.prefix_len > 0 {
		for i := 0; i < b.prefix_len; i++ {
			bytes << b.prefix_buf[i]
		}
		if b.separator {
			bytes << `/`
		}
	}
	for i := 0; i < b.path_len; i++ {
		bytes << b.path_buf[i]
	}
	return bytes.bytestr()
}
105
// get_block_number returns the consecutive block number stored in this read.
pub fn (b Read) get_block_number() int {
	number := b.block_number
	return number
}
110
// get_special returns the special block type stored in this read.
pub fn (b Read) get_special() BlockSpecial {
	special := b.special
	return special
}
115
// str returns a one line representation of this read with its block number,
// path (as returned by `get_path()`), special type and stop early flag.
pub fn (r Read) str() string {
	path := r.get_path()
	return '(block_number:${r.block_number} path:${path} special:${r.special} stop_early:${r.stop_early})'
}
120
// Reader is used by Untar to report the blocks it parses. Every method
// receives a mutable Read describing the current block.
pub interface Reader {
mut:
	// dir_block is called when untar reads a block of type directory.
	// Call `Read.get_path()` to get the full name of the directory.
	// `size` field is zero for directories.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	dir_block(mut read Read, size u64)

	// file_block is called when untar reads a block of type filename.
	// Call `Read.get_path()` to get the full name of the file.
	// `size` is the expected file size in bytes to be read later.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	file_block(mut read Read, size u64)

	// data_block is called when untar reads a block of type filedata.
	// Call `Read.get_path()` to get the full name of the file the data belongs to.
	// The `data` size is 512 bytes or less. `pending` indicates how many bytes are left to read.
	// The implementor can inspect the data and use the pending value
	// to set Read's field `stop_early` to suspend the reader.
	data_block(mut read Read, data []u8, pending int)

	// other_block is called when untar reads a block type other than directory,
	// filename or filedata, for example `block device` or `FIFO`.
	// `Read.get_special()` and `details` give more info about the block.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	other_block(mut read Read, details string)
}
149
// DebugReader is a Reader implementation without state that prints one row
// per block read: directories, files, file data blocks and other blocks.
pub struct DebugReader implements Reader {
}
154
// new_debug_reader returns a reference to a new DebugReader.
pub fn new_debug_reader() &DebugReader {
	debug_reader := &DebugReader{}
	return debug_reader
}
159
// dir_block prints a `DIR` row with the block number and the directory path.
// The size is ignored.
fn (mut t DebugReader) dir_block(mut read Read, _size u64) {
	number := read.get_block_number()
	path := read.get_path()
	println('DIR #${number} ${path}')
}
163
// file_block prints a `FILE` row with the block number, the file path and
// the given size.
fn (mut t DebugReader) file_block(mut read Read, size u64) {
	number := read.get_block_number()
	path := read.get_path()
	println('FILE #${number} path:${path} size:${size}')
}
167
// data_block prints a `DATA` row with the block number, the file path, the
// length of `data` and the `pending` value.
fn (mut t DebugReader) data_block(mut read Read, data []u8, pending int) {
	number := read.get_block_number()
	path := read.get_path()
	data_len := data.len
	println('DATA #${number} ${path} size:${data_len} pending:${pending}')
}
171
// other_block prints an `OTHER` row with the block number, the special type
// of the read and the given details.
fn (mut t DebugReader) other_block(mut read Read, details string) {
	number := read.get_block_number()
	special := read.special
	println('OTHER #${number} special:${special} ${details}')
}
175
// ReadResult is returned by ReadResultFn and tells the caller whether the
// reading of blocks goes on.
pub enum ReadResult {
	// more blocks can be sent; the only value for which
	// `ChunksReader.read_blocks` keeps sending blocks
	@continue
	// NOTE(review): presumably reported after a Reader sets `Read.stop_early`;
	// confirm against Untar
	stop_early
	// NOTE(review): meaning defined by Untar, not visible in this file
	end_of_file
	// NOTE(review): meaning defined by Untar, not visible in this file
	end_archive
	// returned by `ChunksReader.read_blocks` when a chunk does not fit in its
	// buffer or when reading a block fails
	overflow
}

// ReadResultFn is the signature of a function that reads one tar block and
// returns a ReadResult or an error.
type ReadResultFn = fn (block []u8) !ReadResult
186
// Decompressor decompresses tar.gz data and forwards the resulting tar
// blocks to the Untar it references, see `read_all` and `read_chunks`.
@[heap]
pub struct Decompressor {
mut:
	// receives the decompressed tar blocks
	untar &Untar
}
192
// new_decompressor returns a Decompressor to decompress a tar.gz file.
// The given `untar`, with its registered Reader, will read the blocks.
pub fn new_decompressor(untar &Untar) &Decompressor {
	decompressor := &Decompressor{
		untar: untar
	}
	return decompressor
}
200
// read_all decompresses the whole given `tar_gz` array in memory and then
// calls the untar method `read_all_blocks` to read all the tar blocks at once.
// The read result of untar is returned, or an error when the decompression or
// the reading fails.
pub fn (mut d Decompressor) read_all(tar_gz []u8) !ReadResult {
	tar_bytes := gzip.decompress(tar_gz)!
	result := d.untar.read_all_blocks(tar_bytes)!
	return result
}
208
// read_chunks decompresses the given `tar_gz` array by chunks of
// 32768 bytes which can hold up to 64 tar blocks of 512 bytes each.
// Every chunk is given to a ChunksReader that cuts it in blocks and sends
// them to the untar method `read_single_block`.
// A read result is returned which can be of the type stop early or an error.
pub fn (mut d Decompressor) read_chunks(tar_gz []u8) !ReadResult {
	mut chunks_reader := &ChunksReader{
		read_block_fn: d.untar.read_single_block
	}
	// The callback returns the chunk length to ask for more chunks, and 0 to
	// suspend the decompression.
	on_chunk := fn (chunk []u8, mut cr ChunksReader) int {
		if cr.read_blocks(chunk) != .continue {
			return 0 // suspend
		}
		return chunk.len // go for more
	}
	gzip.decompress_with_callback(tar_gz, on_chunk, chunks_reader) or {
		// A result other than continue means the reading was suspended on
		// purpose, so that result is reported instead of the error.
		if chunks_reader.result != .continue {
			return chunks_reader.result
		}
		return err
	}
	return chunks_reader.result
}
232
// ChunksReader has a reusable fixed buffer with the maximum length of a
// decompressed chunk of 32768 bytes plus a maximum previous pending tar block
// of 512 bytes.
struct ChunksReader {
mut:
	// function called with every complete block of 512 bytes
	read_block_fn ReadResultFn = unsafe { nil }
	buffer        [32768 + 512]u8
	// number of calls to `read_blocks`
	chunks_counter int
	// position of the last not sent buffer byte, i.e. how many bytes at the
	// start of `buffer` are waiting to be sent
	pending int
	// last result returned by `read_block_fn`
	result ReadResult
}
243
// read_blocks receives a chunk like those of 32k from a gzip decompressor. The chunk is
// assumed to be a TAR archive section and is cut in 512 bytes blocks that are sent to
// `read_block_fn` one by one. The result of each block tells this process to continue or
// to stop. The remaining bytes of an incomplete block are kept at the start of the buffer
// and are sent prepended to the blocks cut from the next chunk.
//
// Returns `.continue` when all complete blocks were sent and accepted, the first result
// other than `.continue` returned by `read_block_fn`, or `.overflow` when the chunk does
// not fit in the buffer or when `read_block_fn` fails. The field `result` holds the last
// value returned by `read_block_fn`; it is not changed by the two `.overflow` returns.
fn (mut d ChunksReader) read_blocks(chunk []u8) ReadResult {
	d.chunks_counter++
	total := d.pending + chunk.len
	if total > d.buffer.len {
		assert false, 'Should not occur buffer overflow ${total}'
		return .overflow
	}

	// append the new chunk after the previous incomplete block bytes not sent yet
	for i, ch in chunk {
		d.buffer[d.pending + i] = ch
	}
	d.pending = total

	// send every complete block found in the buffer
	mut cut := 0
	for cut + 512 <= d.pending {
		block := d.buffer[cut..cut + 512]
		cut += 512
		d.result = d.read_block_fn(block) or {
			// the signature cannot carry the error, so report it in the assertion
			// instead of the former misleading buffer overflow message
			assert false, 'Should not fail reading a tar block: ${err}'
			return .overflow
		}
		if d.result != .continue {
			return d.result // stop_early or any other result that ends the reading
		}
	}

	// after sending all complete blocks move the remaining not sent bytes
	// to the start of the reused buffer to be prepended before the next chunk
	if cut > 0 {
		for i := cut; i < d.pending; i++ {
			d.buffer[i - cut] = d.buffer[i]
		}
		d.pending -= cut
	}
	return .continue
}
293