| 1 | module tar |
| 2 | |
| 3 | import compress.gzip |
| 4 | import os |
| 5 | |
// read_tar_file loads the whole local .tar file at `path` into memory and
// parses its blocks, dispatching them to the given `reader`.
// An error is returned when the file cannot be read or the parsing fails.
pub fn read_tar_file(path string, reader Reader) ! {
	read_tar_blocks(os.read_bytes(path)!, reader)!
}
| 12 | |
// read_tar_gz_file loads the local gzip compressed file at `path`, decompresses
// it fully in memory and parses the resulting tar blocks with the given `reader`.
// An error is returned when reading, decompressing or parsing fails.
pub fn read_tar_gz_file(path string, reader Reader) ! {
	compressed := os.read_bytes(path)!
	read_tar_blocks(gzip.decompress(compressed)!, reader)!
}
| 20 | |
// read_tar_blocks creates a fresh Untar bound to `reader` and lets it read
// `all_blocks` in one go. The read result is discarded, only errors propagate.
fn read_tar_blocks(all_blocks []u8, reader Reader) ! {
	mut parser := Untar{
		reader: reader
	}
	parser.read_all_blocks(all_blocks)!
}
| 27 | |
// Read is the value handed to every Reader callback.
// The implementor can query it with `get_block_number()`, `get_path()` and
// `get_special()`, and can set the field `stop_early` to true to suspend the reading.
pub struct Read {
mut:
	// consecutive block number, see `get_block_number()`
	block_number int
	// special type of the block, see `get_special()`
	special BlockSpecial
	// number of valid bytes in `prefix_buf`, filled by `set_short_path()`
	prefix_len int
	prefix_buf [131]u8
	// when true `get_path()` inserts a `/` between a non empty prefix and the path
	separator bool
	// number of valid bytes in `path_buf`, filled by `set_short_path()`
	path_len int
	path_buf [100]u8

	// when not nil `get_path()` returns this long path instead of prefix + path
	long_path &LongPath = unsafe { nil }
pub mut:
	// set to true by a Reader implementor to suspend the reading
	stop_early bool
}
| 45 | |
// set_short_path stores the raw `prefix` and `path` C strings of a tar block.
// The `prefix` is read from block offset 345 and holds up to 131 chars, most of
// the time it is empty. The `path` is read from block offset 0 and holds up to
// 100 chars. Both are kept as raw bytes and are not converted to a V string
// until it is needed, see `get_path()`.
// NOTE(review): `long_path` is not cleared here and `get_path()` prefers it when
// it is set - confirm that callers reset it between entries.
fn (mut b Read) set_short_path(buffer [512]u8, separator_after_prefix bool) {
	b.separator = separator_after_prefix

	// copy the prefix C string, the first 0 byte (or 131 chars) ends it
	b.prefix_len = 0
	for offset in 0 .. 131 {
		ch := buffer[345 + offset]
		if ch == 0 {
			break
		}
		b.prefix_buf[offset] = ch
		b.prefix_len = offset + 1
	}

	// copy the path C string, the first 0 byte (or 100 chars) ends it
	b.path_len = 0
	for offset in 0 .. 100 {
		ch := buffer[offset]
		if ch == 0 {
			break
		}
		b.path_buf[offset] = ch
		b.path_len = offset + 1
	}
}
| 76 | |
// set_long_path links this Read to the given long path reference. While the
// reference is not nil, `get_path()` returns the long path.
fn (mut r Read) set_long_path(long_path &LongPath) {
	r.long_path = unsafe { long_path }
}
| 81 | |
// get_path returns the path of this read. The path is valid for blocks of types
// directory, file and file data. When a long path is linked it is returned as is,
// otherwise the path is built as prefix + optional `/` separator + short path.
pub fn (r Read) get_path() string {
	if r.long_path != unsafe { nil } {
		return r.long_path.get_path()
	}

	mut joined := []u8{cap: r.prefix_len + r.path_len + 1}
	if r.prefix_len > 0 {
		for i := 0; i < r.prefix_len; i++ {
			joined << r.prefix_buf[i]
		}
		// the separator is only emitted after a non empty prefix
		if r.separator {
			joined << `/`
		}
	}
	for i := 0; i < r.path_len; i++ {
		joined << r.path_buf[i]
	}
	return joined.bytestr()
}
| 105 | |
// get_block_number returns the consecutive block number stored in this read.
pub fn (r Read) get_block_number() int {
	return r.block_number
}
| 110 | |
// get_special returns the special block type stored in this read.
pub fn (r Read) get_special() BlockSpecial {
	return r.special
}
| 115 | |
// str returns a one line representation of the read with its block number,
// path, special type and stop early flag.
pub fn (r Read) str() string {
	path := r.get_path()
	return '(block_number:${r.block_number} path:${path} special:${r.special} stop_early:${r.stop_early})'
}
| 120 | |
// Reader is the set of callbacks that Untar invokes while it parses the blocks.
pub interface Reader {
mut:
	// dir_block is called when untar reads a block of type directory.
	// Call `Read.get_path()` to get the full name of the directory.
	// `size` field is zero for directories.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	dir_block(mut read Read, size u64)

	// file_block is called when untar reads a block of type filename.
	// Call `Read.get_path()` to get the full name of the file.
	// `size` is the expected file size in bytes to be read later.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	file_block(mut read Read, size u64)

	// data_block is called when untar reads a block of type filedata.
	// Call `Read.get_path()` to get the full name of the file the data belongs to.
	// The `data` size is 512 bytes or less. `pending` indicates how many bytes are left to read.
	// The implementor can inspect the data and use the pending value
	// to set Read's field `stop_early` to suspend the reader.
	data_block(mut read Read, data []u8, pending int)

	// other_block is called when untar reads a block type other than directory,
	// filename or filedata, for example a `block device` or a `FIFO`.
	// `Read.get_special()` and `details` give more info about the block.
	// The implementor can set Read's field `stop_early` to suspend the reader.
	other_block(mut read Read, details string)
}
| 149 | |
// DebugReader implements a Reader that prints one row per block read:
// directories, files, file data blocks and special blocks.
// It has no fields and never sets `stop_early`.
pub struct DebugReader implements Reader {
}
| 154 | |
// new_debug_reader returns a reference to a new DebugReader.
pub fn new_debug_reader() &DebugReader {
	debug_reader := &DebugReader{}
	return debug_reader
}
| 159 | |
// dir_block prints the block number and path of a directory block.
fn (mut t DebugReader) dir_block(mut read Read, _size u64) {
	number := read.get_block_number()
	path := read.get_path()
	println('DIR #${number} ${path}')
}
| 163 | |
// file_block prints the block number, path and expected size of a file block.
fn (mut t DebugReader) file_block(mut read Read, size u64) {
	number := read.get_block_number()
	path := read.get_path()
	println('FILE #${number} path:${path} size:${size}')
}
| 167 | |
// data_block prints the block number, path, data length and pending bytes
// of a file data block.
fn (mut t DebugReader) data_block(mut read Read, data []u8, pending int) {
	number := read.get_block_number()
	path := read.get_path()
	println('DATA #${number} ${path} size:${data.len} pending:${pending}')
}
| 171 | |
// other_block prints the block number, special type and details of any block
// that is not a directory, file or file data block.
fn (mut t DebugReader) other_block(mut read Read, details string) {
	number := read.get_block_number()
	println('OTHER #${number} special:${read.special} ${details}')
}
| 175 | |
// ReadResult is the outcome of reading one or more tar blocks.
// It is the value returned by a ReadResultFn.
pub enum ReadResult {
	// keep reading; the only result that lets ChunksReader ask for more chunks
	@continue
	// NOTE(review): presumably reported after a Reader sets `Read.stop_early` - confirm in Untar
	stop_early
	// NOTE(review): not produced in this section - confirm meaning in Untar
	end_of_file
	// NOTE(review): not produced in this section - confirm meaning in Untar
	end_archive
	// returned by ChunksReader.read_blocks when its buffer would overflow
	// or when the block reader function fails
	overflow
}
| 184 | |
// ReadResultFn reads a single tar block and reports how to proceed.
// ChunksReader calls it once per 512 bytes block; `Decompressor.read_chunks`
// binds it to the untar method `read_single_block`.
type ReadResultFn = fn (block []u8) !ReadResult
| 186 | |
// Decompressor decompresses tar.gz data and forwards the resulting tar blocks
// to the Untar it references.
@[heap]
pub struct Decompressor {
mut:
	// untar that reads the decompressed blocks
	untar &Untar
}
| 192 | |
// new_decompressor returns a Decompressor to decompress a tar.gz file.
// The given Untar, with its registered Reader, will read the blocks.
pub fn new_decompressor(untar &Untar) &Decompressor {
	decompressor := &Decompressor{
		untar: untar
	}
	return decompressor
}
| 200 | |
// read_all decompresses the whole `tar_gz` array in memory and then calls the
// untar method `read_all_blocks` to read all the tar blocks at once.
// The untar read result is returned, or an error when decompressing or
// reading fails.
pub fn (mut d Decompressor) read_all(tar_gz []u8) !ReadResult {
	decompressed := gzip.decompress(tar_gz)!
	result := d.untar.read_all_blocks(decompressed)!
	return result
}
| 208 | |
// read_chunks decompresses the given `tar_gz` array by chunks of
// 32768 bytes which can hold up to 64 tar blocks of 512 bytes each.
// Every chunk is handed to a ChunksReader that cuts it in blocks and sends
// them to the untar method `read_single_block`.
// The last read result is returned. When the decompression fails while the
// reader still wanted to continue, the decompression error is returned.
pub fn (mut d Decompressor) read_chunks(tar_gz []u8) !ReadResult {
	mut chunks := &ChunksReader{
		read_block_fn: d.untar.read_single_block
	}
	on_chunk := fn (chunk []u8, mut reader ChunksReader) int {
		if reader.read_blocks(chunk) != .continue {
			return 0 // suspend
		}
		return chunk.len // go for more
	}
	gzip.decompress_with_callback(tar_gz, on_chunk, chunks) or {
		// a suspended read also ends here; only report the error
		// when the reader did not ask to stop
		if chunks.result != .continue {
			return chunks.result
		}
		return err
	}
	return chunks.result
}
| 232 | |
// ChunksReader has a reusable fixed buffer with maximum length of a decompressed
// chunk of 32768 bytes plus a maximum previous pending tar block of 512 bytes.
struct ChunksReader {
mut:
	// called once per complete 512 bytes block
	read_block_fn ReadResultFn = unsafe { nil }
	buffer [32768 + 512]u8
	// number of calls to `read_blocks`
	chunks_counter int
	// number of bytes held in `buffer` that were not sent yet as a block
	pending int
	// result of the last call to `read_block_fn`
	result ReadResult
}
| 243 | |
// read_blocks receives a chunk like those of 32k from a gzip decompressor. The chunk is
// assumed to be a TAR archive section and is cut in 512 bytes blocks that are sent to
// the block reader function one by one. The block reader result tells this process to
// continue or to stop. The remaining bytes of an incomplete block are kept at the start
// of the buffer and are sent prepended to the cuts of the next chunk.
// Returns `.continue` when all complete blocks were sent, the block reader result when
// it is other than `.continue`, or `.overflow` when the buffer cannot hold the chunk
// or the block reader fails.
fn (mut d ChunksReader) read_blocks(chunk []u8) ReadResult {
	d.chunks_counter++
	total := d.pending + chunk.len
	if total > d.buffer.len {
		// record the outcome too: `read_chunks` inspects `result`, not this return value
		d.result = .overflow
		assert false, 'Should not occur buffer overflow ${total}'
		return .overflow
	}

	// append new chunk after previous incomplete block bytes not sent yet
	for i, ch in chunk {
		d.buffer[i + d.pending] = ch
	}
	d.pending += chunk.len

	// send every complete block
	mut cut := 0
	for cut + 512 <= d.pending {
		block := d.buffer[cut..cut + 512]
		cut += 512
		// `result` is left untouched on failure, so a caller that checks it
		// still surfaces an error instead of a regular read result
		d.result = d.read_block_fn(block) or {
			assert false, 'Should not occur block read error: ${err}'
			return .overflow
		}
		if d.result != .continue {
			return d.result // untar asked to stop, e.g. stop_early
		}
	}

	// after sending all complete blocks move the remaining not sent bytes
	// to the start of the reused buffer to be prepended before next chunk
	for i := cut; i < d.pending; i++ {
		d.buffer[i - cut] = d.buffer[i]
	}
	d.pending -= cut
	return .continue
}
| 293 | |