| 1 | module tar |
| 2 | |
// Untar uses a reader to parse the contents of a unix tar file.
// Reuses a fixed array of 512 bytes to parse each TAR block.
// Blocks are fed in through read_all_blocks or read_single_block, and every
// parsed block is reported to the `reader` implementation.
@[heap]
pub struct Untar {
mut:
	reader     Reader // receives dir/file/data/other block notifications
	max_blocks int    // number of 512-byte blocks given to read_all_blocks
	buffer     [512]u8 // data to parse block
	read       Read // last read to send/receive to/from reader implementation

	state State // how the next block is interpreted: header, data or long_path
	size  int   // remaining data size while state is .data or .long_path

	long_path &LongPath = unsafe { nil } // not nil to hold a file long_name

	blank_block int = -1 // block number of the last all-zero header block, -1 if none yet
}
| 20 | |
// State selects how Untar.read_single_block interprets the next 512-byte block.
enum State {
	header // the block is parsed as a header (or detected as a blank block)
	data // the block is passed to read_data
	long_path // the block is passed to read_long_path
}
| 26 | |
// new_untar creates a heap allocated Untar that reports every parsed block
// to the given `reader`. All other fields keep their default values.
pub fn new_untar(reader Reader) &Untar {
	untar := &Untar{
		reader: reader
	}
	return untar
}
| 33 | |
// str describes the untar as text: the `max_blocks` value followed by the
// last `read` sent to the reader.
pub fn (u Untar) str() string {
	description := 'max_blocks:${u.max_blocks} last_read:${u.read}'
	return description
}
| 38 | |
// read_all_blocks parses, 512 bytes at a time, the blocks of an already
// decompressed tar archive (for example the content of a *.tar.gz file).
// `blocks.len` must be a multiple of 512, otherwise an error is returned.
// Parsing stops at the first block whose result is not `.continue` and that
// result is returned; when every block was consumed it returns `.end_of_file`.
pub fn (mut u Untar) read_all_blocks(blocks []u8) !ReadResult {
	if blocks.len % 512 != 0 {
		return error('data_blocks size is not a multiple of 512')
	}
	u.max_blocks = blocks.len / 512
	mut offset := 0
	for offset < blocks.len {
		outcome := u.read_single_block(blocks[offset..offset + 512])!
		if outcome != .continue {
			return outcome
		}
		offset += 512
	}
	return .end_of_file
}
| 54 | |
// read_single_block parses one block at a time. `block.len` must be 512,
// otherwise an error is returned.
// The block is copied into the internal buffer and handled according to the
// current state (header, data or long path). While a header is expected, an
// all-zero block is reported to the reader as a blank block: the first one
// returns `.continue`, a second consecutive one returns `.end_archive`.
// For any other block it returns `.stop_early` when the read has `stop_early`
// set, or `.continue` otherwise. Errors from header parsing are propagated.
pub fn (mut u Untar) read_single_block(block []u8) !ReadResult {
	if block.len != 512 {
		return error('data_block size is not 512')
	}
	u.read.block_number++ // 1,2,3...

	// copy the block into the reusable buffer and detect an all-zero block
	mut nonzero_seen := false
	for idx, b in block {
		u.buffer[idx] = b
		if b != 0 {
			nonzero_seen = true
		}
	}

	if u.state == .header && !nonzero_seen {
		// blank block where a header was expected
		follows_blank := u.blank_block == u.read.block_number - 1
		if follows_blank {
			// two consecutive blank blocks
			u.read.special = .blank_2
		} else {
			// first blank block
			u.read.special = .blank_1
		}
		result := if follows_blank { ReadResult.end_archive } else { ReadResult.continue }
		u.read.path_len = 0
		u.reader.other_block(mut u.read, '${result}')
		u.blank_block = u.read.block_number
		return result
	}

	match u.state {
		.header { u.read_header()! }
		.data { u.read_data() }
		.long_path { u.read_long_path() }
	}

	if u.read.stop_early {
		return .stop_early
	}
	return .continue
}
| 106 | |
// read_header parses the block held in `u.buffer` as a TAR header.
// It reads the octal size field (offset 124, 12 digits) into `u.size` and the
// type flag at offset 156, then notifies the reader according to the type:
// - unknown type flag: reported through `other_block` with special `.unknown`.
// - dir: checksum verified, reported through `dir_block`.
// - file: checksum verified, path taken from the pending long name when there
//   is one, or from the header otherwise; reported through `file_block`.
// - long_name: the following data blocks are collected as a long path.
// - hard_link, sym_link, char_dev, block_dev, fifo: reported as `.ignore`.
// - global: reported through `other_block`, its data is read as data blocks.
// Returns an error when the checksum of a dir or file header does not match.
fn (mut u Untar) read_header() ! {
	u.size = int(u.extract_octal(124, 12))
	header := u.buffer[156] // pos 0x9c
	block_header := BlockHeader.from(header) or {
		u.read.special = .unknown
		u.read.path_len = 0
		u.reader.other_block(mut u.read, 'size:${u.size}')
		return
	}
	match block_header {
		.dir {
			if !u.checksum_ok() {
				return error('Checksum error: directory reading:${u.read}')
			}
			u.read.special = .no
			// NOTE(review): a pending long name is not applied to directories
			// here; confirm whether that is intended.
			u.read.set_short_path(u.buffer, false)
			u.reader.dir_block(mut u.read, u64(u.size))
			// the state stays .header: a directory has no data blocks to read
		}
		.file {
			if !u.checksum_ok() {
				return error('Checksum error file reading:${u.read}')
			}
			u.read.special = .no
			if u.long_path != unsafe { nil } {
				u.read.set_long_path(u.long_path)
			} else {
				u.read.set_short_path(u.buffer, true)
			}
			if u.size > 0 {
				u.state = .data
			}
			u.reader.file_block(mut u.read, u64(u.size))
			if u.size <= 0 {
				// An empty file has no data blocks, so read_data never runs to
				// release the long name. Clear it here, after the reader was
				// notified, so it is not reused for the next file header.
				u.long_path = unsafe { nil }
				u.read.long_path = unsafe { nil }
			}
		}
		.long_name {
			u.read.special = .long_name
			u.reader.other_block(mut u.read, 'size:${u.size}')
			if u.size > 0 {
				u.state = .long_path
				u.long_path = new_long_path(u.size)
			}
		}
		.hard_link, .sym_link, .char_dev, .block_dev, .fifo {
			u.read.special = .ignore
			u.reader.other_block(mut u.read, block_header.str())
		}
		.global {
			u.read.special = .global
			u.read.set_short_path(u.buffer, false)
			u.reader.other_block(mut u.read, 'size:${u.size}')
			if u.size > 0 {
				u.state = .data
			}
		}
	}
}
| 166 | |
// read_data calls Reader.data_block with the useful part of the current block
// (at most 512 bytes, fewer on the last block) and the size still pending, so
// the implementor can collect the parts as file content.
// Once no data remains, the long path is released and the next block is
// parsed as a header again.
fn (mut u Untar) read_data() {
	if u.size > 0 {
		chunk_len := if u.size <= 512 { u.size } else { 512 }
		u.size -= 512
		remaining := if u.size < 0 { 0 } else { u.size }
		chunk := u.buffer[0..chunk_len]
		u.reader.data_block(mut u.read, chunk, remaining)
	}
	if u.size > 0 {
		// more data blocks are expected
		return
	}
	u.long_path = unsafe { nil }
	u.read.long_path = unsafe { nil } // real clear
	u.state = .header
}
| 182 | |
// read_long_path appends the useful part of the current block (at most 512
// bytes) to the pending long path and reports it to the reader through
// `other_block`. Once the whole long name was consumed, the next block is
// parsed as a header again.
fn (mut u Untar) read_long_path() {
	if u.size > 0 {
		chunk_len := if u.size <= 512 { u.size } else { 512 }
		chunk := u.buffer[0..chunk_len]
		u.size -= 512
		if u.long_path != unsafe { nil } {
			// the long path collects the data parts as the file long name
			u.long_path.append(chunk)
			u.reader.other_block(mut u.read, 'data_part:${chunk.len}')
		}
	}
	if u.size > 0 {
		// the long name continues in the next block
		return
	}
	u.state = .header
}
| 198 | |
// extract_path returns the block path for directories and files.
// The path is read from the header name field: the first 100 bytes of the
// block, ending at the first NUL byte. The scan is limited to those 100 bytes
// because a name that fills the whole field has no NUL terminator, and
// reading further would append the bytes of the following header fields.
fn (mut u Untar) extract_path() string {
	name_field_len := 100
	mut end := 0
	for end < name_field_len && u.buffer[end] != 0 {
		end++
	}
	return u.buffer[0..end].bytestr()
}
| 216 | |
// checksum_ok verifies the validity of dir and file header blocks.
// It adds up the 512 bytes of the block, counting each of the 8 bytes of the
// checksum field itself (positions 148 to 155) as a space (0x20), and compares
// the sum with the octal value stored in that field.
fn (mut u Untar) checksum_ok() bool {
	// the 8 checksum bytes, each taken as a space
	mut sum := u64(8 * 0x20)
	for idx in 0 .. 148 {
		sum += u.buffer[idx]
	}
	for idx in 156 .. 512 {
		sum += u.buffer[idx]
	}
	stored := u.extract_octal(148, 8)
	return sum == stored
}
| 230 | |
// extract_octal reads an octal number at block position `pos` with a given number of `digits`.
// Leading bytes that are not octal digits (for example spaces) are skipped,
// then digits are accumulated until a non-octal byte or the end of the field.
// Returns 0 when the field holds no octal digit, or when `pos` is negative or
// `digits` is not positive.
fn (mut u Untar) extract_octal(pos int, digits int) u64 {
	if pos < 0 || digits <= 0 {
		return 0
	}
	// Clamp the field to the buffer and test the bound before indexing, so a
	// field that ends at the last byte of the block is never read past its end.
	mut end := pos + digits
	if end > u.buffer.len {
		end = u.buffer.len
	}
	mut p := pos
	for p < end && (u.buffer[p] < `0` || u.buffer[p] > `7`) {
		p++
	}
	mut value := u64(0)
	for p < end && u.buffer[p] >= `0` && u.buffer[p] <= `7` {
		value = value * 8 + u64(u.buffer[p] - `0`)
		p++
	}
	return value
}
| 256 | |
// LongPath collects the bytes of a file long name that is spread over one or
// more data blocks.
@[heap]
struct LongPath {
mut:
	name     []u8 // fixed size buffer for the name, zero filled until written
	last_pos int  // position in `name` where the next append starts
}

// new_long_path builds a LongPath whose name buffer has a fixed length of
// `size` bytes, all zero.
fn new_long_path(size int) &LongPath {
	long_path := &LongPath{
		name: []u8{len: size}
	}
	return long_path
}

// append copies `data` into the name buffer right after the bytes appended so
// far. When `data` does not fit in the remaining space nothing is copied.
fn (mut l LongPath) append(data []u8) {
	end_pos := l.last_pos + data.len
	if end_pos > l.name.len {
		return
	}
	mut dst := l.last_pos
	for b in data {
		l.name[dst] = b
		dst++
	}
	l.last_pos = end_pos
}

// get_path returns the name as a string, read as a C string: it ends at the
// first zero byte, or at the end of the buffer when there is none.
fn (l LongPath) get_path() string {
	mut end := 0
	for end < l.name.len && l.name[end] != 0 {
		end++
	}
	return l.name[0..end].bytestr()
}
| 292 | |