| 1 | // zstd(https://github.com/facebook/zstd) is a fast real-time compression algorithm developed by Facebook. |
| 2 | // zstd compression/decompression |
| 3 | module zstd |
| 4 | |
| 5 | import os |
| 6 | import encoding.binary |
| 7 | |
| 8 | #flag -I @VMODROOT/thirdparty/zstd |
| 9 | #include "zstd.c" // msvc can't compile multiple source files, so included |
| 10 | |
// Upper bound, in bytes, of a zstd frame header
// (presumably mirrors ZSTD_FRAMEHEADERSIZE_MAX from zstd.h - TODO confirm).
const frame_header_size_max = 18
// Sentinel compared against the result of C.ZSTD_getFrameContentSize:
// the frame does not record its decompressed size.
const content_size_unknown = u64(-1)
// Sentinel compared against the result of C.ZSTD_getFrameContentSize:
// the frame header could not be parsed.
const content_size_error = u64(-2)

// Size, in bytes, of the file read buffer used by `load_array`.
const buf_in_size = 1024 * 1024
// Size, in bytes, of the compressed-output staging buffer used by `store_array`.
const buf_out_size = 1024 * 1024
| 17 | |
// Version queries of the bundled zstd library.
fn C.ZSTD_versionNumber() u32
fn C.ZSTD_versionString() charptr

// Single-pass compression/decompression and their helpers.
// For compress/decompress the argument order is (dst, dst_capacity, src, src_size[, level]).
fn C.ZSTD_compress(voidptr, usize, voidptr, usize, i32) usize
fn C.ZSTD_decompress(voidptr, usize, voidptr, usize) usize
fn C.ZSTD_getFrameContentSize(voidptr, usize) u64
fn C.ZSTD_findFrameCompressedSize(voidptr, usize) usize
fn C.ZSTD_compressBound(usize) usize
// Error inspection for `usize` results.
fn C.ZSTD_isError(usize) u32
fn C.ZSTD_getErrorName(usize) charptr
// Compression level limits.
fn C.ZSTD_minCLevel() i32
fn C.ZSTD_maxCLevel() i32
fn C.ZSTD_defaultCLevel() i32
// Explicit compression context lifecycle; the first argument of the
// non-create functions is the context pointer.
fn C.ZSTD_createCCtx() &C.ZSTD_CCtx_s
fn C.ZSTD_freeCCtx(voidptr) usize
fn C.ZSTD_compressCCtx(voidptr, voidptr, usize, voidptr, usize, i32) usize
// Explicit decompression context lifecycle.
fn C.ZSTD_createDCtx() &C.ZSTD_DCtx_s
fn C.ZSTD_freeDCtx(voidptr) usize
fn C.ZSTD_decompressDCtx(voidptr, voidptr, usize, voidptr, usize) usize
| 37 | |
// Strategy selects the match-finding strategy of the compressor.
// `new_cctx` passes the numeric value to `CParameter.strategy`.
// `.default` (0) lets the library pick a strategy.
// note: new strategies _might_ be added in the future.
// Only the order (from fast to strong) is guaranteed.
pub enum Strategy {
	default  = 0
	fast     = 1
	dfast    = 2
	greedy   = 3
	lazy     = 4
	lazy2    = 5
	btlazy2  = 6
	btopt    = 7
	btultra  = 8
	btultra2 = 9
}
| 51 | |
// CParameter identifies a compression parameter that can be passed to `CCtx.set`.
// The numeric values are handed to C.ZSTD_CCtx_setParameter unchanged.
pub enum CParameter {
	// compression parameters
	// Note: When compressing with a ZSTD_CDict these parameters are superseded
	// by the parameters used to construct the ZSTD_CDict.
	// See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict).
	//
	// Set compression parameters according to pre-defined cLevel table.
	// Note that exact compression parameters are dynamically determined,
	// depending on both compression level and srcSize (when known).
	// Default level is ZSTD_CLEVEL_DEFAULT==3.
	// Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
	// Note 1 : it's possible to pass a negative compression level.
	// Note 2 : setting a level does not automatically set all other compression parameters
	// to default. Setting this will however eventually dynamically impact the compression
	// parameters which have not been manually set. The manually set
	// ones will 'stick'.
	compression_level = 100
	// Advanced compression parameters :
	// It's possible to pin down compression parameters to some specific values.
	// In which case, these values are no longer dynamically selected by the compressor
	//
	// Maximum allowed back-reference distance, expressed as power of 2.
	// This will set a memory budget for streaming decompression,
	// with larger values requiring more memory
	// and typically compressing more.
	// Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
	// Special: value 0 means "use default windowLog".
	// Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
	// requires explicitly allowing such size at streaming decompression stage.
	window_log = 101
	// Size of the initial probe table, as a power of 2.
	// Resulting memory usage is (1 << (hashLog+2)).
	// Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
	// Larger tables improve compression ratio of strategies <= dFast,
	// and improve speed of strategies > dFast.
	// Special: value 0 means "use default hashLog".
	hash_log = 102
	// Size of the multi-probe search table, as a power of 2.
	// Resulting memory usage is (1 << (chainLog+2)).
	// Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
	// Larger tables result in better and slower compression.
	// This parameter is useless for "fast" strategy.
	// It's still useful when using "dfast" strategy,
	// in which case it defines a secondary probe table.
	// Special: value 0 means "use default chainLog".
	chain_log = 103
	// Number of search attempts, as a power of 2.
	// More attempts result in better and slower compression.
	// This parameter is useless for "fast" and "dFast" strategies.
	// Special: value 0 means "use default searchLog".
	search_log = 104
	// Minimum size of searched matches.
	// Note that Zstandard can still find matches of smaller size,
	// it just tweaks its search algorithm to look for this size and larger.
	// Larger values increase compression and decompression speed, but decrease ratio.
	// Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
	// Note that currently, for all strategies < btopt, effective minimum is 4;
	// for all strategies > fast, effective maximum is 6.
	// Special: value 0 means "use default minMatchLength".
	min_match = 105
	// Impact of this field depends on strategy.
	// For strategies btopt, btultra & btultra2:
	// Length of Match considered "good enough" to stop search.
	// Larger values make compression stronger, and slower.
	// For strategy fast:
	// Distance between match sampling.
	// Larger values make compression faster, and weaker.
	// Special: value 0 means "use default targetLength".
	target_length = 106
	// See ZSTD_strategy enum definition (mirrored here by `Strategy`).
	// The higher the value of selected strategy, the more complex it is,
	// resulting in stronger and slower compression.
	// Special: value 0 means "use default strategy".
	strategy = 107
	// v1.5.6+
	// Attempts to fit compressed block size into approximately targetCBlockSize.
	// Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
	// Note that it's not a guarantee, just a convergence target (default:0).
	// No target when targetCBlockSize == 0.
	// This is helpful in low bandwidth streaming environments to improve end-to-end latency,
	// when a client can make use of partial documents (a prominent example being Chrome).
	// Note: this parameter is stable since v1.5.6.
	// It was present as an experimental parameter in earlier versions,
	// but it's not recommended using it with earlier library versions
	// due to massive performance regressions.
	target_c_block_size = 130
	// LDM mode parameters
	// Enable long distance matching.
	// This parameter is designed to improve compression ratio
	// for large inputs, by finding large matches at long distance.
	// It increases memory usage and window size.
	// Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
	// except when expressly set to a different value.
	// Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
	// compression strategy >= ZSTD_btopt (== compression level 16+)
	enable_long_distance_matching = 160
	// Size of the table for long distance matching, as a power of 2.
	// Larger values increase memory usage and compression ratio,
	// but decrease compression speed.
	// Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
	// default: windowlog - 7.
	// Special: value 0 means "automatically determine hashlog".
	ldm_hash_log = 161
	// Minimum match size for long distance matcher.
	// Larger/too small values usually decrease compression ratio.
	// Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
	// Special: value 0 means "use default value" (default: 64).
	ldm_min_match = 162
	// Log size of each bucket in the ldm hash table for collision resolution.
	// Larger values improve collision resolution but decrease compression speed.
	// The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
	// Special: value 0 means "use default value" (default: 3).
	ldm_bucket_size_log = 163
	// Frequency of inserting/looking up entries into the LDM hash table.
	// Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
	// Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
	// Larger values improve compression speed.
	// Deviating far from default value will likely result in a compression ratio decrease.
	// Special: value 0 means "automatically determine hashRateLog".
	ldm_hash_rate_log = 164
	// frame parameters
	// Content size will be written into frame header _whenever known_ (default:1)
	// Content size must be known at the beginning of compression.
	// This is automatically the case when using ZSTD_compress2(),
	// For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize()
	content_size_flag = 200
	// A 32-bits checksum of content is written at end of frame (default:0)
	checksum_flag = 201
	// When applicable, dictionary's ID is written into frame header (default:1)
	dict_id_flag = 202
	// multi-threading parameters
	// These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
	// Otherwise, trying to set any other value than default (0) will be a no-op and return an error.
	// In a situation where it's unknown if the linked library supports multi-threading or not,
	// setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
	//
	// Select how many threads will be spawned to compress in parallel.
	// When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
	// ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
	// while compression is performed in parallel, within worker thread(s).
	// (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
	// in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
	// More workers improve speed, but also increase memory usage.
	// Default value is `0`, aka "single-threaded mode" : no worker is spawned,
	// compression is performed inside Caller's thread, and all invocations are blocking
	nb_workers = 400
	// Size of a compression job. This value is enforced only when nbWorkers >= 1.
	// Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
	// 0 means default, which is dynamically determined based on compression parameters.
	// Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
	// The minimum size is automatically and transparently enforced.
	job_size = 401
	// Control the overlap size, as a fraction of window size.
	// The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
	// It helps preserve compression ratio, while each job is compressed in parallel.
	// This value is enforced only when nbWorkers >= 1.
	// Larger values increase compression ratio, but decrease speed.
	// Possible values range from 0 to 9 :
	// - 0 means "default" : value will be determined by the library, depending on strategy
	// - 1 means "no overlap"
	// - 9 means "full overlap", using a full window size.
	// Each intermediate rank increases/decreases load size by a factor 2 :
	// 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default
	// default value varies between 6 and 9, depending on strategy
	overlap_log = 402
	// note : additional experimental parameters are also available
	// within the experimental section of the API.
	// At the time of this writing, they include :
	// zstd_c_rsyncable
	// zstd_c_format
	// zstd_c_force_max_window
	// zstd_c_force_attach_dict
	// zstd_c_literal_compression_mode
	// zstd_c_target_c_block_size
	// zstd_c_src_size_hint
	// zstd_c_enable_dedicated_dict_search
	// zstd_c_stable_in_buffer
	// zstd_c_stable_out_buffer
	// zstd_c_block_delimiters
	// zstd_c_validate_sequences
	// zstd_c_use_block_splitter
	// zstd_c_use_row_match_finder
	// zstd_c_prefetch_c_dict_tables
	// zstd_c_enable_seq_producer_fallback
	// zstd_c_max_block_size
	// Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
	// note : never ever use experimentalParam? names directly;
	// also, the enums values themselves are unstable and can still change.
	//
	experimental_param1 = 500
	experimental_param2 = 10
	experimental_param3 = 1000
	experimental_param4 = 1001
	experimental_param5 = 1002
	// experimental_param6 = 1003 is now ZSTD_c_targetCBlockSize
	experimental_param7  = 1004
	experimental_param8  = 1005
	experimental_param9  = 1006
	experimental_param10 = 1007
	experimental_param11 = 1008
	experimental_param12 = 1009
	experimental_param13 = 1010
	experimental_param14 = 1011
	experimental_param15 = 1012
	experimental_param16 = 1013
	experimental_param17 = 1014
	experimental_param18 = 1015
	experimental_param19 = 1016
	experimental_param20 = 1017
}
| 262 | |
// Bounds is the result type of C.ZSTD_cParam_getBounds and C.ZSTD_dParam_getBounds.
// NOTE(review): the field order is assumed to match the C `ZSTD_bounds` struct - confirm against zstd.h.
pub struct Bounds {
pub:
	// status of the query, in the same encoding that `is_error` inspects (presumably - TODO confirm)
	error usize
	// lowest accepted value of the queried parameter
	lower_bound int
	// highest accepted value of the queried parameter
	upper_bound int
}
| 269 | |
// Advanced compression API: parameter bounds query, parameter setter
// (used by `CCtx.set`), and pledged source size declaration.
fn C.ZSTD_cParam_getBounds(CParameter) Bounds
fn C.ZSTD_CCtx_setParameter(voidptr, CParameter, i32) usize
fn C.ZSTD_CCtx_setPledgedSrcSize(voidptr, u64) usize
| 273 | |
// ResetDirective selects what C.ZSTD_CCtx_reset / C.ZSTD_DCtx_reset should reset:
// the current session, the parameters, or both.
pub enum ResetDirective {
	session_only           = 1
	parameters             = 2
	session_and_parameters = 3
}
| 279 | |
// Context reset, and single-pass compression that uses the parameters stored in the context.
// `compress` calls ZSTD_compress2 as (ctx, dst, dst_capacity, src, src_size).
fn C.ZSTD_CCtx_reset(voidptr, ResetDirective) usize
fn C.ZSTD_compress2(voidptr, voidptr, usize, voidptr, usize) usize
| 282 | |
// DParameter identifies a decompression parameter that can be passed to `DCtx.set`.
pub enum DParameter {
	// Select a size limit (in power of 2) beyond which
	// the streaming API will refuse to allocate memory buffer
	// in order to protect the host from unreasonable memory requirements.
	// This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
	// By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
	// Special: value 0 means "use default maximum windowLog".
	window_log_max = 100
	// note : additional experimental parameters are also available
	// within the experimental section of the API.
	// At the time of this writing, they include :
	// zstd_d_format
	// zstd_d_stable_out_buffer
	// zstd_d_force_ignore_checksum
	// zstd_d_ref_multipled_dicts
	// zstd_d_disable_huffman_assembly
	// Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
	// note : never ever use experimentalParam? names directly
	experimental_param1 = 1000
	experimental_param2 = 1001
	experimental_param3 = 1002
	experimental_param4 = 1003
	experimental_param5 = 1004
	experimental_param6 = 1005
}
| 308 | |
// Advanced decompression API: parameter bounds query, parameter setter
// (used by `DCtx.set`), and context reset.
fn C.ZSTD_dParam_getBounds(DParameter) Bounds
fn C.ZSTD_DCtx_setParameter(voidptr, DParameter, i32) usize
fn C.ZSTD_DCtx_reset(voidptr, ResetDirective) usize
| 312 | |
// streaming API buffers
// InBuffer describes the input handed to the streaming functions
// (`CCtx.compress_stream2`, `DCtx.decompress_stream`).
pub struct InBuffer {
pub mut:
	// start of the input data
	src voidptr
	// number of bytes available at `src`
	size usize
	// read cursor; `load_array` treats `pos < size` as "input left to consume"
	pos usize
}
| 320 | |
// OutBuffer describes the destination handed to the streaming functions
// (`CCtx.compress_stream2`, `DCtx.decompress_stream`).
pub struct OutBuffer {
pub mut:
	// start of the destination memory
	dst voidptr
	// capacity of `dst` in bytes
	size usize
	// write cursor; `store_array` writes the first `pos` bytes of its buffer to the file after each call
	pos usize
}
| 327 | |
// Compression stream object lifecycle (create / free).
fn C.ZSTD_createCStream() voidptr
fn C.ZSTD_freeCStream(voidptr) usize
| 330 | |
// EndDirective is the `mode` argument of `CCtx.compress_stream2`.
pub enum EndDirective {
	// collect more data, encoder decides when to output compressed result, for optimal compression ratio
	continue = 0
	// flush any data provided so far,
	// it creates (at least) one new block, that can be decoded immediately on reception;
	// frame will continue: any future data can still reference previously compressed data, improving compression.
	// note : multithreaded compression will block to flush as much output as possible.
	flush = 1
	// flush any remaining data _and_ close current frame.
	// note that frame is only closed after compressed data is fully flushed (return value == 0).
	// After that point, any additional data starts a new frame.
	// note : each frame is independent (does not reference any content from previous frame).
	// note : multithreaded compression will block to flush as much output as possible.
	end = 2
}
| 346 | |
// Streaming compression entry points. `CCtx.compress_stream2` wraps ZSTD_compressStream2
// with the argument order (ctx, output, input, mode).
fn C.ZSTD_compressStream2(voidptr, &OutBuffer, &InBuffer, EndDirective) usize
// Buffer size queries for streaming compression (presumably recommended sizes - TODO confirm).
fn C.ZSTD_CStreamInSize() usize
fn C.ZSTD_CStreamOutSize() usize
// Older streaming compression entry points (init / compress / flush / end).
fn C.ZSTD_initCStream(voidptr, i32) usize
fn C.ZSTD_compressStream(voidptr, &OutBuffer, &InBuffer) usize
fn C.ZSTD_flushStream(voidptr, &OutBuffer) usize
fn C.ZSTD_endStream(voidptr, &OutBuffer) usize
| 354 | |
// streaming decompression
// Stream object lifecycle (create / free / init).
fn C.ZSTD_createDStream() voidptr
fn C.ZSTD_freeDStream(voidptr) usize
fn C.ZSTD_initDStream(voidptr) usize
// `DCtx.decompress_stream` wraps this with the argument order (ctx, output, input).
fn C.ZSTD_decompressStream(voidptr, &OutBuffer, &InBuffer) usize
// Buffer size queries for streaming decompression (presumably recommended sizes - TODO confirm).
fn C.ZSTD_DStreamInSize() usize
fn C.ZSTD_DStreamOutSize() usize
| 362 | |
// version_number reports the version of the zstd library in use at runtime,
// encoded as a single number: MAJOR*100*100 + MINOR*100 + RELEASE.
pub fn version_number() u32 {
	runtime_version := C.ZSTD_versionNumber()
	return runtime_version
}
| 367 | |
// version_string reports the version of the zstd library in use at runtime
// as text, for example "1.5.7". The C string is copied into a V string.
pub fn version_string() string {
	raw := C.ZSTD_versionString()
	return unsafe { tos_clone(raw) }
}
| 372 | |
// is_error tells if a `usize` function result is an error code.
// C.ZSTD_isError reports an error with a non-zero value, so any non-zero
// result is treated as an error rather than relying on it being exactly 1.
pub fn is_error(code usize) bool {
	return C.ZSTD_isError(code) != 0
}
| 377 | |
// get_error_name turns a zstd result code into its readable description.
// The C string returned by the library is copied into a V string.
pub fn get_error_name(code usize) string {
	unsafe {
		name := &u8(C.ZSTD_getErrorName(code))
		return tos_clone(name)
	}
}
| 382 | |
// check_error inspects a zstd result code.
// It returns normally when `code` is not an error, otherwise it returns
// an error whose message is the name zstd gives to that code.
pub fn check_error(code usize) ! {
	if !is_error(code) {
		return
	}
	return error(get_error_name(code))
}
| 389 | |
// min_c_level reports the lowest (negative) compression level the library accepts.
pub fn min_c_level() int {
	return int(C.ZSTD_minCLevel())
}
| 394 | |
// max_c_level reports the highest compression level the library offers.
pub fn max_c_level() int {
	return int(C.ZSTD_maxCLevel())
}
| 399 | |
// default_c_level reports the compression level the library uses by default.
// It is also the default of `CompressParams.compression_level`.
pub fn default_c_level() int {
	return int(C.ZSTD_defaultCLevel())
}
| 404 | |
// CompressParams holds the optional settings accepted by `compress`,
// `new_cctx` and `store_array`. `new_cctx` forwards each field to the
// matching `CParameter` of the compression context.
@[params]
pub struct CompressParams {
pub:
	// compression level, regular range 1~22; see min_c_level()/max_c_level()
	// for the limits reported by the library. Defaults to default_c_level().
	compression_level int = default_c_level()
	// how many threads will be spawned to compress in parallel
	// (forwarded as `.nb_workers`, except for tinyc on windows)
	nb_threads int = 1
	// whether a content checksum is written at the end of the frame
	checksum_flag bool = true
	// match-finding strategy; `.default` leaves the choice to the library
	strategy Strategy = .default
}
| 415 | |
// compress runs a single-pass zstd compression over `data` and returns the
// compressed bytes. The compression context is configured from `params`
// and released before returning.
// The returned array is a slice of a buffer sized by C.ZSTD_compressBound.
// Example: b := 'abcdef'.repeat(1000).bytes(); cmpr := zstd.compress(b, compression_level: 10)!; assert cmpr.len < b.len; dc := zstd.decompress(cmpr)!; assert b == dc
pub fn compress(data []u8, params CompressParams) ![]u8 {
	// Worst-case compressed size for an input of this length.
	bound := C.ZSTD_compressBound(data.len)
	check_error(bound)!
	mut out := []u8{len: int(bound)}
	mut cctx := new_cctx(params)!
	defer {
		cctx.free_cctx()
	}
	written := C.ZSTD_compress2(cctx.ctx, out.data, out.len, data.data, data.len)
	check_error(written)!
	return out[..int(written)]
}
| 431 | |
// DecompressParams holds the optional settings accepted by `decompress`,
// `new_dctx` and `load_array`.
@[params]
pub struct DecompressParams {
pub:
	// forwarded by `new_dctx` as `DParameter.window_log_max`;
	// the zero default means "use default maximum windowLog"
	window_log_max int
}
| 437 | |
// decompress runs a single-pass zstd decompression over `data` and returns
// the decompressed bytes in a new array.
// The output size is taken from the frame header, so frames that do not
// record their content size are rejected (use the streaming API for those),
// as are frames declaring an empty content.
// NOTE: `params` is accepted for API symmetry, but this single-pass
// implementation does not consult it.
// Example: b := 'abcdef'.repeat(1000).bytes(); cmpr := zstd.compress(b, compression_level: 10)!; assert cmpr.len < b.len; dc := zstd.decompress(cmpr)!; assert b == dc
pub fn decompress(data []u8, params DecompressParams) ![]u8 {
	// Pass the real input length: claiming more bytes than `data` holds would
	// let the header parser read past the end of short or truncated input.
	dst_capacity := C.ZSTD_getFrameContentSize(data.data, usize(data.len))
	if dst_capacity == content_size_unknown {
		return error('The size cannot be determined, try use streaming mode to decompress data?')
	} else if dst_capacity == content_size_error {
		return error('An error occurred (e.g. invalid magic number, srcSize too small)')
	} else if dst_capacity == 0 {
		return error('The frame is valid but empty')
	}
	// The header is untrusted input; a V array length must fit in an `int`.
	if dst_capacity > u64(max_i32) {
		return error('The declared content size ${dst_capacity} is too large to decompress in a single pass')
	}
	mut dst := []u8{len: int(dst_capacity)}
	decompressed_size := C.ZSTD_decompress(dst.data, dst.len, data.data, data.len)
	check_error(decompressed_size)!
	return dst[..int(decompressed_size)]
}
| 455 | |
// CCtx wraps a native zstd compression context.
// Create it with `new_cctx` and release it with `free_cctx`.
pub struct CCtx {
mut:
	// pointer obtained from C.ZSTD_createCCtx
	ctx &C.ZSTD_CCtx_s
}
| 460 | |
// new_cctx creates a compression context configured from `params`.
// It returns an error when the native context cannot be allocated or when
// zstd rejects one of the parameters; in the latter case the native context
// is released before returning, so nothing leaks.
// The caller owns the returned context and must call `free_cctx` on it.
pub fn new_cctx(params CompressParams) !&CCtx {
	ctx := C.ZSTD_createCCtx()
	if isnil(ctx) {
		return error('new_cctx() failed!')
	}
	mut cctx := &CCtx{ctx}
	cctx.apply_params(params) or {
		// A rejected parameter must not leak the freshly created native context.
		cctx.free_cctx()
		return err
	}
	return cctx
}

// apply_params forwards every field of `params` to the native context, stopping at the first error.
fn (mut c CCtx) apply_params(params CompressParams) ! {
	c.set(.compression_level, params.compression_level)!
	$if !(tinyc && windows) {
		// TODO: tinyc on windows doesn't support multiple thread
		c.set(.nb_workers, params.nb_threads)!
	}
	c.set(.checksum_flag, if params.checksum_flag { 1 } else { 0 })!
	c.set(.strategy, int(params.strategy))!
}
| 478 | |
// set assigns value `val` to the compression parameter `c_param` of this context.
// It returns an error when zstd rejects the parameter or the value.
pub fn (mut c CCtx) set(c_param CParameter, val int) ! {
	status := C.ZSTD_CCtx_setParameter(c.ctx, c_param, val)
	check_error(status)!
}
| 483 | |
// compress_stream2 feeds `input` to the streaming compressor and stores the
// compressed bytes in `output`; both buffers have their `pos` advanced.
// `mode`:
// .continue => keep collecting data, the encoder decides when to emit output
// .flush => emit everything provided so far
// .end => flush the remaining data and close the current frame
// It returns the value reported by C.ZSTD_compressStream2 (`store_array`
// treats 0 as "nothing left to flush"), or an error when zstd reports one.
pub fn (mut c CCtx) compress_stream2(output &OutBuffer, input &InBuffer, mode EndDirective) !usize {
	status := C.ZSTD_compressStream2(c.ctx, output, input, mode)
	if is_error(status) {
		return error(get_error_name(status))
	}
	return status
}
| 494 | |
// free_cctx releases the native compression context and returns the result
// of C.ZSTD_freeCCtx.
// The stored pointer is cleared afterwards, so an accidental second call
// passes a null pointer (which zstd accepts) instead of freeing the same
// memory twice. The context must not be used for compression after this call.
pub fn (mut c CCtx) free_cctx() usize {
	res := C.ZSTD_freeCCtx(c.ctx)
	c.ctx = unsafe { nil }
	return res
}
| 499 | |
// Opaque native compression context; only handled through pointers (see `CCtx`).
struct C.ZSTD_CCtx_s {}

// Opaque native decompression context; only handled through pointers (see `DCtx`).
struct C.ZSTD_DCtx_s {}
| 503 | |
// DCtx wraps a native zstd decompression context.
// Create it with `new_dctx` and release it with `free_dctx`.
pub struct DCtx {
mut:
	// pointer obtained from C.ZSTD_createDCtx
	ctx &C.ZSTD_DCtx_s
}
| 508 | |
// new_dctx creates a decompression context configured from `params`.
// It returns an error when the native context cannot be allocated or when
// zstd rejects `params.window_log_max`; in the latter case the native
// context is released before returning, so nothing leaks.
// The caller owns the returned context and must call `free_dctx` on it.
pub fn new_dctx(params DecompressParams) !&DCtx {
	ctx := C.ZSTD_createDCtx()
	if isnil(ctx) {
		return error('new_dctx() failed!')
	}
	mut dctx := &DCtx{ctx}
	dctx.set(.window_log_max, params.window_log_max) or {
		// A rejected parameter must not leak the freshly created native context.
		dctx.free_dctx()
		return err
	}
	return dctx
}
| 520 | |
// set assigns value `val` to the decompression parameter `d_param` of this context.
// It returns an error when zstd rejects the parameter or the value.
pub fn (mut d DCtx) set(d_param DParameter, val int) ! {
	status := C.ZSTD_DCtx_setParameter(d.ctx, d_param, val)
	check_error(status)!
}
| 525 | |
// decompress_stream feeds `input` to the streaming decompressor and stores
// the decompressed bytes in `output`; both buffers have their `pos` advanced.
// It returns the value reported by C.ZSTD_decompressStream: `load_array`
// relies on 0 meaning that a frame ended, while a non-zero value means the
// stream is not finished yet. An error is returned when zstd reports one.
pub fn (mut d DCtx) decompress_stream(output &OutBuffer, input &InBuffer) !usize {
	status := C.ZSTD_decompressStream(d.ctx, output, input)
	if is_error(status) {
		return error(get_error_name(status))
	}
	return status
}
| 533 | |
// free_dctx releases the native decompression context and returns the
// result of C.ZSTD_freeDCtx.
// The stored pointer is cleared afterwards, so an accidental second call
// passes a null pointer (which zstd accepts) instead of freeing the same
// memory twice. The context must not be used for decompression after this call.
pub fn (mut d DCtx) free_dctx() usize {
	res := C.ZSTD_freeDCtx(d.ctx)
	d.ctx = unsafe { nil }
	return res
}
| 538 | |
// store_array compresses the raw memory of `array` and stores it in file `fname`.
// The file holds a single zstd frame: first the element count as an 8-byte
// little-endian integer (flushed as its own block), then the array data.
// Extra compression parameters can be set by `params`.
// Errors from opening/writing the file or from zstd are returned; the file
// and the compression context are released on every path.
// WARNING: Because of struct padding, some bytes of a struct may be uninitialized.
// So `store_array` can make memory sanitizers fail with 'use-of-uninitialized-value'.
// It can be safely ignored.
// For example, the following struct may trigger the sanitizer:
// struct MemoryTrace {
// operation u8
// address u64
// size u8
// }
// By changing it into the following, the sanitizer check passes:
// struct MemoryTrace {
// operation u64
// address u64
// size u64
// }
pub fn store_array[T](fname string, array []T, params CompressParams) ! {
	mut fout := os.open_file(fname, 'wb')!
	// Registered before `new_cctx`, so the file is closed even if that call fails.
	defer {
		fout.close()
	}
	mut cctx := new_cctx(params)!
	defer {
		cctx.free_cctx()
	}

	mut buf_out := []u8{len: buf_out_size}
	// first, write the array.len to file
	mut len_buf := []u8{len: 8}
	binary.little_endian_put_u64(mut len_buf, u64(array.len))
	header := &InBuffer{
		src:  len_buf.data
		size: usize(len_buf.len)
	}
	cctx.compress_to_file(mut fout, mut buf_out, header, .flush)!
	// then, write the array.data to file
	payload := &InBuffer{
		src: array.data
		// Multiply in `usize`: `array.len * int(sizeof(T))` would overflow `int` for big arrays.
		size: usize(array.len) * sizeof(T)
	}
	cctx.compress_to_file(mut fout, mut buf_out, payload, .end)!
}

// compress_to_file compresses `input` with directive `mode`, writing every produced chunk
// to `fout` through the staging buffer `buf_out`, until zstd reports nothing left to flush.
fn (mut c CCtx) compress_to_file(mut fout os.File, mut buf_out []u8, input &InBuffer, mode EndDirective) ! {
	mut output := &OutBuffer{
		dst:  buf_out.data
		size: usize(buf_out.len)
	}
	for {
		// Reuse the staging buffer from its start on every round.
		output.pos = 0
		remaining := c.compress_stream2(output, input, mode)!
		if output.pos > 0 {
			fout.write(buf_out[..int(output.pos)])!
		}
		if remaining == 0 {
			break
		}
	}
}
| 596 | |
// load_array returns an array whose data is decompressed from file `fname`,
// which must have been written by `store_array` with the same element type `T`.
// Extra decompression parameters can be set by `params`.
// An error is returned when the file cannot be read, when zstd reports a
// decoding error, when the length header is missing or does not fit an
// array length, when the stream ends before the frame is complete, or when
// the decoded data does not fill the announced array.
// The file and the decompression context are released on every path.
pub fn load_array[T](fname string, params DecompressParams) ![]T {
	mut fin := os.open_file(fname, 'rb')!
	// Registered before `new_dctx`, so the file is closed even if that call fails.
	defer {
		fin.close()
	}
	mut dctx := new_dctx(params)!
	defer {
		dctx.free_dctx()
	}

	mut buf_in := []u8{len: buf_in_size}
	mut len_buf := []u8{len: 8}
	mut input := &InBuffer{
		src: buf_in.data
	}
	// Until the 8-byte length header is decoded, the output targets `len_buf`;
	// afterwards it is redirected to the memory of the result array.
	mut output := &OutBuffer{
		dst:  len_buf.data
		size: usize(len_buf.len)
	}
	mut result := []T{}
	mut have_len := false
	mut last_ret := usize(0)
	for {
		read_len := read_chunk(mut fin, mut buf_in)!
		if read_len == 0 {
			break
		}
		input.size = usize(read_len)
		input.pos = 0
		for input.pos < input.size {
			last_ret = dctx.decompress_stream(output, input)!
			if !have_len && output.pos == output.size {
				count := binary.little_endian_u64(len_buf)
				// The header comes from the file; it must fit a V array length.
				if count > u64(max_i32) {
					return error('invalid array length in header: ${count}')
				}
				result = []T{len: int(count)}
				output.dst = result.data
				output.size = usize(result.len) * sizeof(T)
				output.pos = 0
				have_len = true
			}
		}
		if read_len < buf_in.len {
			// A short read means the end of the file was reached.
			break
		}
	}
	if !have_len {
		return error('EOF before the array length header was decoded')
	}
	if last_ret != 0 {
		// The last return value from ZSTD_decompressStream did not end on a
		// frame, but we reached the end of the file! We assume this is an
		// error, and the input was truncated.
		return error('EOF before end of stream: ${last_ret}')
	}
	if output.pos != output.size {
		return error('decompressed data does not match the array length header')
	}
	return result
}

// read_chunk fills `buf` from `f` and returns the number of bytes read.
// Reaching the end of the file yields 0 instead of an error, so a file whose
// size is an exact multiple of the buffer size is handled like any other.
fn read_chunk(mut f os.File, mut buf []u8) !int {
	return f.read(mut buf) or {
		if err is os.Eof {
			return 0
		}
		return err
	}
}
| 650 | |