v2 / vlib / encoding / base32 / base32.v
381 lines · 340 sloc · 10.98 KB · 008aaad99981918c51194d7aaaaaccb4c258f244
Raw
1// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4
5// Module base32 implements base32 encoding as specified by RFC 4648.
6// Based off: https://github.com/golang/go/blob/master/src/encoding/base32/base32.go
7// Last commit: https://github.com/golang/go/commit/e1b62efaf33988a5153510898d37309cee78f26e
8
// TODO: standardize fn naming conventions, strip newlines on input, and clean up any Go remnants
10
11module base32
12
13pub const std_padding = `=` // Standard padding character
14
15pub const no_padding = u8(-1) // No padding
16
17pub const std_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'.bytes()
18pub const hex_alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUV'.bytes()
19
20struct Encoding {
21 padding_char u8
22 alphabet []u8
23mut:
24 decode_map [256]u8
25}
26
27// decode_string_to_string decodes a V string `src` using Base32
28// and returns the decoded string or a `corrupt_input_error_msg` error.
29pub fn decode_string_to_string(src string) !string {
30 return decode_to_string(src.bytes())
31}
32
33// decode_to_string decodes a byte array `src` using Base32
34// and returns the decoded string or a `corrupt_input_error_msg` error.
35pub fn decode_to_string(src []u8) !string {
36 res := decode(src)!
37 return res.bytestr()
38}
39
40// decode decodes a byte array `src` using Base32
41// and returns the decoded bytes or a `corrupt_input_error_msg` error.
42pub fn decode(src []u8) ![]u8 {
43 mut e := new_encoding(std_alphabet)
44 return e.decode(src)
45}
46
47// encode_string_to_string encodes the V string `src` using Base32
48// and returns the encoded bytes as a V string.
49pub fn encode_string_to_string(src string) string {
50 return encode_to_string(src.bytes())
51}
52
53// encode_to_string encodes a byte array `src` using Base32 and
54// returns the encoded bytes as a V string.
55pub fn encode_to_string(src []u8) string {
56 return encode(src).bytestr()
57}
58
59// encode encodes a byte array `src` using Base32 and returns the
60// encoded bytes.
61pub fn encode(src []u8) []u8 {
62 e := new_encoding(std_alphabet)
63 return e.encode(src)
64}
65
66// encode_to_string encodes the Base32 encoding of `src` with
67// the encoding `enc` and returns the encoded bytes as a V string.
68pub fn (enc &Encoding) encode_to_string(src []u8) string {
69 return enc.encode(src).bytestr()
70}
71
72// encode_string_to_string encodes a V string `src` using Base32 with
73// the encoding `enc` and returns the encoded bytes as a V string.
74pub fn (enc &Encoding) encode_string_to_string(src string) string {
75 return enc.encode(src.bytes()).bytestr()
76}
77
78// new_std_encoding creates a standard Base32 `Encoding` as defined in
79// RFC 4648.
80pub fn new_std_encoding() Encoding {
81 return new_encoding_with_padding(std_alphabet, std_padding)
82}
83
84// new_std_encoding creates a standard Base32 `Encoding` identical to
85// `new_std_encoding` but with a specified character `padding`,
86// or `no_padding` to disable padding.
87// The `padding` character must not be '\r' or '\n', must not
88// be contained in the `Encoding`'s alphabet and must be a rune equal or
89// below '\xff'.
90pub fn new_std_encoding_with_padding(padding u8) Encoding {
91 return new_encoding_with_padding(std_alphabet, padding)
92}
93
94// new_encoding returns a Base32 `Encoding` with standard
95// `alphabet`s and standard padding.
96pub fn new_encoding(alphabet []u8) Encoding {
97 return new_encoding_with_padding(alphabet, std_padding)
98}
99
100// new_encoding_with_padding returns a Base32 `Encoding` with specified
101// encoding `alphabet`s and a specified `padding_char`.
102// The `padding_char` must not be '\r' or '\n', must not
103// be contained in the `Encoding`'s alphabet and must be a rune equal or
104// below '\xff'.
105pub fn new_encoding_with_padding(alphabet []u8, padding_char u8) Encoding {
106 if padding_char == `\r` || padding_char == `\n` || padding_char > 0xff {
107 panic('invalid padding')
108 }
109
110 for i := 0; i < alphabet.len; i++ {
111 if alphabet[i] == padding_char {
112 panic('padding contained in alphabet')
113 }
114 }
115
116 mut decode_map := [256]u8{}
117 for i in 0 .. alphabet.len {
118 decode_map[alphabet[i]] = u8(i)
119 }
120
121 return Encoding{
122 alphabet: alphabet
123 padding_char: padding_char
124 decode_map: decode_map
125 }
126}
127
128// encode encodes `src` using the encoding `enc`, writing
129// and returning encoded_len(src.len) u8s.
130//
131// The encoding pads the output to a multiple of 8 u8s,
132// so encode is not appropriate for use on individual blocks
133// of a large data stream.
134fn (enc &Encoding) encode(src []u8) []u8 {
135 mut buf := []u8{len: enc.encoded_len(src.len)}
136 mut dst := unsafe { buf }
137 enc.encode_(src, mut dst)
138 return buf
139}
140
141// encode_ encodes `src` using the encoding `enc`, writing
142// encoded_len(src.len) u8s to `dst`.
143//
144// The encoding pads the output to a multiple of 8 u8s,
145// so encode_ is not appropriate for use on individual blocks
146// of a large data stream.
147fn (enc &Encoding) encode_(src_ []u8, mut dst []u8) {
148 mut src := unsafe { src_ }
149 for src.len > 0 {
150 mut b := [8]u8{}
151
152 // Unpack 8x 5-bit source blocks into a 5 u8
153 // destination quantum
154 if src.len > 4 {
155 b[7] = src[4] & 0x1F
156 b[6] = src[4] >> 5
157 }
158 if src.len >= 4 {
159 b[6] |= (src[3] << 3) & 0x1F
160 b[5] = (src[3] >> 2) & 0x1F
161 b[4] = src[3] >> 7
162 }
163 if src.len >= 3 {
164 b[4] |= (src[2] << 1) & 0x1F
165 b[3] = (src[2] >> 4) & 0x1F
166 }
167 if src.len >= 2 {
168 b[3] |= (src[1] << 4) & 0x1F
169 b[2] = (src[1] >> 1) & 0x1F
170 b[1] = (src[1] >> 6) & 0x1F
171 }
172 if src.len >= 1 {
173 b[1] |= (src[0] << 2) & 0x1F
174 b[0] = src[0] >> 3
175 }
176
177 // Encode 5-bit blocks using the base32 alphabet
178 if dst.len >= 8 {
179 // Common case, unrolled for extra performance
180 dst[0] = enc.alphabet[b[0] & 31]
181 dst[1] = enc.alphabet[b[1] & 31]
182 dst[2] = enc.alphabet[b[2] & 31]
183 dst[3] = enc.alphabet[b[3] & 31]
184 dst[4] = enc.alphabet[b[4] & 31]
185 dst[5] = enc.alphabet[b[5] & 31]
186 dst[6] = enc.alphabet[b[6] & 31]
187 dst[7] = enc.alphabet[b[7] & 31]
188 } else {
189 for i := 0; i < dst.len; i++ {
190 dst[i] = enc.alphabet[b[i] & 31]
191 }
192 }
193
194 // Pad the final quantum
195 if src.len < 5 {
196 if enc.padding_char == no_padding {
197 break
198 }
199
200 dst[7] = enc.padding_char
201 if src.len < 4 {
202 dst[6] = enc.padding_char
203 dst[5] = enc.padding_char
204 if src.len < 3 {
205 dst[4] = enc.padding_char
206 if src.len < 2 {
207 dst[3] = enc.padding_char
208 dst[2] = enc.padding_char
209 }
210 }
211 }
212
213 break
214 }
215 src = unsafe { src[5..] }
216 dst = unsafe { dst[8..] }
217 }
218}
219
220fn (enc &Encoding) encoded_len(n int) int {
221 if enc.padding_char == no_padding {
222 return (n * 8 + 4) / 5
223 }
224 return (n + 4) / 5 * 8
225}
226
227// decode_string decodes a V string `src` using Base32 with the encoding `enc`
228// and returns the decoded bytes or a `corrupt_input_error_msg` error.
229pub fn (enc &Encoding) decode_string(src string) ![]u8 {
230 return enc.decode(src.bytes())
231 // mut buf := strip_newlines(src.bytes())
232 // mut dst := unsafe { buf }
233 // // l := strip_newlines(mut buf)
234 // n, _ := enc.decode_(buf, mut dst)?
235 // return buf[..n]
236}
237
238// decode_string_to_string decodes a V string `src` using Base32 with the
239// encoding `enc` and returns the decoded V string or a `corrupt_input_error_msg` error.
240pub fn (enc &Encoding) decode_string_to_string(src string) !string {
241 decoded := enc.decode_string(src)!
242 return decoded.bytestr()
243}
244
245// decode decodes `src` using the encoding `enc`. It returns the decoded bytes
246// written or a `corrupt_input_error_msg` error.
247// New line characters (\r and \n) are ignored.
248pub fn (enc &Encoding) decode(src []u8) ![]u8 {
249 mut buf := []u8{len: src.len}
250 // mut dst := unsafe { buf }
251 // l := strip_newlines(mut dst, src)
252 // n, _ := enc.decode_(src[..l], mut dst) or {
253 // src := strip_newlines(src_)
254 n, _ := enc.decode_(src, mut buf) or { return err }
255 return buf[..n]
256}
257
258// decode_ returns the number of bytes written and a boolean value, which
259// indicates if end-of-message padding was encountered and thus any
260// additional data is an error. This method assumes that src has been
261// stripped of all supported whitespace (`\r` and `\n`).
262fn (enc &Encoding) decode_(src_ []u8, mut dst []u8) !(int, bool) {
263 mut src := unsafe { src_ }
264 mut n := 0
265 mut end := false
266 // Lift the nil check outside of the loop.
267 // _ = enc.decode_map
268
269 mut dsti := 0
270 olen := src.len
271
272 for src.len > 0 && !end {
273 // Decode quantum using the base32 alphabet
274 mut dbuf := [8]u8{}
275 mut dlen := 8
276
277 for j := 0; j < 8; {
278 if src.len == 0 {
279 if enc.padding_char != no_padding {
280 // We have reached the end and are missing padding
281 // return n, false, corrupt_input_error(olen - src.len - j)
282 return error(corrupt_input_error_msg(olen - src.len - j))
283 }
284 // We have reached the end and are not expecting any padding
285 dlen, end = j, true
286 break
287 }
288 in0 := src[0]
289 unsafe {
290 src = src[1..]
291 }
292 if in0 == enc.padding_char && j >= 2 && src.len < 8 {
293 // We`ve reached the end and there`s padding
294 if src.len + j < 8 - 1 {
295 // not enough padding
296 // return n, false, corrupt_input_error(olen)
297 return error(corrupt_input_error_msg(olen))
298 }
299 for k := 0; k < 8 - 1 - j; k++ {
300 if src.len > k && src[k] != enc.padding_char {
301 // incorrect padding
302 // return n, false, corrupt_input_error(olen - src.len + k - 1)
303 return error(corrupt_input_error_msg(olen - src.len + k - 1))
304 }
305 }
306 dlen, end = j, true
307 // 7, 5 and 2 are not valid padding lengths, and so 1, 3 and 6 are not
308 // valid dlen values. See RFC 4648 Section 6 'Base 32 Encoding' listing
309 // the five valid padding lengths, and Section 9 'Illustrations and
310 // Examples' for an illustration for how the 1st, 3rd and 6th base32
311 // src u8s do not yield enough information to decode a dst u8.
312 if dlen == 1 || dlen == 3 || dlen == 6 {
313 // return n, false, corrupt_input_error(olen - src.len - 1)
314 return error(corrupt_input_error_msg(olen - src.len - 1))
315 }
316 break
317 }
318
319 dbuf[j] = enc.decode_map[in0]
320 if dbuf[j] == 0xFF {
321 // return n, false, corrupt_input_error(olen - src.len - 1)
322 return error(corrupt_input_error_msg(olen - src.len - 1))
323 }
324 j++
325 }
326
327 // Pack 8x 5-bit source blocks into 5 u8 destination
328 // quantum
329 if dlen == 8 {
330 dst[dsti + 4] = dbuf[6] << 5 | dbuf[7]
331 n++
332 }
333 if dlen >= 7 {
334 dst[dsti + 3] = dbuf[4] << 7 | dbuf[5] << 2 | dbuf[6] >> 3
335 n++
336 }
337 if dlen >= 5 {
338 dst[dsti + 2] = dbuf[3] << 4 | dbuf[4] >> 1
339 n++
340 }
341 if dlen >= 4 {
342 dst[dsti + 1] = dbuf[1] << 6 | dbuf[2] << 1 | dbuf[3] >> 4
343 n++
344 }
345 if dlen >= 2 {
346 dst[dsti + 0] = dbuf[0] << 3 | dbuf[1] >> 2
347 n++
348 }
349 dsti += 5
350 }
351 return n, end
352}
353
354// strip_newlines removes newline characters and returns the number
355// of non-newline characters copied to dst.
356// fn strip_newlines(mut dst []u8, src []u8) int {
357// mut offset := 0
358// for b in src {
359// if b in [`\r`, `\n`] {
360// continue
361// }
362// dst[offset] = b
363// offset++
364// }
365// return offset
366// }
367fn strip_newlines(src []u8) []u8 {
368 mut dst := []u8{}
369 for b in src {
370 if b in [`\r`, `\n`] {
371 continue
372 }
373 dst << b
374 }
375 return dst
376}
377
378fn corrupt_input_error_msg(e int) string {
379 // return error('illegal base32 data at input byte ' + strconv.FormatInt(int64(e), 10)
380 return 'illegal base32 data at input byte ${e}'
381}
382