| 1 | // Copyright (c) 2019-2026 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | module s3 |
| 5 | |
| 6 | import crypto.hmac |
| 7 | import crypto.sha256 |
| 8 | import time |
| 9 | |
// service_name is the SigV4 service identifier baked into the signing key.
// It is also the service component of the credential scope built by
// `sign_request` and `presign_url`.
pub const service_name = 's3'

// algo is the SigV4 algorithm marker S3 expects. It is the first line of the
// string-to-sign, the prefix of the Authorization header value, and the value
// of the `X-Amz-Algorithm` presign query parameter.
pub const algo = 'AWS4-HMAC-SHA256'

// unsigned_payload is the magic string used in `x-amz-content-sha256` when
// the payload is not pre-hashed. Used by the single-shot put path to avoid
// buffering / re-scanning the entire body just to sign it.
// `sign_request` substitutes it when `payload_hash` is empty, and
// `presign_url` always signs with it.
pub const unsigned_payload = 'UNSIGNED-PAYLOAD'

// empty_sha256 is `sha256("")` precomputed (lowercase hex). Used for
// HEAD/GET/DELETE where there is no body and we want a real hash for
// stricter S3 endpoints.
pub const empty_sha256 = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
| 24 | |
// SignRequest describes the request to sign. The signer is intentionally
// agnostic of HTTP option semantics (ACL, storage class, …) — the caller
// pre-fills `extra_headers` with whatever it intends to send. That keeps the
// signer pure and easy to test against published SigV4 reference vectors.
//
// `path` and `query` are placed into the canonical request and the output URL
// verbatim; the signer does not encode or sort them.
pub struct SignRequest {
pub:
	method        string @[required] // GET, PUT, POST, DELETE, HEAD
	path          string @[required] // canonical URI (already URI-encoded, kept as-is)
	query         string // canonical query string (sorted, encoded; no leading '?')
	payload_hash  string // hex SHA-256 of body or `unsigned_payload`
	extra_headers map[string]string // lowercase keys, raw values — will be added to canonical/signed set
	sign_time     time.Time // when omitted (Time{}) we use time.utc()
}
| 38 | |
// SignedRequest is the output of header signing. It is produced by
// `sign_request`; `headers` already contains the `authorization` entry, so a
// caller can send `headers` as-is.
pub struct SignedRequest {
pub:
	method        string // canonical HTTP method that was signed (GET, PUT, …)
	url           string // scheme://host<extra_path>/<bucket>/<key>?<query>
	host          string // host[:port], suitable for the Host header
	amz_date      string // YYYYMMDDTHHMMSSZ
	authorization string // ready-to-send Authorization header value
	headers       map[string]string // ALL headers that MUST be sent for the signature to verify
}
| 49 | |
// sign_request builds the SigV4 Authorization header for an HTTP request.
//
// The returned `SignedRequest.headers` is the full set the caller must send
// (excluding `Content-Length`, which the HTTP client adds itself). Adding,
// removing, or mutating a header after signing will break the signature.
//
// Every entry of `req.extra_headers` is lowercased and signed together with
// `host`, `x-amz-content-sha256`, `x-amz-date` and, when the credentials carry
// a session token, `x-amz-security-token`. Those signer-owned headers always
// win over a same-named entry in `extra_headers`.
//
// Errors:
//   - whatever `creds.validate()` returns
//   - `InvalidMethod`   — method is not GET/PUT/POST/DELETE/HEAD
//   - `InvalidEndpoint` — no host could be derived from the credentials
//   - `InvalidHeader`   — an extra header contains CR/LF, or is named
//     `authorization` (that header is the signer's output and cannot be signed)
pub fn sign_request(creds Credentials, req SignRequest) !SignedRequest {
	creds.validate()!
	method := normalize_method(req.method) or {
		return new_error('InvalidMethod',
			'Method must be GET, PUT, POST, DELETE or HEAD; got: ${req.method}')
	}
	host := canonical_host(creds, '')
	if host == '' {
		return new_error('InvalidEndpoint',
			'No endpoint and no bucket provided — cannot determine host')
	}
	region := creds.resolved_region()
	// A zero-valued Time (year 0) means "not provided": sign with the current UTC time.
	now := if req.sign_time.year == 0 { time.utc() } else { req.sign_time }
	amz_date := format_amz_date(now)
	amz_day := amz_date[..8] // YYYYMMDD prefix, used for the credential scope
	payload := if req.payload_hash == '' { unsigned_payload } else { req.payload_hash }

	// Build the headers set. We always sign host + amz date + content sha
	// because S3 requires them. Reject CRLF before they reach the wire.
	mut headers := map[string]string{}
	for k, v in req.extra_headers {
		if contains_crlf(v) || contains_crlf(k) {
			return new_error('InvalidHeader',
				'Header "${k}" contains CR/LF — refused (header injection guard)')
		}
		name := k.to_lower()
		// A caller-supplied Authorization value would be folded into the
		// canonical request and then replaced by the computed one below, so
		// the signed value and the sent value could never match.
		if name == 'authorization' {
			return new_error('InvalidHeader',
				'Header "${k}" is produced by the signer and cannot be passed in extra_headers')
		}
		headers[name] = v
	}
	headers['host'] = host
	headers['x-amz-content-sha256'] = payload
	headers['x-amz-date'] = amz_date
	if creds.session_token != '' {
		headers['x-amz-security-token'] = creds.session_token
	}

	// Canonical request -> string-to-sign -> signature.
	signed_header_names := sorted_keys(headers).join(';')
	canonical := build_canonical_request(method, req.path, req.query, headers, signed_header_names,
		payload)
	credential_scope := '${amz_day}/${region}/${service_name}/aws4_request'
	string_to_sign := '${algo}\n${amz_date}\n${credential_scope}\n${sha256_hex(canonical.bytes())}'
	signature := to_hex_lower(hmac_sha256(derive_signing_key(creds.secret_access_key, amz_day,
		region, service_name), string_to_sign.bytes()))
	authorization := '${algo} Credential=${creds.access_key_id}/${credential_scope}, SignedHeaders=${signed_header_names}, Signature=${signature}'
	// Added after signing on purpose: Authorization is never part of the signed set.
	headers['authorization'] = authorization

	scheme := creds.scheme()
	mut url := '${scheme}://${host}${req.path}'
	if req.query != '' {
		url += '?' + req.query
	}
	return SignedRequest{
		method:        method
		url:           url
		host:          host
		amz_date:      amz_date
		authorization: authorization
		headers:       headers
	}
}
| 113 | |
// PresignRequest describes a presigned URL to generate. The output is a
// self-contained URL — no extra headers are required at request time
// (only `host` is signed).
pub struct PresignRequest {
pub:
	method      string @[required]
	path        string @[required] // canonical URI (already URI-encoded)
	expires_in  int = 86400 // 1..604800 seconds (SigV4 hard limit is 7 days)
	extra_query map[string]string // additional signed query params (e.g. response-content-type, x-amz-acl)
	sign_time   time.Time
}
| 124 | |
// presign_url returns a fully-formed URL signed via SigV4 query-string
// parameters. The scheme comes from `creds.scheme()`. The URL is valid until
// `now + expires_in`, where `now` is `req.sign_time` or the current UTC time
// when `sign_time` is left zero-valued.
//
// Only the `host` header is signed and the payload is signed as
// `UNSIGNED-PAYLOAD`. Entries of `req.extra_query` are URI-encoded and signed
// alongside the required `X-Amz-*` parameters.
//
// Errors:
//   - whatever `creds.validate()` returns
//   - `InvalidMethod`     — method is not GET/PUT/POST/DELETE/HEAD
//   - `InvalidExpiry`     — `expires_in` outside 1..604800
//   - `InvalidEndpoint`   — no host could be derived from the credentials
//   - `InvalidQueryParam` — an extra query param contains CR/LF, or its name
//     collides with a parameter the signer itself sets
pub fn presign_url(creds Credentials, req PresignRequest) !string {
	creds.validate()!
	method := normalize_method(req.method) or {
		return new_error('InvalidMethod',
			'Method must be GET, PUT, POST, DELETE or HEAD; got: ${req.method}')
	}
	if req.expires_in < 1 || req.expires_in > 604800 {
		return new_error('InvalidExpiry', 'expires_in must be between 1 and 604800 seconds')
	}
	host := canonical_host(creds, '')
	if host == '' {
		return new_error('InvalidEndpoint',
			'No endpoint and no bucket provided — cannot determine host')
	}
	region := creds.resolved_region()
	// A zero-valued Time (year 0) means "not provided": sign with the current UTC time.
	now := if req.sign_time.year == 0 { time.utc() } else { req.sign_time }
	amz_date := format_amz_date(now)
	amz_day := amz_date[..8] // YYYYMMDD prefix, used for the credential scope
	credential_scope := '${amz_day}/${region}/${service_name}/aws4_request'

	// Required signed query parameters; we then merge in extras from the caller.
	mut params := map[string]string{}
	params['X-Amz-Algorithm'] = algo
	params['X-Amz-Credential'] = '${creds.access_key_id}/${credential_scope}'
	params['X-Amz-Date'] = amz_date
	params['X-Amz-Expires'] = req.expires_in.str()
	params['X-Amz-SignedHeaders'] = 'host'
	if creds.session_token != '' {
		params['X-Amz-Security-Token'] = creds.session_token
	}
	for k, v in req.extra_query {
		if contains_crlf(k) || contains_crlf(v) {
			return new_error('InvalidQueryParam', 'Query param "${k}" contains CR/LF — refused')
		}
		// Extras must not replace signer-owned parameters: overriding
		// X-Amz-Expires would bypass the range check above, and a caller
		// supplied X-Amz-Signature would end up in the URL twice.
		if k in params || k == 'X-Amz-Signature' {
			return new_error('InvalidQueryParam',
				'Query param "${k}" is set by the signer and cannot be passed in extra_query')
		}
		params[k] = v
	}

	canonical_query := canonical_query_string(params)
	headers := {
		'host': host
	}
	canonical := build_canonical_request(method, req.path, canonical_query, headers, 'host',
		unsigned_payload)
	string_to_sign := '${algo}\n${amz_date}\n${credential_scope}\n${sha256_hex(canonical.bytes())}'
	signature := to_hex_lower(hmac_sha256(derive_signing_key(creds.secret_access_key, amz_day,
		region, service_name), string_to_sign.bytes()))
	scheme := creds.scheme()
	// The signature is appended last; it is not part of the signed query string.
	return '${scheme}://${host}${req.path}?${canonical_query}&X-Amz-Signature=${signature}'
}
| 177 | |
// build_canonical_request assembles the canonical request string per
// SigV4 §3.2. `path` and `query` MUST already be URI-encoded.
//
// Layout, one element per line: method, path, query, one `name:value` line
// per header (sorted by name, values whitespace-normalized), an empty line,
// `signed_headers`, `payload_hash`. Header names are used exactly as given.
pub fn build_canonical_request(method string, path string, query string, headers map[string]string,
	signed_headers string, payload_hash string) string {
	names := sorted_keys(headers)
	mut header_lines := []string{len: names.len}
	for i, name in names {
		header_lines[i] = name + ':' + normalize_header_value(headers[name])
	}
	// The empty element yields the blank line that terminates the header block.
	sections := [method, path, query, header_lines.join('\n'), '', signed_headers, payload_hash]
	return sections.join('\n')
}
| 189 | |
// canonical_query_string URI-encodes every key and value of `params`, sorts
// the pairs by their *encoded* key (SigV4 orders parameters after encoding),
// and joins them as `k=v&k=v`.
//
// `params` must hold raw, not yet encoded, names and values. A map cannot
// hold the same key twice, so no secondary sort by value is needed.
pub fn canonical_query_string(params map[string]string) string {
	// Encode first: sorting raw keys can give a different order than sorting
	// encoded ones once a key contains a character that is percent-encoded
	// (e.g. raw `az` < `a{`, but encoded `a%7B` < `az`).
	mut encoded := map[string]string{}
	for k, v in params {
		encoded[uri_encode_query(k)] = uri_encode_query(v)
	}
	names := sorted_keys(encoded)
	mut parts := []string{cap: names.len}
	for name in names {
		parts << '${name}=${encoded[name]}'
	}
	return parts.join('&')
}
| 201 | |
// derive_signing_key runs the SigV4 HMAC-SHA-256 chain: starting from the
// key `"AWS4" + secret`, it successively MACs `date`, `region`, `service` and
// the literal `aws4_request`, each step keyed by the previous result.
// The result is cacheable per (secret, date, region, service) tuple — left
// to the caller to memoize if needed.
pub fn derive_signing_key(secret string, date string, region string, service string) []u8 {
	mut key := ('AWS4' + secret).bytes()
	for scope_part in [date, region, service, 'aws4_request'] {
		key = hmac_sha256(key, scope_part.bytes())
	}
	return key
}
| 211 | |
// format_amz_date renders `t` as the basic ISO-8601 timestamp SigV4 uses
// (`YYYYMMDDTHHMMSSZ`). The fields of `t` are printed as-is with zero
// padding; no time-zone conversion happens, so `t` is assumed to be in UTC.
pub fn format_amz_date(t time.Time) string {
	date_part := '${t.year:04d}${t.month:02d}${t.day:02d}'
	clock_part := '${t.hour:02d}${t.minute:02d}${t.second:02d}'
	return date_part + 'T' + clock_part + 'Z'
}
| 217 | |
// canonical_host resolves the on-the-wire host. Path-style addressing is
// the default; virtual-hosted style uses `<bucket>.<endpoint-host>`. If no
// endpoint is configured we fall back to `s3.<region>.amazonaws.com` (the
// canonical default for clients pointed at AWS S3 itself).
//
// `bucket_override`, when non-empty, takes precedence over `creds.bucket`.
// An empty string is returned only when virtual-hosted style is requested,
// no endpoint is configured and no bucket is known.
pub fn canonical_host(creds Credentials, bucket_override string) string {
	mut bucket := creds.bucket
	if bucket_override != '' {
		bucket = bucket_override
	}
	endpoint_host := creds.host_only()
	if endpoint_host != '' {
		// Explicit endpoint: prefix the bucket only for virtual-hosted style.
		if creds.virtual_hosted_style && bucket != '' {
			return '${bucket}.${endpoint_host}'
		}
		return endpoint_host
	}
	// No endpoint configured: derive the AWS host from the region.
	region := creds.resolved_region()
	if !creds.virtual_hosted_style {
		return 's3.${region}.amazonaws.com'
	}
	if bucket == '' {
		return ''
	}
	return '${bucket}.s3.${region}.amazonaws.com'
}
| 240 | |
// normalize_method uppercases `m` and returns it when it is one of GET, PUT,
// POST, DELETE or HEAD; any other value yields `none`.
pub fn normalize_method(m string) ?string {
	upper := m.to_upper()
	if upper in ['GET', 'PUT', 'POST', 'DELETE', 'HEAD'] {
		return upper
	}
	return none
}
| 249 | |
// normalize_header_value collapses every run of spaces and tabs inside `v`
// to a single space and drops leading/trailing spaces and tabs, per
// SigV4 §3.2.2 step 3. No other byte is touched.
fn normalize_header_value(v string) string {
	if v == '' {
		return v
	}
	// Treat tabs as spaces, then keep only the non-empty pieces between
	// spaces; re-joining them with one space gives trim + collapse at once.
	words := v.replace('\t', ' ').split(' ').filter(it != '')
	return words.join(' ')
}
| 275 | |
// sha256_hex hashes `data` with SHA-256 and returns the digest as a
// lowercase hex string.
@[inline]
pub fn sha256_hex(data []u8) string {
	digest := sha256.sum256(data)
	return to_hex_lower(digest)
}
| 281 | |
// hmac_sha256 computes HMAC-SHA-256 of `data` under `key` and returns the raw
// MAC bytes. It wraps `crypto.hmac.new`, passing `sha256.sum256` as the hash
// function and `sha256.block_size` as the block size.
@[inline]
pub fn hmac_sha256(key []u8, data []u8) []u8 {
	mac := hmac.new(key, data, sha256.sum256, sha256.block_size)
	return mac
}
| 288 | |
// sorted_keys collects the keys of `m` and returns them sorted in ascending
// lexical order. `m` itself is not modified.
fn sorted_keys(m map[string]string) []string {
	mut names := []string{cap: m.len}
	for name, _ in m {
		names << name
	}
	names.sort()
	return names
}
| 295 | |