v / vlib / net / s3 / signer.v
294 lines · 269 sloc · 11.08 KB · 4142432483c4e8de44ab7b0d6ac944f3251e03c8
Raw
1// Copyright (c) 2019-2026 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module s3
5
6import crypto.hmac
7import crypto.sha256
8import time
9
// service_name is the service segment of the SigV4 credential scope; the
// same value is one of the inputs mixed into the derived signing key.
pub const service_name = 's3'

// algo names the signing algorithm. It is the first line of the
// string-to-sign and the leading token of the `Authorization` value.
pub const algo = 'AWS4-HMAC-SHA256'

// unsigned_payload is the literal sent in `x-amz-content-sha256` when the
// body has not been hashed ahead of time. The single-shot put path relies on
// it so the whole body does not have to be buffered or re-read for signing.
pub const unsigned_payload = 'UNSIGNED-PAYLOAD'

// empty_sha256 is the precomputed hex SHA-256 of the empty string. It is
// meant for body-less requests (HEAD/GET/DELETE) against endpoints that
// insist on a real digest.
pub const empty_sha256 = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
24
// SignRequest is the input to `sign_request`. The signer deliberately knows
// nothing about higher-level S3 options (ACL, storage class, …): whatever
// the caller wants on the wire goes into `extra_headers`. This keeps the
// signer pure and easy to check against published SigV4 reference vectors.
pub struct SignRequest {
pub:
	// HTTP verb: GET, PUT, POST, DELETE or HEAD.
	method string @[required]
	// Canonical URI. Must already be URI-encoded; it is used verbatim.
	path string @[required]
	// Canonical query string: sorted, encoded, without the leading '?'.
	query string
	// Hex SHA-256 of the body, or the `unsigned_payload` marker.
	payload_hash string
	// Additional headers to sign and send. Values are taken raw.
	extra_headers map[string]string
	// Signing instant. A zero value (Time{}) means "use time.utc()".
	sign_time time.Time
}
38
// SignedRequest carries everything `sign_request` produced for one request.
pub struct SignedRequest {
pub:
	// Normalised HTTP verb that went into the signature (GET, PUT, …).
	method string
	// Full request URL: scheme://host<extra_path>/<bucket>/<key>?<query>.
	url string
	// host[:port], usable as the Host header value.
	host string
	// Signing timestamp in YYYYMMDDTHHMMSSZ form.
	amz_date string
	// Complete value for the Authorization header.
	authorization string
	// Every header that MUST be sent for the signature to verify.
	headers map[string]string
}
49
// sign_request computes the SigV4 `Authorization` header for one request.
//
// Steps, in order: validate the credentials, normalise the method, resolve
// the host, collect the header set (caller extras first, then the mandatory
// `host`, `x-amz-content-sha256`, `x-amz-date` and, when a session token is
// configured, `x-amz-security-token`), build the canonical request, derive
// the signing key and sign.
//
// `SignedRequest.headers` is the complete set the caller has to send
// (`Content-Length` excluded — the HTTP client adds that one). Any header
// added, removed or altered after signing invalidates the signature.
//
// Errors: whatever `creds.validate()` returns, `InvalidMethod`,
// `InvalidEndpoint`, or `InvalidHeader` when an extra header name or value
// contains CR/LF.
pub fn sign_request(creds Credentials, req SignRequest) !SignedRequest {
	creds.validate()!
	verb := normalize_method(req.method) or {
		return new_error('InvalidMethod',
			'Method must be GET, PUT, POST, DELETE or HEAD; got: ${req.method}')
	}
	wire_host := canonical_host(creds, '')
	if wire_host == '' {
		return new_error('InvalidEndpoint',
			'No endpoint and no bucket provided — cannot determine host')
	}
	region := creds.resolved_region()

	// A zero sign_time means "now"; tests pin it to get stable signatures.
	mut signing_instant := req.sign_time
	if signing_instant.year == 0 {
		signing_instant = time.utc()
	}
	amz_date := format_amz_date(signing_instant)
	amz_day := amz_date[0..8]

	mut body_hash := req.payload_hash
	if body_hash == '' {
		body_hash = unsigned_payload
	}

	// Caller extras go in first; the mandatory entries below overwrite any
	// extra that uses the same (lowercased) name.
	mut out_headers := map[string]string{}
	for name, value in req.extra_headers {
		// Header-injection guard: nothing containing CR/LF reaches the wire.
		if contains_crlf(value) || contains_crlf(name) {
			return new_error('InvalidHeader',
				'Header "${name}" contains CR/LF — refused (header injection guard)')
		}
		out_headers[name.to_lower()] = value
	}
	out_headers['host'] = wire_host
	out_headers['x-amz-content-sha256'] = body_hash
	out_headers['x-amz-date'] = amz_date
	if creds.session_token != '' {
		out_headers['x-amz-security-token'] = creds.session_token
	}

	signed_names := sorted_keys(out_headers).join(';')
	canonical := build_canonical_request(verb, req.path, req.query, out_headers, signed_names,
		body_hash)
	scope := [amz_day, region, service_name, 'aws4_request'].join('/')
	to_sign := [algo, amz_date, scope, sha256_hex(canonical.bytes())].join('\n')
	signing_key := derive_signing_key(creds.secret_access_key, amz_day, region, service_name)
	signature := to_hex_lower(hmac_sha256(signing_key, to_sign.bytes()))

	auth_fields := [
		'Credential=${creds.access_key_id}/${scope}',
		'SignedHeaders=${signed_names}',
		'Signature=${signature}',
	]
	auth_value := '${algo} ' + auth_fields.join(', ')
	// Added after signing on purpose: the Authorization header is not part
	// of the signed header set.
	out_headers['authorization'] = auth_value

	scheme := creds.scheme()
	base_url := '${scheme}://${wire_host}${req.path}'
	full_url := if req.query == '' { base_url } else { base_url + '?' + req.query }
	return SignedRequest{
		method:        verb
		url:           full_url
		host:          wire_host
		amz_date:      amz_date
		authorization: auth_value
		headers:       out_headers
	}
}
113
// PresignRequest is the input to `presign_url`. The resulting URL is
// self-contained: no additional headers are needed when it is used.
pub struct PresignRequest {
pub:
	// HTTP verb the URL will be used with.
	method string @[required]
	// Canonical URI; must already be URI-encoded.
	path string @[required]
	// Lifetime in seconds, 1..604800 (SigV4 caps presigned URLs at 7 days).
	expires_in int = 86400
	// Extra query parameters to sign (e.g. response-content-type, x-amz-acl).
	extra_query map[string]string
	// Signing instant; a zero value means "use time.utc()".
	sign_time time.Time
}
124
// presign_url returns a complete URL (scheme taken from the credentials)
// signed with SigV4 query-string parameters. The URL is valid until
// `now + expires_in`.
//
// Only the `host` header is signed and the payload is declared as
// `UNSIGNED-PAYLOAD`, so the URL can be used without any extra headers.
//
// Entries of `req.extra_query` are signed alongside the mandatory
// `X-Amz-*` parameters. The mandatory parameters always take precedence:
// an extra that reuses one of their names is overwritten. Otherwise a caller
// could override the validated `X-Amz-Expires`, or produce a URL whose
// `X-Amz-Date` / `X-Amz-SignedHeaders` disagree with what was signed.
//
// Errors: whatever `creds.validate()` returns, `InvalidMethod`,
// `InvalidExpiry` (outside 1..604800), `InvalidEndpoint`, or
// `InvalidQueryParam` when an extra name or value contains CR/LF.
pub fn presign_url(creds Credentials, req PresignRequest) !string {
	creds.validate()!
	method := normalize_method(req.method) or {
		return new_error('InvalidMethod',
			'Method must be GET, PUT, POST, DELETE or HEAD; got: ${req.method}')
	}
	if req.expires_in < 1 || req.expires_in > 604800 {
		return new_error('InvalidExpiry', 'expires_in must be between 1 and 604800 seconds')
	}
	host := canonical_host(creds, '')
	if host == '' {
		return new_error('InvalidEndpoint',
			'No endpoint and no bucket provided — cannot determine host')
	}
	region := creds.resolved_region()
	// A zero sign_time means "now".
	now := if req.sign_time.year == 0 { time.utc() } else { req.sign_time }
	amz_date := format_amz_date(now)
	amz_day := amz_date[..8]
	credential_scope := '${amz_day}/${region}/${service_name}/aws4_request'
	credential := '${creds.access_key_id}/${credential_scope}'

	// Caller extras go in first so the mandatory parameters written below
	// cannot be overridden by them (same precedence as in sign_request).
	mut params := map[string]string{}
	for k, v in req.extra_query {
		if contains_crlf(k) || contains_crlf(v) {
			return new_error('InvalidQueryParam', 'Query param "${k}" contains CR/LF — refused')
		}
		params[k] = v
	}
	params['X-Amz-Algorithm'] = algo
	params['X-Amz-Credential'] = credential
	params['X-Amz-Date'] = amz_date
	params['X-Amz-Expires'] = req.expires_in.str()
	params['X-Amz-SignedHeaders'] = 'host'
	if creds.session_token != '' {
		params['X-Amz-Security-Token'] = creds.session_token
	}

	canonical_query := canonical_query_string(params)
	headers := {
		'host': host
	}
	canonical := build_canonical_request(method, req.path, canonical_query, headers, 'host',
		unsigned_payload)
	string_to_sign := '${algo}\n${amz_date}\n${credential_scope}\n${sha256_hex(canonical.bytes())}'
	signature := to_hex_lower(hmac_sha256(derive_signing_key(creds.secret_access_key, amz_day,
		region, service_name), string_to_sign.bytes()))
	scheme := creds.scheme()
	// The signature itself is appended last; it is not part of what is signed.
	return '${scheme}://${host}${req.path}?${canonical_query}&X-Amz-Signature=${signature}'
}
177
// build_canonical_request assembles the SigV4 canonical request string:
//
//   method \n canonical-uri \n canonical-query \n
//   name:value \n (one line per header, sorted by name) \n
//   signed-headers \n payload-hash
//
// `path` and `query` MUST already be URI-encoded; they are not re-encoded
// here. An empty `path` is emitted as `/`, as SigV4 requires for an empty
// absolute path — an empty URI line could never match the server's
// canonical request. Header names in `headers` are used as given (callers
// in this file pass lowercase names); values are whitespace-normalised.
// `signed_headers` is the `;`-joined list of signed header names.
pub fn build_canonical_request(method string, path string, query string, headers map[string]string,
	signed_headers string, payload_hash string) string {
	canonical_uri := if path == '' { '/' } else { path }
	sorted := sorted_keys(headers)
	mut lines := []string{cap: sorted.len}
	for k in sorted {
		lines << '${k}:${normalize_header_value(headers[k])}'
	}
	// The first '\n' after the joined lines terminates the last header line;
	// the second separates the header section from the signed-headers list.
	return '${method}\n${canonical_uri}\n${query}\n${lines.join('\n')}\n\n${signed_headers}\n${payload_hash}'
}
189
// canonical_query_string renders `params` as `k=v&k=v`, ordered by key.
// Both keys and values are passed through `uri_encode_query`. Because the
// input is a map, a key can appear only once, so no secondary ordering by
// value is needed.
pub fn canonical_query_string(params map[string]string) string {
	names := sorted_keys(params)
	mut encoded := []string{len: names.len}
	for i, name in names {
		encoded[i] = uri_encode_query(name) + '=' + uri_encode_query(params[name])
	}
	return encoded.join('&')
}
201
// derive_signing_key runs the SigV4 key-derivation chain: starting from
// `'AWS4' + secret`, the key is re-keyed by HMAC-SHA-256 over the date, the
// region, the service and finally the literal `aws4_request`.
//
// The result depends only on (secret, date, region, service), so a caller
// may cache it; no caching is done here.
pub fn derive_signing_key(secret string, date string, region string, service string) []u8 {
	mut key := ('AWS4' + secret).bytes()
	for scope_part in [date, region, service, 'aws4_request'] {
		key = hmac_sha256(key, scope_part.bytes())
	}
	return key
}
211
// format_amz_date renders `t` as the compact ISO-8601 timestamp SigV4 uses,
// `YYYYMMDDTHHMMSSZ`. No time-zone conversion is performed: the fields of
// `t` are printed as they are, so `t` is expected to be in UTC already.
pub fn format_amz_date(t time.Time) string {
	day_part := '${t.year:04d}${t.month:02d}${t.day:02d}'
	clock_part := '${t.hour:02d}${t.minute:02d}${t.second:02d}'
	return day_part + 'T' + clock_part + 'Z'
}
217
// canonical_host works out the host name that goes on the wire.
//
// `bucket_override`, when non-empty, replaces `creds.bucket`.
//
// With a configured endpoint host:
//   - virtual-hosted style and a bucket  -> `<bucket>.<endpoint-host>`
//   - otherwise (path style)             -> `<endpoint-host>`
// Without one, the AWS default is used:
//   - path style                         -> `s3.<region>.amazonaws.com`
//   - virtual-hosted style with a bucket -> `<bucket>.s3.<region>.amazonaws.com`
//   - virtual-hosted style, no bucket    -> `''` (host cannot be determined)
pub fn canonical_host(creds Credentials, bucket_override string) string {
	mut bucket := creds.bucket
	if bucket_override != '' {
		bucket = bucket_override
	}
	endpoint_host := creds.host_only()
	if endpoint_host != '' {
		if creds.virtual_hosted_style && bucket != '' {
			return '${bucket}.${endpoint_host}'
		}
		return endpoint_host
	}
	region := creds.resolved_region()
	if !creds.virtual_hosted_style {
		return 's3.${region}.amazonaws.com'
	}
	if bucket == '' {
		return ''
	}
	return '${bucket}.s3.${region}.amazonaws.com'
}
240
// normalize_method upper-cases `m` and returns it when it is one of GET,
// PUT, POST, DELETE or HEAD; any other value yields `none`.
pub fn normalize_method(m string) ?string {
	upper := m.to_upper()
	if upper in ['GET', 'PUT', 'POST', 'DELETE', 'HEAD'] {
		return upper
	}
	return none
}
249
// normalize_header_value prepares a header value for the canonical request:
// leading and trailing spaces/tabs are dropped and every internal run of
// spaces/tabs becomes a single space. Only ' ' and '\t' count as whitespace;
// all other bytes are kept unchanged.
fn normalize_header_value(v string) string {
	// Treat tabs as spaces, cut on every space, discard the empty pieces that
	// leading, trailing and repeated separators leave behind, then re-join.
	words := v.replace('\t', ' ').split(' ').filter(it != '')
	return words.join(' ')
}
275
// sha256_hex hashes `data` with SHA-256 and returns the digest rendered by
// `to_hex_lower` (lowercase hex).
@[inline]
pub fn sha256_hex(data []u8) string {
	digest := sha256.sum256(data)
	return to_hex_lower(digest)
}
281
// hmac_sha256 computes HMAC-SHA-256 of `data` under `key`. It is a thin
// adapter over `crypto.hmac.new`, supplying `sha256.sum256` as the hash
// function and `sha256.block_size` as the block size.
@[inline]
pub fn hmac_sha256(key []u8, data []u8) []u8 {
	mac := hmac.new(key, data, sha256.sum256, sha256.block_size)
	return mac
}
288
// sorted_keys collects the keys of `m` and returns them in ascending
// lexical order. The map itself is not modified.
fn sorted_keys(m map[string]string) []string {
	mut names := m.keys()
	names.sort(a < b)
	return names
}
295