v / vlib / net / s3 / client.v
405 lines · 382 sloc · 12.49 KB · ddd4ac7880f392ad6c39719af05f01d29eb0ad3a
Raw
// Copyright (c) 2019-2026 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
4module s3
5
6import net.http
7import time
8
// Client is the handle through which S3 operations are issued. It holds the
// default credentials together with the HTTP tuning knobs; anything that is
// specific to a single call is supplied through that method's option params.
// Build one Client and reuse it for as many objects as needed.
@[heap]
pub struct Client {
pub:
	credentials Credentials
	// part_size: size in bytes of one multipart-upload chunk. The default
	// is 5 MiB, which is the S3 minimum.
	part_size i64 = 5 * 1024 * 1024
	// queue_size: intended number of parts uploaded in parallel during a
	// multipart upload (reserved; uploads are currently sequential).
	queue_size int = 5
	// retry: number of retry attempts for failed uploads.
	retry int = 3
	// read_timeout / write_timeout are copied onto the net.http request.
	// The five-minute defaults are deliberately generous: a part can be
	// 5 MiB or more and the link may be slow.
	read_timeout  i64 = 5 * 60 * time.second
	write_timeout i64 = 5 * 60 * time.second
}
29
// new_client returns a Client wrapping `creds`. When both `access_key_id` and
// `secret_access_key` are empty, the credentials are instead taken from
// `Credentials.from_env()` and merged with `creds`.
pub fn new_client(creds Credentials) Client {
	has_explicit_keys := creds.access_key_id != '' || creds.secret_access_key != ''
	if has_explicit_keys {
		return Client{
			credentials: creds
		}
	}
	// No key material supplied: start from the environment and merge the
	// caller's (otherwise key-less) credentials into it.
	return Client{
		credentials: Credentials.from_env().merge(creds)
	}
}
41
// PutOptions carries the optional parameters of a put / write call.
// Empty strings, `.unset` enum values and `false` flags mean "do not send
// the corresponding header" (see `put_object_headers`).
@[params]
pub struct PutOptions {
pub:
	bucket              string
	content_type        string
	content_disposition string
	content_encoding    string
	cache_control       string
	acl                 Acl
	storage_class       StorageClass
	request_payer       bool
	// hash_payload: when true, the SHA-256 of the body is computed and
	// signed instead of the `UNSIGNED-PAYLOAD` marker. This gives a slightly
	// stronger integrity guarantee and costs one full pass over the body.
	hash_payload bool
}
59
// GetOptions carries the optional parameters of a get / read call.
@[params]
pub struct GetOptions {
pub:
	bucket        string
	range         string // sent as the `range` header, e.g. 'bytes=0-1023' for a partial download
	version_id    string
	request_payer bool
}
69
// StatOptions carries the optional parameters shared by the stat / size /
// exists checks (it is also the option type accepted by `delete`).
@[params]
pub struct StatOptions {
pub:
	bucket        string
	request_payer bool
}
77
// put stores `data` under `key` with a single PUT request. The whole body is
// held in memory, so prefer `upload_file` / `upload_bytes_multipart` for
// streaming or for files larger than ~100 MiB.
// Fails with `InvalidEndpoint` when no host can be derived, with whatever
// error path building, signing or the transport reports, and with an HTTP
// error for any status other than 200 or 204.
pub fn (c &Client) put(key string, data []u8, opts PutOptions) ! {
	creds := c.creds_for(opts.bucket)
	if canonical_host(creds, opts.bucket) == '' {
		return new_error('InvalidEndpoint', 'cannot determine S3 host (set endpoint or bucket)')
	}
	object_path := build_object_path(creds, opts.bucket, key)!
	// Hashing the body is opt-in; otherwise the unsigned-payload marker is signed.
	body_hash := if opts.hash_payload {
		sha256_hex(data)
	} else {
		unsigned_payload
	}
	request := sign_request(creds, SignRequest{
		method:        'PUT'
		path:          object_path
		payload_hash:  body_hash
		extra_headers: put_object_headers(opts)
	})!
	resp := c.do_http(request, data.bytestr())!
	if resp.status_code == 200 || resp.status_code == 204 {
		return
	}
	return new_http_error(resp.status_code, key, resp.body)
}
100
// put_object_headers translates `PutOptions` into request headers. Both the
// single-shot `put` and the multipart `initiate_multipart` paths are meant to
// go through this one mapping so the two upload modes accept the same options.
// Only options that are actually set produce a header.
fn put_object_headers(opts PutOptions) map[string]string {
	mut headers := map[string]string{}
	// Free-form string options are copied verbatim whenever they are
	// non-empty. V maps iterate in insertion order, so the headers are
	// emitted in the order listed here.
	text_options := {
		'content-type':        opts.content_type
		'content-disposition': opts.content_disposition
		'content-encoding':    opts.content_encoding
		'cache-control':       opts.cache_control
	}
	for header_name, header_value in text_options {
		if header_value != '' {
			headers[header_name] = header_value
		}
	}
	// Enum options are skipped while they are left at `.unset`.
	if opts.acl != .unset {
		headers['x-amz-acl'] = opts.acl.to_header_value()
	}
	if opts.storage_class != .unset {
		headers['x-amz-storage-class'] = opts.storage_class.to_header_value()
	}
	if opts.request_payer {
		headers['x-amz-request-payer'] = 'requester'
	}
	return headers
}
130
// get fetches the object body and returns it as bytes, entirely in memory.
// For large objects either pass a `range` and stitch the pieces together
// yourself, or use a presigned URL with a streaming HTTP client.
// A non-empty `version_id` is sent as the `versionId` query parameter.
// Any status other than 200 or 206 is returned as an HTTP error.
pub fn (c &Client) get(key string, opts GetOptions) ![]u8 {
	creds := c.creds_for(opts.bucket)
	object_path := build_object_path(creds, opts.bucket, key)!
	mut extra_headers := map[string]string{}
	if opts.range != '' {
		extra_headers['range'] = opts.range
	}
	if opts.request_payer {
		extra_headers['x-amz-request-payer'] = 'requester'
	}
	query_string := if opts.version_id == '' {
		''
	} else {
		'versionId=' + uri_encode_query(opts.version_id)
	}
	request := sign_request(creds, SignRequest{
		method:        'GET'
		path:          object_path
		query:         query_string
		payload_hash:  empty_sha256
		extra_headers: extra_headers
	})!
	resp := c.do_http(request, '')!
	// 206 is the partial-content answer to a ranged request.
	if resp.status_code == 200 || resp.status_code == 206 {
		return resp.body.bytes()
	}
	return new_http_error(resp.status_code, key, resp.body)
}
161
// get_string downloads the object via `get` and returns its body as a V
// `string` instead of a byte array. Errors from `get` are propagated.
pub fn (c &Client) get_string(key string, opts GetOptions) !string {
	return c.get(key, opts)!.bytestr()
}
168
// stat issues a HEAD request for `key` and returns the object's metadata
// (size / last_modified / etag / content_type) read from the response headers.
// A 404 is reported as an error with code `NoSuchKey`; any other status
// besides 200 / 204 is returned as an HTTP error.
pub fn (c &Client) stat(key string, opts StatOptions) !Stat {
	creds := c.creds_for(opts.bucket)
	object_path := build_object_path(creds, opts.bucket, key)!
	mut extra_headers := map[string]string{}
	if opts.request_payer {
		extra_headers['x-amz-request-payer'] = 'requester'
	}
	request := sign_request(creds, SignRequest{
		method:        'HEAD'
		path:          object_path
		payload_hash:  empty_sha256
		extra_headers: extra_headers
	})!
	resp := c.do_http(request, '')!
	if resp.status_code == 404 {
		return new_error('NoSuchKey', 'No such key: ${key}')
	}
	if resp.status_code != 200 && resp.status_code != 204 {
		return new_http_error(resp.status_code, key, resp.body)
	}
	// Missing headers fall back to neutral values: size 0, empty strings.
	content_length := resp.header.get(.content_length) or { '0' }
	quoted_etag := resp.header.get(.etag) or { '' }
	return Stat{
		size:          content_length.i64()
		last_modified: resp.header.get(.last_modified) or { '' }
		// Surrounding double quotes are stripped from the ETag value.
		etag:          quoted_etag.trim('"')
		content_type:  resp.header.get(.content_type) or { '' }
	}
}
198
// exists reports whether `key` is present, based on `stat`: true when `stat`
// succeeds, false when it fails with an `S3Error` whose code is `NoSuchKey`,
// and the original error for every other failure.
pub fn (c &Client) exists(key string, opts StatOptions) !bool {
	c.stat(key, opts) or {
		if err is S3Error {
			// Only "object is missing" is turned into a plain `false`.
			if err.code == 'NoSuchKey' {
				return false
			}
		}
		return err
	}
	return true
}
209
// size returns only the `size` field of the object's `stat` result (the
// value parsed from Content-Length). Errors from `stat` are propagated.
pub fn (c &Client) size(key string, opts StatOptions) !i64 {
	return c.stat(key, opts)!.size
}
215
// delete removes a single object with a DELETE request. S3 answers 204 both
// when the object was removed and when it was already absent (the operation
// is idempotent), so both are surfaced as success. 200 is accepted as well;
// every other status is returned as an HTTP error.
pub fn (c &Client) delete(key string, opts StatOptions) ! {
	creds := c.creds_for(opts.bucket)
	object_path := build_object_path(creds, opts.bucket, key)!
	mut extra_headers := map[string]string{}
	if opts.request_payer {
		extra_headers['x-amz-request-payer'] = 'requester'
	}
	request := sign_request(creds, SignRequest{
		method:        'DELETE'
		path:          object_path
		payload_hash:  empty_sha256
		extra_headers: extra_headers
	})!
	resp := c.do_http(request, '')!
	if resp.status_code == 200 || resp.status_code == 204 {
		return
	}
	return new_http_error(resp.status_code, key, resp.body)
}
236
// presign builds a presigned URL for `key`; see PresignOptions for the
// available tunables. Options that are set are forwarded as extra query
// parameters: acl, storage class, response content type / disposition and
// the request-payer flag. Errors from path building or `presign_url` are
// propagated.
pub fn (c &Client) presign(key string, opts PresignOptions) !string {
	creds := c.creds_for(opts.bucket)
	object_path := build_object_path(creds, opts.bucket, key)!
	mut query_params := map[string]string{}
	if opts.acl != .unset {
		query_params['X-Amz-Acl'] = opts.acl.to_header_value()
	}
	if opts.storage_class != .unset {
		query_params['x-amz-storage-class'] = opts.storage_class.to_header_value()
	}
	// content_type / content_disposition become `response-*` overrides.
	if opts.content_type != '' {
		query_params['response-content-type'] = opts.content_type
	}
	if opts.content_disposition != '' {
		query_params['response-content-disposition'] = opts.content_disposition
	}
	if opts.request_payer {
		query_params['x-amz-request-payer'] = 'requester'
	}
	request := PresignRequest{
		method:      http_method_to_string(opts.method)
		path:        object_path
		expires_in:  opts.expires_in
		extra_query: query_params
	}
	return presign_url(creds, request)
}
264
// file returns a File reference for `key`, bound to this client and to the
// bucket given in `opts` (which may be empty). No request is made here.
// NOTE(review): the previous comment said the key may be written as
// `<bucket>/<key>` when neither the client nor `opts` names a bucket; that
// resolution is not visible in this function — confirm against File.
pub fn (c &Client) file(key string, opts FileOptions) File {
	handle := File{
		client: c
		key:    key
		bucket: opts.bucket
	}
	return handle
}
275
// creds_for returns the credentials to use for one call: the client's own
// credentials when `bucket` is empty, otherwise a copy of them whose `bucket`
// field is replaced by the given value.
fn (c &Client) creds_for(bucket string) Credentials {
	if bucket != '' {
		return Credentials{
			...c.credentials
			bucket: bucket
		}
	}
	return c.credentials
}
288
// build_object_path produces the canonical URI path for a key, prefixed by
// the endpoint's extra path (`creds.extra_path()`):
//   Path style:     `<extra>/<encoded-bucket>/<encoded-key>`
//   Virtual hosted: `<extra>/<encoded-key>`
// The bucket is `bucket_override` when non-empty, else `creds.bucket`.
// Returns an `InvalidPath` error when `key` is empty, or when path style is
// in use and no bucket is available.
// The key is forwarded byte-exact (only percent-encoded): S3 treats keys as
// opaque identifiers, so `folder/`, `/x` and `a//b` all designate distinct
// objects and must reach the wire as written.
pub fn build_object_path(creds Credentials, bucket_override string, key string) !string {
	if key == '' {
		return new_error('InvalidPath', 'Empty object key')
	}
	encoded_key := uri_encode_path(key)
	prefix := creds.extra_path()
	if creds.virtual_hosted_style {
		// The bucket is not part of the path in this style.
		return prefix + '/' + encoded_key
	}
	bucket := if bucket_override == '' { creds.bucket } else { bucket_override }
	if bucket == '' {
		return new_error('InvalidPath',
			'No bucket given (set Credentials.bucket or pass bucket via options)')
	}
	return prefix + '/' + uri_encode_path(strip_slashes(bucket)) + '/' + encoded_key
}
313
// HttpResponse is the reduced view of an HTTP response that the lower-level
// helpers hand back to their callers: the status code, the raw body of a
// non-streamed request, and V's typed `http.Header` for header lookups.
pub struct HttpResponse {
pub:
	status_code int
	body        string
	header      http.Header
}
323
// do_http sends an already-signed request through V's `net.http` and returns
// the status, body and headers of the response. The HTTP method comes from
// `signed.method`, so what goes on the wire cannot drift from what was signed.
// The client's read / write timeouts are applied to the request. Transport
// failures are returned as a `NetworkError`. When compiled with `-d s3_debug`
// the exchange is dumped to stderr with sensitive header values masked.
fn (c &Client) do_http(signed SignedRequest, body string) !HttpResponse {
	mut wire_headers := http.new_header()
	for name, value in signed.headers {
		// V's http client writes `Host` on its own; forwarding ours as well
		// would produce a duplicate header.
		if name.to_lower() != 'host' {
			wire_headers.add_custom(canonical_header_name(name), value)!
		}
	}
	method := parse_http_method(signed.method)!
	base_request := http.new_request(method, signed.url, body)
	mut request := http.Request{
		...base_request
		header:        wire_headers
		read_timeout:  c.read_timeout
		write_timeout: c.write_timeout
	}
	resp := request.do() or {
		return new_error('NetworkError', 'HTTP request failed: ${err.msg()}')
	}
	$if s3_debug ? {
		eprintln('[s3] ${signed.method} ${signed.url}')
		for name, value in signed.headers {
			eprintln('[s3] > ${name}: ${redacted_value(name, value)}')
		}
		eprintln('[s3] -> ${resp.status_code}')
		eprintln('[s3] body: ${resp.body}')
	}
	return HttpResponse{
		status_code: resp.status_code
		body:        resp.body
		header:      resp.header
	}
}
361
// redacted_value returns the text to print for header `name` in s3_debug
// logs: `<redacted>` for `Authorization` and for any header whose name starts
// with `x-amz-security-token` (compared case-insensitively), and `value`
// unchanged for everything else.
fn redacted_value(name string, value string) string {
	lowered := name.to_lower()
	is_secret := lowered == 'authorization' || lowered.starts_with('x-amz-security-token')
	return if is_secret { '<redacted>' } else { value }
}
370
// canonical_header_name upper-cases the first ASCII letter of every
// dash-separated segment, e.g. 'x-amz-content-sha256' becomes
// 'X-Amz-Content-Sha256'. All other bytes are copied unchanged.
// HTTP/1.1 header names are case-insensitive, but some intermediaries and
// mocks are not, hence the Title-Case form on the wire.
fn canonical_header_name(s string) string {
	mut buf := []u8{cap: s.len}
	for i in 0 .. s.len {
		ch := s[i]
		// A segment starts at the very first byte and right after each `-`.
		starts_segment := i == 0 || s[i - 1] == `-`
		if starts_segment && ch >= `a` && ch <= `z` {
			// ASCII lower -> upper is a fixed offset of 32.
			buf << ch - 32
		} else {
			buf << ch
		}
	}
	return buf.bytestr()
}
392
// parse_http_method converts a SigV4 canonical method string into V's
// `http.Method`, ignoring case. GET, PUT, POST, DELETE and HEAD are
// recognised; anything else yields an `InvalidMethod` error. The signer
// enforces the supported set up-front, so hitting the error here points to
// a programming bug rather than to bad user input.
fn parse_http_method(s string) !http.Method {
	verb := s.to_upper()
	if verb == 'GET' {
		return http.Method.get
	}
	if verb == 'PUT' {
		return http.Method.put
	}
	if verb == 'POST' {
		return http.Method.post
	}
	if verb == 'DELETE' {
		return http.Method.delete
	}
	if verb == 'HEAD' {
		return http.Method.head
	}
	return new_error('InvalidMethod', 'Unsupported HTTP method: ${s}')
}
406