From 416e5eeea81e7289e2b235378b162aed4b8a7ae0 Mon Sep 17 00:00:00 2001 From: David Legrand <1110600+davlgd@users.noreply.github.com> Date: Thu, 20 Nov 2025 09:31:09 +0100 Subject: [PATCH] veb: support markdown content negotiation, compliant with https://llmstxt.org/ (#25782) --- vlib/veb/README.md | 78 +++++++++++++++++ vlib/veb/consts.v | 1 + vlib/veb/static_handler.v | 3 + vlib/veb/tests/static_handler_test.v | 86 +++++++++++++++++++ vlib/veb/tests/testdata/about.html.md | 3 + vlib/veb/tests/testdata/about.md | 3 + vlib/veb/tests/testdata/about/index.html.md | 3 + vlib/veb/tests/testdata/page.html | 1 + vlib/veb/tests/testdata/page.html.md | 3 + .../tests/testdata/sub_folder/index.html.md | 3 + vlib/veb/tests/testdata/test.md | 3 + vlib/veb/veb.v | 35 +++++++- 12 files changed, 220 insertions(+), 2 deletions(-) create mode 100644 vlib/veb/tests/testdata/about.html.md create mode 100644 vlib/veb/tests/testdata/about.md create mode 100644 vlib/veb/tests/testdata/about/index.html.md create mode 100644 vlib/veb/tests/testdata/page.html create mode 100644 vlib/veb/tests/testdata/page.html.md create mode 100644 vlib/veb/tests/testdata/sub_folder/index.html.md create mode 100644 vlib/veb/tests/testdata/test.md diff --git a/vlib/veb/README.md b/vlib/veb/README.md index b697fa6d9..4e193d6e7 100644 --- a/vlib/veb/README.md +++ b/vlib/veb/README.md @@ -527,6 +527,84 @@ curl -H "Accept-Encoding: gzip" -i http://localhost:8080/style.css to disable auto-compression completely. For optimal performance on read-only systems, pre-compress all files with `gzip -k`. +### Markdown content negotiation + +veb can provide automatic content negotiation for markdown files, allowing you to serve +markdown content when the client explicitly requests it via the `Accept` header. +This complies with the [llms.txt](https://llmstxt.org/) proposal and is useful for documentation sites +that serve the same content in multiple formats, delivering it more efficiently to AI services. 
+ +**How it works:** + +When `enable_markdown_negotiation` is enabled and a client sends `Accept: text/markdown`, +veb will try to serve markdown variants in the following priority order: + +1. `path.md` - Direct markdown file +2. `path.html.md` - HTML-flavored markdown (for content that can be rendered as both) +3. `path/index.html.md` - Directory index in markdown format + +Without the `Accept: text/markdown` header, files are served normally based on their +actual extension. This ensures backward compatibility - direct access to `.md` files +always works regardless of the setting. + +**Example:** + +```v +module main + +import veb + +pub struct Context { + veb.Context +} + +pub struct App { + veb.StaticHandler +} + +fn main() { + mut app := &App{} + + // Enable markdown content negotiation (disabled by default) + app.enable_markdown_negotiation = true + + // Serve files from the 'docs' directory + app.handle_static('docs', true)! + + veb.run[App, Context](mut app, 8080) +} +``` + +**Setup and testing:** + +Create test files in the `docs` directory: +```bash +mkdir -p docs +echo "# API Documentation" > docs/api.md +echo "# User Guide" > docs/guide.html.md +echo "

<h1>HTML Version</h1>

" > docs/api.html +``` + +Run the server: +```bash +v run server.v +``` + +Test content negotiation with cURL: +```bash +# Request markdown version with content negotiation - serves api.md +curl -H "Accept: text/markdown" http://localhost:8080/api + +# Direct access to .md file always works, regardless of Accept header +curl http://localhost:8080/api.md + +# Direct access to .html file +curl http://localhost:8080/api.html + +# Without Accept: text/markdown header - returns 404 since 'api' without extension doesn't exist +curl http://localhost:8080/api +``` + ## Middleware Middleware in web development is (loosely defined) a hidden layer that sits between diff --git a/vlib/veb/consts.v b/vlib/veb/consts.v index 7832e7369..08b376d46 100644 --- a/vlib/veb/consts.v +++ b/vlib/veb/consts.v @@ -95,6 +95,7 @@ pub const mime_types = { '.js': 'text/javascript' '.json': 'application/json' '.jsonld': 'application/ld+json' + '.md': 'text/markdown' '.mid': 'audio/midi audio/x-midi' '.midi': 'audio/midi audio/x-midi' '.mjs': 'text/javascript' diff --git a/vlib/veb/static_handler.v b/vlib/veb/static_handler.v index 067cd2ab4..ae98285aa 100644 --- a/vlib/veb/static_handler.v +++ b/vlib/veb/static_handler.v @@ -30,6 +30,9 @@ pub mut: // Default: 1MB (1024*1024 bytes). Set to 0 to disable auto-compression completely (only pre-compressed .gz files will be served). // Note: On readonly filesystems, if .gz caching fails, compressed content is served from memory as fallback. static_gzip_max_size int = 1048576 + // enable_markdown_negotiation lets the server answer requests that send `Accept: text/markdown` with the matching .md variant of the requested path, if any. 
+ // Default: false (for backward compatibility) + enable_markdown_negotiation bool } // scan_static_directory recursively scans `directory_path` and returns an error if diff --git a/vlib/veb/tests/static_handler_test.v b/vlib/veb/tests/static_handler_test.v index 28a0defa6..a2f7f7f23 100644 --- a/vlib/veb/tests/static_handler_test.v +++ b/vlib/veb/tests/static_handler_test.v @@ -61,6 +61,9 @@ fn run_app_test() { app.handle_static('testdata', true) or { panic(err) } + // Enable markdown content negotiation for testing + app.enable_markdown_negotiation = true + if _ := app.mount_static_folder_at('testdata', 'static') { assert true == false, 'should throw invalid mount path error' } else { @@ -126,3 +129,86 @@ fn test_upper_case_mime_type() { assert x.status() == .ok assert x.body == 'body' } + +// Content negotiation tests - Priority order +// Tests verify: path.md > path.html.md > path/index.html.md + +fn test_markdown_negotiation_priority_first() { + // When all three variants exist, path.md (priority 1) is served + config := http.FetchConfig{ + url: '${localserver}/about' + header: http.new_header(key: .accept, value: 'text/markdown') + } + x := http.fetch(config)! + + assert x.status() == .ok + assert x.header.get(.content_type)! == 'text/markdown' + assert x.body.contains('This is the about page in markdown format.') + assert !x.body.contains('about.html.md variant') + assert !x.body.contains('about/index.html.md variant') +} + +fn test_markdown_negotiation_priority_second() { + // When only path.html.md exists (priority 2), it is served + config := http.FetchConfig{ + url: '${localserver}/page' + header: http.new_header(key: .accept, value: 'text/markdown') + } + x := http.fetch(config)! + + assert x.status() == .ok + assert x.header.get(.content_type)! 
== 'text/markdown' + assert x.body.contains('# Page HTML Markdown') +} + +fn test_markdown_negotiation_directory_index() { + // For directories, index.html.md is served when Accept: text/markdown + config := http.FetchConfig{ + url: '${localserver}/sub_folder/' + header: http.new_header(key: .accept, value: 'text/markdown') + } + x := http.fetch(config)! + + assert x.status() == .ok + assert x.header.get(.content_type)! == 'text/markdown' + assert x.body.contains('# Index HTML Markdown') +} + +// Direct access tests - Verifies backward compatibility + +fn test_markdown_direct_access() { + // Without Accept header + x_no_header := http.get('${localserver}/test.md')! + assert x_no_header.status() == .ok + assert x_no_header.header.get(.content_type)! == 'text/markdown' + assert x_no_header.body.contains('# Test Markdown') + + // With Accept: text/markdown header - same result + config := http.FetchConfig{ + url: '${localserver}/test.md' + header: http.new_header(key: .accept, value: 'text/markdown') + } + x_with_header := http.fetch(config)! + assert x_with_header.status() == .ok + assert x_with_header.header.get(.content_type)! == 'text/markdown' + assert x_with_header.body.contains('# Test Markdown') +} + +fn test_markdown_variants_direct_access() { + // All markdown variants remain accessible via their full paths + x_html_md := http.get('${localserver}/about.html.md')! + assert x_html_md.status() == .ok + assert x_html_md.body.contains('about.html.md variant') + + x_index := http.get('${localserver}/about/index.html.md')! + assert x_index.status() == .ok + assert x_index.body.contains('about/index.html.md variant') +} + +// Negative tests - Verifies correct behavior without Accept header + +fn test_markdown_no_negotiation_without_header() { + // Without Accept: text/markdown, content is not found for directories with no index.html + x := http.get('${localserver}/about')! 
+ assert x.status() == .not_found +} diff --git a/vlib/veb/tests/testdata/about.html.md b/vlib/veb/tests/testdata/about.html.md new file mode 100644 index 000000000..6ead5c896 --- /dev/null +++ b/vlib/veb/tests/testdata/about.html.md @@ -0,0 +1,3 @@ +# About HTML Markdown + +This is about.html.md variant. diff --git a/vlib/veb/tests/testdata/about.md b/vlib/veb/tests/testdata/about.md new file mode 100644 index 000000000..1593e6bd2 --- /dev/null +++ b/vlib/veb/tests/testdata/about.md @@ -0,0 +1,3 @@ +# About Page + +This is the about page in markdown format. diff --git a/vlib/veb/tests/testdata/about/index.html.md b/vlib/veb/tests/testdata/about/index.html.md new file mode 100644 index 000000000..694f830ef --- /dev/null +++ b/vlib/veb/tests/testdata/about/index.html.md @@ -0,0 +1,3 @@ +# About Index HTML Markdown + +This is about/index.html.md variant. diff --git a/vlib/veb/tests/testdata/page.html b/vlib/veb/tests/testdata/page.html new file mode 100644 index 000000000..d7fed1c2e --- /dev/null +++ b/vlib/veb/tests/testdata/page.html @@ -0,0 +1 @@ +HTML Page diff --git a/vlib/veb/tests/testdata/page.html.md b/vlib/veb/tests/testdata/page.html.md new file mode 100644 index 000000000..9d6db6bec --- /dev/null +++ b/vlib/veb/tests/testdata/page.html.md @@ -0,0 +1,3 @@ +# Page HTML Markdown + +This is a page.html.md file. diff --git a/vlib/veb/tests/testdata/sub_folder/index.html.md b/vlib/veb/tests/testdata/sub_folder/index.html.md new file mode 100644 index 000000000..b26cd95bd --- /dev/null +++ b/vlib/veb/tests/testdata/sub_folder/index.html.md @@ -0,0 +1,3 @@ +# Index HTML Markdown + +This is an index.html.md file in a subfolder. diff --git a/vlib/veb/tests/testdata/test.md b/vlib/veb/tests/testdata/test.md new file mode 100644 index 000000000..f2f9f7960 --- /dev/null +++ b/vlib/veb/tests/testdata/test.md @@ -0,0 +1,3 @@ +# Test Markdown + +This is a test markdown file. 
diff --git a/vlib/veb/veb.v b/vlib/veb/veb.v index c6fe96897..c342f3549 100644 --- a/vlib/veb/veb.v +++ b/vlib/veb/veb.v @@ -383,14 +383,45 @@ fn route_matches(url_words []string, route_words []string) ?[]string { fn serve_if_static[A, X](app &A, mut user_context X, url urllib.URL, host string) bool { // TODO: handle url parameters properly - for now, ignore them mut asked_path := url.path - base_path := os.base(asked_path) + // Content negotiation for markdown files (if enabled) + if app.enable_markdown_negotiation { + accept_header := user_context.req.header.get(.accept) or { '' } + if accept_header.contains('text/markdown') { + // Try markdown variants in order of priority + markdown_variants := [ + asked_path + '.md', + asked_path + '.html.md', + asked_path + '/index.html.md', + ] + + for variant in markdown_variants { + if app.static_files[variant] != '' { + asked_path = variant + break + } + } + } + } + + base_path := os.base(asked_path) if !base_path.contains('.') && !asked_path.ends_with('/') { asked_path += '/' } if asked_path.ends_with('/') { - if app.static_files[asked_path + 'index.html'] != '' { + // Check for markdown index first if Accept header requests it and feature is enabled + if app.enable_markdown_negotiation { + accept_header := user_context.req.header.get(.accept) or { '' } + if accept_header.contains('text/markdown') + && app.static_files[asked_path + 'index.html.md'] != '' { + asked_path += 'index.html.md' + } else if app.static_files[asked_path + 'index.html'] != '' { + asked_path += 'index.html' + } else if app.static_files[asked_path + 'index.htm'] != '' { + asked_path += 'index.htm' + } + } else if app.static_files[asked_path + 'index.html'] != '' { asked_path += 'index.html' } else if app.static_files[asked_path + 'index.htm'] != '' { asked_path += 'index.htm' -- 2.39.5