| 1 | // Copyright (c) 2020-2024 Joe Conigliaro. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | module token |
| 5 | |
| 6 | // TODO: finish fileset / file / base pos etc |
| 7 | |
| 8 | // Pos encodes a source position with a unique ID for use as a map key. |
| 9 | // offset: file-set global byte offset (for error messages, file lookup) |
| 10 | // id: unique sequential ID (for expr_types map key — avoids collisions) |
| 11 | pub struct Pos { |
| 12 | pub: |
| 13 | offset int |
| 14 | id int |
| 15 | } |
| 16 | |
| 17 | // str returns a readable summary of the position key fields. |
| 18 | pub fn (p Pos) str() string { |
| 19 | return '{ offset: ${p.offset}, id: ${p.id} }' |
| 20 | } |
| 21 | |
| 22 | pub fn (p Pos) is_valid() bool { |
| 23 | return p.id > 0 |
| 24 | } |
| 25 | |
| 26 | pub struct Position { |
| 27 | pub: |
| 28 | filename string |
| 29 | offset int |
| 30 | line int |
| 31 | column int |
| 32 | } |
| 33 | |
| 34 | pub fn (p Position) str() string { |
| 35 | return '${p.filename}:${p.line}:${p.column}' |
| 36 | } |
| 37 | |
| 38 | pub struct File { |
| 39 | pub: |
| 40 | name string |
| 41 | base int |
| 42 | size int |
| 43 | mut: |
| 44 | line_offsets []int = [0] // start of each line |
| 45 | id_counter &int = unsafe { nil } // shared global counter for unique expression IDs |
| 46 | } |
| 47 | |
| 48 | pub struct FileSet { |
| 49 | mut: |
| 50 | base int = 1 // reserve 0 for no position |
| 51 | id_counter int // global counter for unique expression IDs across all files |
| 52 | // files shared []&File |
| 53 | files []&File |
| 54 | } |
| 55 | |
| 56 | pub fn FileSet.new() &FileSet { |
| 57 | return &FileSet{} |
| 58 | } |
| 59 | |
| 60 | // TODO: |
| 61 | pub fn (mut fs FileSet) add_file(filename string, base_ int, size int) &File { |
| 62 | // eprintln('>>> add_file fs: ${voidptr(fs)} | filename: ${filename} | base_: ${base_} | size: ${size}') |
| 63 | mut base := if base_ < 0 { fs.base } else { base_ } |
| 64 | |
| 65 | // eprintln('>>> add_file fs: ${voidptr(fs)} | base: ${base:10} | fs.base: ${fs.base} | base_: ${base_:10} | size: ${size:10} | filename: ${filename}') |
| 66 | |
| 67 | if base < fs.base { |
| 68 | panic('invalid base ${base} (should be >= ${fs.base}') |
| 69 | } |
| 70 | file := &File{ |
| 71 | name: filename |
| 72 | base: base |
| 73 | size: size |
| 74 | id_counter: &fs.id_counter |
| 75 | } |
| 76 | if size < 0 { |
| 77 | panic('invalid size ${size} (should be >= 0)') |
| 78 | } |
| 79 | base += size + 1 // +1 because EOF also has a position |
| 80 | if base < 0 { |
| 81 | panic('token.Pos offset overflow (> 2G of source code in file set)') |
| 82 | } |
| 83 | // add the file to the file set |
| 84 | // fs.base = base |
| 85 | // lock fs.files { |
| 86 | // // TODO: add shared to fs.base (fix compiler errors first) |
| 87 | // fs.files << file |
| 88 | // } |
| 89 | // fs.last = file |
| 90 | fs.base = base |
| 91 | fs.files << file |
| 92 | return file |
| 93 | } |
| 94 | |
| 95 | fn search_files(files []&File, x int) int { |
| 96 | // binary search |
| 97 | mut min, mut max := 0, files.len |
| 98 | for min < max { |
| 99 | mid := (min + max) / 2 |
| 100 | // println('# min: ${min}, mid: ${mid}, max: ${max}') |
| 101 | if files[mid].base <= x { |
| 102 | min = mid + 1 |
| 103 | } else { |
| 104 | max = mid |
| 105 | } |
| 106 | } |
| 107 | return min - 1 |
| 108 | |
| 109 | // linear search |
| 110 | // for i := files.len-1; i>=0; i-- { |
| 111 | // file := files[i] |
| 112 | // if file.base < x && x <= file.base + file.size { |
| 113 | // // println('found file for pos `${x}` i = ${i}:') |
| 114 | // // dump(file) |
| 115 | // return i |
| 116 | // } |
| 117 | // } |
| 118 | // return -1 |
| 119 | } |
| 120 | |
| 121 | pub fn (mut fs FileSet) file(pos Pos) &File { |
| 122 | // eprintln('>>>>>>>>> file fs: ${voidptr(fs)} | pos: ${pos}') |
| 123 | |
| 124 | // lock fs.files |
| 125 | // last file |
| 126 | // if last_file := fs.files.last() { |
| 127 | // // p_int |
| 128 | // if last_file.base <= int(pos) && int(p) <- f.base+f.size { |
| 129 | // return last_file |
| 130 | // } |
| 131 | // } |
| 132 | // i := search_files(lock fs.files { fs.files }, pos) |
| 133 | i := search_files(fs.files, pos.offset) |
| 134 | if i >= 0 { |
| 135 | file := fs.files[i] |
| 136 | if pos.offset <= file.base + file.size { |
| 137 | // we could store last and retrieve and try above |
| 138 | return file |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | dump(fs) |
| 143 | panic('cannot find file for pos: ${pos}') |
| 144 | } |
| 145 | |
| 146 | // pub fn new_file(filename string) File { |
| 147 | // return File{ |
| 148 | // name: filename |
| 149 | // } |
| 150 | // } |
| 151 | |
| 152 | @[inline] |
| 153 | pub fn (mut f File) add_line(offset int) { |
| 154 | f.line_offsets << offset |
| 155 | } |
| 156 | |
| 157 | @[inline] |
| 158 | pub fn (f &File) line_count() int { |
| 159 | return f.line_offsets.len |
| 160 | } |
| 161 | |
| 162 | pub fn (f &File) line_start(line int) int { |
| 163 | idx := line - 1 |
| 164 | if idx < 0 || idx >= f.line_offsets.len { |
| 165 | panic('invalid line `${line}` (must be > 0 & < ${f.line_count()})') |
| 166 | } |
| 167 | return f.line_offsets[idx] |
| 168 | } |
| 169 | |
| 170 | pub fn (f &File) line(pos Pos) int { |
| 171 | return f.find_line(pos.offset - f.base) |
| 172 | } |
| 173 | |
| 174 | pub fn (mut f File) pos(offset int) Pos { |
| 175 | if offset > f.size { |
| 176 | panic('invalid offset') |
| 177 | } |
| 178 | mut current_id := 0 |
| 179 | mut next_id := 0 |
| 180 | unsafe { |
| 181 | current_id = *f.id_counter |
| 182 | } |
| 183 | next_id = current_id + 1 |
| 184 | unsafe { |
| 185 | *f.id_counter = next_id |
| 186 | } |
| 187 | return Pos{ |
| 188 | offset: f.base + offset |
| 189 | id: next_id |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | pub fn (f &File) position(pos Pos) Position { |
| 194 | offset := pos.offset - f.base |
| 195 | line, column := f.find_line_and_column(offset) |
| 196 | return Position{ |
| 197 | filename: f.name |
| 198 | offset: offset |
| 199 | line: line |
| 200 | column: column |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | // return (line, column) when passed pos |
| 205 | pub fn (f &File) find_line_and_column(pos int) (int, int) { |
| 206 | line := f.find_line(pos) |
| 207 | return line, pos - f.line_offsets[line - 1] + 1 |
| 208 | } |
| 209 | |
| 210 | // return line when passed pos (binary search) |
| 211 | // NOTE: only used for error conditions |
| 212 | // therefore search speed is not an issue |
| 213 | pub fn (f &File) find_line(pos int) int { |
| 214 | mut min, mut max := 0, f.line_offsets.len |
| 215 | for min < max { |
| 216 | mid := (min + max) / 2 |
| 217 | // println('# min: ${min}, mid: ${mid}, max: ${max}') |
| 218 | if f.line_offsets[mid] <= pos { |
| 219 | min = mid + 1 |
| 220 | } else { |
| 221 | max = mid |
| 222 | } |
| 223 | } |
| 224 | return min |
| 225 | } |
| 226 | |