| 1 | module token |
| 2 | |
| 3 | // KeywordsMatcherTrie provides a faster way of determining whether a given name |
| 4 | // is a reserved word (belongs to a given set of previously known words `R`). |
| 5 | // See the module description for more details. |
| 6 | @[heap] |
| 7 | pub struct KeywordsMatcherTrie { |
| 8 | pub mut: |
| 9 | nodes []&TrieNode |
| 10 | min_len int = 999999 |
| 11 | max_len int |
| 12 | } |
| 13 | |
| 14 | // str returns a short representation of matcher |
| 15 | pub fn (km &KeywordsMatcherTrie) str() string { |
| 16 | return 'KeywordsMatcherTrie{ /* nodes.len: ${km.nodes.len} */ min_len: ${km.min_len}, max_len: ${km.max_len} }' |
| 17 | } |
| 18 | |
| 19 | // TrieNode is a single node from a trie, used by KeywordsMatcherTrie |
| 20 | pub struct TrieNode { |
| 21 | pub mut: |
| 22 | children [123]&TrieNode |
| 23 | value int = -1 // when != -1, it is a leaf node representing a match |
| 24 | } |
| 25 | |
| 26 | // str returns a string representation of the node content |
| 27 | pub fn (node &TrieNode) str() string { |
| 28 | if isnil(node) { |
| 29 | return '&TrieNode(nil)' |
| 30 | } |
| 31 | return '&TrieNode{value: ${node.value}}' |
| 32 | } |
| 33 | |
| 34 | // find tries to find the given `word` in the set of all previously added words |
| 35 | // to the KeywordsMatcherTrie instance. It returns -1 if the word was NOT found |
| 36 | // there at all. If the word was found, find will return the `value` (value => 0), |
| 37 | // associated with the word, when it was added. |
| 38 | @[direct_array_access] |
| 39 | pub fn (km &KeywordsMatcherTrie) find(word string) int { |
| 40 | wlen := word.len |
| 41 | if wlen < km.min_len || wlen > km.max_len { |
| 42 | return -1 |
| 43 | } |
| 44 | node := km.nodes[wlen] |
| 45 | if node == unsafe { nil } { |
| 46 | return -1 |
| 47 | } |
| 48 | return node.find(word) |
| 49 | } |
| 50 | |
| 51 | // matches returns true when the word was already added, i.e. when it was found. |
| 52 | @[inline] |
| 53 | pub fn (km &KeywordsMatcherTrie) matches(word string) bool { |
| 54 | return km.find(word) != -1 |
| 55 | } |
| 56 | |
| 57 | // add_word adds the given word to the KeywordsMatcherTrie instance. It associates a non |
| 58 | // negative integer value to it, so later `find` could return the value, when it succeeds. |
| 59 | @[direct_array_access; markused] |
| 60 | pub fn (mut km KeywordsMatcherTrie) add_word(word string, value int) { |
| 61 | wlen := word.len |
| 62 | if km.max_len < wlen { |
| 63 | km.max_len = wlen |
| 64 | } |
| 65 | if km.min_len > wlen { |
| 66 | km.min_len = wlen |
| 67 | } |
| 68 | // add more top level slots, if needed: |
| 69 | for km.nodes.len < wlen + 1 { |
| 70 | // eprintln('>>>>>>>>>>>>>> appending more nodes for word: ${word} | value: ${value} | km.nodes.len: ${km.nodes.len} | wlen: ${wlen}') |
| 71 | km.nodes << unsafe { &TrieNode(nil) } |
| 72 | } |
| 73 | if km.nodes[wlen] == unsafe { nil } { |
| 74 | km.nodes[wlen] = new_trie_node() |
| 75 | } |
| 76 | km.nodes[wlen].add_word(word, value, 0) |
| 77 | } |
| 78 | |
| 79 | pub fn KeywordsMatcherTrie.new(cap int) KeywordsMatcherTrie { |
| 80 | mut km := KeywordsMatcherTrie{ |
| 81 | nodes: []&TrieNode{cap: cap} |
| 82 | } |
| 83 | for _ in 0 .. cap { |
| 84 | km.nodes << &TrieNode(unsafe { nil }) |
| 85 | } |
| 86 | return km |
| 87 | } |
| 88 | |
| 89 | // new_keywords_matcher_trie creates a new KeywordsMatcherTrie instance from a given map |
| 90 | // with string keys, and integer or enum values. |
| 91 | pub fn new_keywords_matcher_trie[T](kw_map map[string]T) KeywordsMatcherTrie { |
| 92 | mut km := KeywordsMatcherTrie.new(10) |
| 93 | for k, v in kw_map { |
| 94 | km.add_word(k, int(v)) |
| 95 | } |
| 96 | // dump(km.min_len) |
| 97 | // dump(km.max_len) |
| 98 | // for idx,x in km.nodes { if x != unsafe { nil } { eprintln('>> idx: ${idx} | ${ptr_str(x)}') } } |
| 99 | return km |
| 100 | } |
| 101 | |
| 102 | // new_keywords_matcher_from_array_trie creates a new KeywordsMatcherTrie instance from a given array |
| 103 | // of strings. The values for the strings, that `find` will return, will be the indexes in that array. |
| 104 | pub fn new_keywords_matcher_from_array_trie(names []string) KeywordsMatcherTrie { |
| 105 | mut m := map[string]int{} |
| 106 | for i, name in names { |
| 107 | m[name] = i |
| 108 | } |
| 109 | return new_keywords_matcher_trie[int](m) |
| 110 | } |
| 111 | |
| 112 | // |
| 113 | |
| 114 | // new_trie_node creates a new TrieNode instance |
| 115 | pub fn new_trie_node() &TrieNode { |
| 116 | return &TrieNode{} |
| 117 | } |
| 118 | |
| 119 | // show displays the information in `node`, in a more compact/readable format (recursively) |
| 120 | pub fn (node &TrieNode) show(level int) { |
| 121 | mut non_nil_children := []int{} |
| 122 | for idx, x in node.children { |
| 123 | if x != unsafe { nil } { |
| 124 | non_nil_children << idx |
| 125 | } |
| 126 | } |
| 127 | children := non_nil_children.map(u8(it).ascii_str()) |
| 128 | eprintln('> level: ${level:2} | value: ${node.value:12} | non_nil_children: ${non_nil_children.len:2} | ${children}') |
| 129 | for x in node.children { |
| 130 | if x != unsafe { nil } { |
| 131 | x.show(level + 1) |
| 132 | } |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | // add_word adds another `word` and `value` pair into the trie, starting from `node` (recursively). |
| 137 | // `word_idx` is just used as an accumulator, and starts from 0 at the root of the tree. |
| 138 | @[direct_array_access; markused] |
| 139 | pub fn (mut node TrieNode) add_word(word string, value int, word_idx int) { |
| 140 | if word_idx < 0 || word_idx >= word.len { |
| 141 | node.value = value |
| 142 | return |
| 143 | } |
| 144 | first := u8(word[word_idx]) |
| 145 | // eprintln('>> node: ${ptr_str(node)} | first: ${first} | word_idx: ${word_idx}') |
| 146 | mut child_node := node.children[first] |
| 147 | if child_node == unsafe { nil } { |
| 148 | child_node = new_trie_node() |
| 149 | node.children[first] = child_node |
| 150 | } |
| 151 | child_node.add_word(word, value, word_idx + 1) |
| 152 | } |
| 153 | |
| 154 | // find tries to find a match for `word` to the trie (the set of all previously added words). |
| 155 | // It returns -1 if there is no match, or the value associated with the previously added |
| 156 | // matching word by `add_word`. |
| 157 | @[direct_array_access] |
| 158 | pub fn (root &TrieNode) find(word string) int { |
| 159 | wlen := word.len |
| 160 | mut node := unsafe { &TrieNode(root) } |
| 161 | mut idx := 0 |
| 162 | for { |
| 163 | // eprintln('> match_keyword: `${word:20}` | node: ${ptr_str(node)} | idx: ${idx:3}') |
| 164 | if idx == wlen { |
| 165 | k := node.value |
| 166 | if k != -1 { |
| 167 | // node.show(0) |
| 168 | return k |
| 169 | } |
| 170 | return -1 |
| 171 | } |
| 172 | c := word[idx] |
| 173 | child := node.children[c] |
| 174 | if child == unsafe { nil } { |
| 175 | return -1 |
| 176 | } |
| 177 | node = unsafe { child } |
| 178 | idx++ |
| 179 | } |
| 180 | return -1 |
| 181 | } |
| 182 | |