v2 / vlib / net / html / dom.v
231 lines · 212 sloc · 6.95 KB · e158655f4f28a73358b9e24e82963a193d9fc30e
Raw
1module html
2
3import os
4import strings
5
// The W3C Document Object Model (DOM) is a platform and language-neutral
// interface that allows programs and scripts to dynamically access and
// update the content, structure, and style of a document.
//
// https://www.w3.org/TR/WD-DOM/introduction.html
//
// DocumentObjectModel holds the tag tree built by `construct` together with
// several lookup indexes (by tag name, by attribute name and by attribute
// value) that the `get_tags*` methods read from.
pub struct DocumentObjectModel {
mut:
	// root is the first tag handed to `construct`; it stays nil until then.
	root &Tag = unsafe { nil }
	// constructed is set to true as soon as `construct` has been called.
	constructed bool
	// btree is rebuilt from scratch on every `construct` call.
	btree BTree
	// all_tags lists the root followed by every named, non-closing tag in input order.
	all_tags []&Tag
	// all_attributes maps an attribute name to the tags that carry that attribute.
	all_attributes map[string][]&Tag
	// close_tags is keyed by '/' + tag name; `construct` consults it to decide
	// whether an opening tag must wait for a matching closing tag.
	// TODO: store a counter of how many times each tag is closed, so that
	// nesting can be parsed correctly.
	close_tags map[string]bool
	// attributes maps an attribute name to the distinct values seen for it,
	// in order of first appearance (see `where_is`).
	attributes map[string][]string
	// tag_attributes maps an attribute name to a list of buckets; the bucket at
	// index `where_is(value, name)` holds the tags with that attribute value.
	tag_attributes map[string][][]&Tag
	// tag_type maps a tag name to the tags with that name.
	tag_type map[string][]&Tag
	// debug_file receives the lines written by `print_debug`.
	debug_file os.File
}
24
// GetTagsOptions carries the optional filter accepted by `get_tags`.
@[params]
pub struct GetTagsOptions {
pub:
	// name restricts the result to tags with this name; an empty name
	// (the default) means no filtering.
	name string
}
30
// print_debug appends `data` as one line to the DOM's debug file.
// Empty strings are skipped, and a failed write is reported on stderr
// instead of being propagated to the caller.
@[if debug_html ?]
fn (mut dom DocumentObjectModel) print_debug(data string) {
	if data.len == 0 {
		return
	}
	dom.debug_file.writeln(data) or { eprintln(err) }
}
37
// is_close_tag reports whether `tag` is a closing tag, i.e. whether its
// name begins with `/`. A tag with an empty name is never a closing tag.
@[inline]
fn is_close_tag(tag &Tag) bool {
	return tag.name.starts_with('/')
}
42
// where_is returns the index of `item_name` in the list of values recorded
// for `attribute_name`.
//
// The lookup registers what it does not know yet: a missing attribute name
// gets an empty value list, and a missing value is appended to that list,
// in which case the index of the freshly appended entry is returned.
fn (mut dom DocumentObjectModel) where_is(item_name string, attribute_name string) int {
	if attribute_name !in dom.attributes {
		dom.attributes[attribute_name] = []string{}
	}
	mut known_values := dom.attributes[attribute_name]
	for position, known_value in known_values {
		if known_value == item_name {
			return position
		}
	}
	// Not seen before: remember the value and hand back its new slot.
	known_values << item_name
	dom.attributes[attribute_name] = known_values
	return known_values.len - 1
}
59
// add_tag_attribute records `tag` in the attribute-value index.
//
// For every attribute of the tag, the value's slot is resolved through
// `where_is` and the tag is appended to the bucket
// `dom.tag_attributes[attribute_name][slot]`.
//
// The bucket list is grown only as far as needed to make `slot` a valid
// index. The previous implementation appended one empty bucket on every
// call regardless of need, and its exit test (`location < len + 1`) would
// also have accepted `location == len`, which is out of range.
fn (mut dom DocumentObjectModel) add_tag_attribute(tag &Tag) {
	for attribute_name, attribute_value in tag.attributes {
		location := dom.where_is(attribute_value, attribute_name)
		if attribute_name !in dom.tag_attributes {
			dom.tag_attributes[attribute_name] = []
		}
		// Make sure a bucket exists at `location` before indexing into it.
		mut buckets := unsafe { dom.tag_attributes[attribute_name] }
		for buckets.len <= location {
			buckets << []&Tag{}
		}
		dom.tag_attributes[attribute_name] = buckets
		mut bucket := unsafe { dom.tag_attributes[attribute_name][location] }
		bucket << tag
		dom.tag_attributes[attribute_name][location] = bucket
	}
}
80
// add_tag_by_type appends `tag` to the list of tags sharing its name in
// `dom.tag_type`, creating that list when the name is seen for the first time.
fn (mut dom DocumentObjectModel) add_tag_by_type(tag &Tag) {
	if tag.name in dom.tag_type {
		mut same_name_tags := unsafe { dom.tag_type[tag.name] }
		same_name_tags << tag
		dom.tag_type[tag.name] = same_name_tags
		return
	}
	dom.tag_type[tag.name] = [tag]
}
91
// add_tag_by_attribute registers `tag` in `dom.all_attributes` under the
// name of each attribute it carries, creating the per-name list on first use.
fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) {
	for attribute_name, _ in tag.attributes {
		if attribute_name in dom.all_attributes {
			mut tags_with_attribute := unsafe { dom.all_attributes[attribute_name] }
			tags_with_attribute << tag
			dom.all_attributes[attribute_name] = tags_with_attribute
			continue
		}
		dom.all_attributes[attribute_name] = [tag]
	}
}
103
// construct builds the document tree and the lookup indexes from the flat
// `tag_list`.
//
// The first tag becomes the root. Every following tag is either a closing
// tag (its name starts with `/`), which unwinds the stack of currently open
// tags, or a named tag, which is indexed and attached as a child of the tag
// on top of that stack. Tags with an empty name are skipped. Once all tags
// are processed, the content of the tree is normalized starting at the root.
//
// Note that the root tag is put into `all_tags` and the btree only; it is
// not passed to the attribute/type indexing helpers.
fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
	dom.constructed = true

	// With no tags, the `tag_list[0]` accesses below would panic.
	if tag_list.len == 0 {
		return
	}

	// temp_map: index into `tag_list` (as a string) -> value returned by
	// `btree.add_children` for that tag.
	mut temp_map := map[string]int{}
	// temp_int: scratch index into `tag_list`, usually the current stack top.
	mut temp_int := null_element
	mut temp_string := ''
	// stack holds `tag_list` indexes of the tags that are currently open.
	mut stack := Stack{}
	dom.btree = BTree{}
	dom.root = tag_list[0]
	dom.all_tags = [tag_list[0]]
	temp_map['0'] = dom.btree.add_children(tag_list[0])
	stack.push(0)
	root_index := 0

	for index := 1; index < tag_list.len; index++ {
		mut tag := tag_list[index]
		dom.print_debug(tag.str())
		if is_close_tag(tag) {
			temp_int = stack.peek()
			// Name of the tag being closed, without the leading `/`.
			temp_string = tag.name[1..]
			old_stack_size := stack.size
			// Pop open tags until the stack is exhausted, a tag with the
			// wanted name is on top, or the tag on top is marked `closed`.
			for !is_null(temp_int) && temp_string != tag_list[temp_int].name
				&& !tag_list[temp_int].closed {
				dom.print_debug(temp_string + ' >> ' + tag_list[temp_int].name + ' ' +
					(temp_string == tag_list[temp_int].name).str())
				stack.pop()
				temp_int = stack.peek()
			}
			// No matching open tag: put the stack size back to what it was
			// before the search (presumably undoing the pops above - depends
			// on `Stack`, which is defined elsewhere) and ignore this closing tag.
			if is_null(temp_int) || temp_string != tag_list[temp_int].name {
				stack.size = old_stack_size
				continue
			}
			// The matching open tag is on top of the stack: remove it.
			temp_int = stack.peek()
			temp_int = if !is_null(temp_int) { stack.pop() } else { root_index }
			if is_null(temp_int) {
				stack.push(root_index)
			}
			dom.print_debug('Removed ' + temp_string + ' -- ' + tag_list[temp_int].name)
		} else if tag.name != '' {
			// Register the tag in every lookup index.
			dom.add_tag_attribute(tag) // error here
			dom.add_tag_by_attribute(tag)
			dom.add_tag_by_type(tag)
			dom.all_tags << tag
			temp_int = stack.peek()
			if !is_null(temp_int) {
				// Attach the tag below the currently open tag, both in the
				// btree and in the `Tag` parent/child links.
				dom.btree.move_pointer(temp_map[temp_int.str()])
				temp_map[index.str()] = dom.btree.add_children(tag)
				mut temp_tag := tag_list[temp_int]
				position_in_parent := temp_tag.add_child(tag) // tag_list[temp_int] = temp_tag
				tag.add_parent(temp_tag, position_in_parent)
				/*
				dom.print_debug("Added ${tag.name} as child of '" + tag_list[temp_int].name +
					"' which now has ${dom.btree.get_children().len} childrens")
				*/
				dom.print_debug("Added ${tag.name} as child of '" + temp_tag.name +
					"' which now has ${temp_tag.children.len} childrens")
			} else { // dom.new_root(tag)
				// Nothing is open: make the root the open tag again.
				stack.push(root_index)
			}
			temp_string = '/' + tag.name
			// Keep the tag open only if a closing tag is known for its name
			// and the tag is not already marked `closed` (e.g. it ended with `/>`).
			if temp_string in dom.close_tags && !tag.closed { // if tag ends with />
				dom.print_debug('Pushed ' + temp_string)
				stack.push(index)
			}
		}
	} // println(tag_list[root_index]) for debug purposes
	dom.root = tag_list[0]
	mut root := dom.root
	dom.normalize_tag_content(mut root)
}
179
// normalize_tag_content walks the tree below `tag` depth-first and, for each
// tag, copies `content` into `text_content` and sets `content_is_inner_html`.
// For a tag with children, `content` is then replaced by the tag's own text
// followed by the string form of every (already normalized) child.
fn (mut dom DocumentObjectModel) normalize_tag_content(mut tag Tag) {
	tag.text_content = tag.content
	tag.content_is_inner_html = true
	child_count := tag.children.len
	if child_count > 0 {
		// Rough capacity guess: own text plus 32 bytes per child.
		mut html := strings.new_builder(tag.content.len + child_count * 32)
		html.write_string(tag.text_content)
		mut i := 0
		for i < tag.children.len {
			mut node := tag.children[i]
			dom.normalize_tag_content(mut node)
			html.write_string(node.str())
			i++
		}
		tag.content = html.str()
	}
}
195
// get_root returns the root of the document, i.e. the first tag that was
// passed to `construct`. The returned reference is nil when the document
// has not been constructed from a non-empty tag list.
pub fn (dom &DocumentObjectModel) get_root() &Tag {
	return dom.root
}
200
// get_tags returns the tags stored in the document.
// Without `options.name` every stored tag is returned; with it, only the
// tags of that name are returned (an empty list if the name is unknown).
pub fn (dom &DocumentObjectModel) get_tags(options GetTagsOptions) []&Tag {
	if options.name == '' {
		return dom.all_tags
	}
	if options.name in dom.tag_type {
		return unsafe { dom.tag_type[options.name] }
	}
	return []&Tag{}
}
212
// get_tags_by_class_name retrieves all tags recursively in the document root that have the given class name(s).
// It returns an empty list when the document has no root, which is the case
// until `construct` has been called with at least one tag.
pub fn (dom &DocumentObjectModel) get_tags_by_class_name(names ...string) []&Tag {
	// `root` defaults to nil; calling a method through it would dereference nil.
	if isnil(dom.root) {
		return []&Tag{}
	}
	return dom.root.get_tags_by_class_name(...names)
}
217
// get_tags_by_attribute retrieves all tags in the document that have the given attribute name.
// An attribute name that was never seen yields an empty list.
pub fn (dom &DocumentObjectModel) get_tags_by_attribute(name string) []&Tag {
	if name in dom.all_attributes {
		return unsafe { dom.all_attributes[name] }
	}
	return []&Tag{}
}
222
// get_tags_by_attribute_value retrieves all tags in the document that have the given attribute name and value.
// The slot of `value` is resolved through `where_is`, which registers the
// name/value pair if it was unknown; in that case no tags are stored for it
// and an empty list is returned.
pub fn (mut dom DocumentObjectModel) get_tags_by_attribute_value(name string, value string) []&Tag {
	value_index := dom.where_is(value, name)
	tags_by_value := unsafe { dom.tag_attributes[name] }
	if value_index >= tags_by_value.len {
		return []
	}
	return tags_by_value[value_index]
}
232