v2 / vlib / encoding / utf8 / utf8_util.v
1184 lines · 1128 sloc · 39.51 KB · e2e5cf8db56f3562c7baa735061690be936bdf3e
Raw
1// utf-8 utility string functions
2//
3// Copyright (c) 2019-2024 Dario Deledda. All rights reserved.
4// Use of this source code is governed by an MIT license
5// that can be found in the LICENSE file.
6module utf8
7
8// Utility functions
9
// len returns the number of unicode characters (not bytes) stored in `s`.
// Every leading byte advances the cursor by the width reported by
// `utf8_char_len`, so an empty string yields 0.
pub fn len(s string) int {
	mut runes := 0
	mut pos := 0
	for pos < s.len {
		pos += utf8_char_len(s[pos])
		runes++
	}
	return runes
}
29
// get_rune decodes the UTF-8 sequence that starts at byte `index` of `s`
// and returns it as a UTF-32 rune.
//
// `index` must point at the first byte of a character. The function returns
// rune 0 when `s` is empty, when `index` lies outside of `s`, or when the
// sequence announced by the leading byte is cut off by the end of the string.
pub fn get_rune(s string, index int) rune {
	if index < 0 || index >= s.len {
		return rune(0)
	}
	lead := s[index]
	ch_len := utf8_char_len(lead)
	if ch_len == 1 {
		return rune(lead)
	}
	// a truncated sequence would otherwise index past the end of `s`
	if index + ch_len > s.len {
		return rune(0)
	}
	// payload bits carried by the leading byte:
	// 2 bytes: 110xxxxx, 3 bytes: 1110xxxx, 4 bytes: 11110xxx
	lead_mask := match ch_len {
		2 { u32(0x1f) }
		3 { u32(0x0f) }
		else { u32(0x07) }
	}
	mut res := u32(lead) & lead_mask
	// every continuation byte (10xxxxxx) contributes its 6 low bits
	for i in 1 .. ch_len {
		res = (res << 6) | (u32(s[index + i]) & 0x3f)
	}
	return rune(res)
}
68
// raw_index - returns the unicode character at character position `index`
// of the UTF-8 string `s`, as a UTF-8 string.
// example: utf8.raw_index('我是V Lang', 1) => '是'
pub fn raw_index(s string, index int) string {
	mut runes := []rune{}
	mut pos := 0

	// decode characters until the requested one has been collected
	for pos < s.len && runes.len - 1 != index {
		first := s[pos]
		// number of bytes that follow the leading byte
		extra := int((u32(0xe5000000) >> ((first >> 3) & 0x1e)) & 3)

		if extra > 0 {
			runes << rune(get_rune(s, pos))
		} else {
			runes << rune(first)
		}
		pos += extra + 1
	}

	return runes[index].str()
}
92
// reverse - returns a copy of `s` with its unicode characters in reverse order.
// example: utf8.reverse('你好世界hello world') => 'dlrow olleh界世好你'.
//
// The string is split into characters in a single pass. An empty string or
// a string holding a single character is returned as a plain copy.
pub fn reverse(s string) string {
	mut chars := []string{cap: s.len}
	mut pos := 0
	for pos < s.len {
		mut end := pos + utf8_char_len(s[pos])
		// a truncated trailing sequence is kept as it is
		if end > s.len {
			end = s.len
		}
		chars << s[pos..end]
		pos = end
	}
	if chars.len <= 1 {
		return s.clone()
	}
	return chars.reverse().join('')
}
107
108// Conversion functions
109
// to_upper returns a copy of `s` converted to uppercase.
pub fn to_upper(s string) string {
	upper := convert_case(s, true)
	return upper
}
114
// to_lower returns a copy of `s` converted to lowercase.
pub fn to_lower(s string) string {
	lower := convert_case(s, false)
	return lower
}
119
120// Punctuation functions
121//
// The "western" functions search a small table, which is quicker than
// searching the global unicode table. **Use them only for western chars**.
124
125// Western
126
// is_punct returns true if the byte at s[index] starts a western unicode punctuation character
pub fn is_punct(s string, index int) bool {
	r := get_rune(s, index)
	return is_rune_punct(r)
}
131
// is_control returns true if the rune is a control code
pub fn is_control(r rune) bool {
	// runes above max_latin_1 are never reported as control codes
	return r <= max_latin_1 && props[u8(r)] == 1
}
140
// is_letter returns true if the rune is a unicode letter (unicode category L)
pub fn is_letter(r rune) bool {
	// fast path for plain ASCII letters
	is_ascii_lower := r >= `a` && r <= `z`
	is_ascii_upper := r >= `A` && r <= `Z`
	if is_ascii_lower || is_ascii_upper {
		return true
	}
	if r > max_latin_1 {
		return is_excluding_latin(letter_table, r)
	}
	// the remaining runes up to max_latin_1 are looked up in the property table
	return props[u8(r)] & p_l_mask != 0
}
150
// is_space returns true if the rune is a character in unicode category Z with the white space property,
// or one of the following characters:
// ```
// `\t`, `\n`, `\v`, `\f`, `\r`, ` `, 0x85 (NEL), 0xA0 (NBSP)
// ```
pub fn is_space(r rune) bool {
	if r > max_latin_1 {
		return is_excluding_latin(white_space_table, r)
	}
	// up to max_latin_1 only this fixed set counts as white space
	return match r {
		`\t`, `\n`, `\v`, `\f`, `\r`, ` `, 0x85, 0xA0 { true }
		else { false }
	}
}
168
// is_number returns true if the rune is a unicode number (unicode category N)
pub fn is_number(r rune) bool {
	if r > max_latin_1 {
		return is_excluding_latin(number_table, r)
	}
	// runes up to max_latin_1 are looked up in the property table
	return props[u8(r)] & p_n != 0
}
176
// is_rune_punct returns true if the rune is listed in the western unicode punctuation table
pub fn is_rune_punct(r rune) bool {
	not_found := rune(-1)
	return find_punct_in_table(r, unicode_punct_western) != not_found
}
181
182// Global
183
// is_global_punct returns true if the byte at s[index] starts a global unicode punctuation character
pub fn is_global_punct(s string, index int) bool {
	r := get_rune(s, index)
	return is_rune_global_punct(r)
}
188
// is_rune_global_punct returns true if the rune is listed in the global unicode punctuation table
pub fn is_rune_global_punct(r rune) bool {
	not_found := rune(-1)
	return find_punct_in_table(r, unicode_punct) != not_found
}
193
194// Private functions
195
// utf8_to_lower maps the unicode code point `in_cp` to its lowercase form.
// Code points that are not covered by one of the ranges or single mappings
// below are returned unchanged.
fn utf8_to_lower(in_cp int) int {
	mut cp := in_cp
	if (0x0041 <= cp && 0x005a >= cp) || (0x00c0 <= cp && 0x00d6 >= cp)
		|| (0x00d8 <= cp && 0x00de >= cp) || (0x0391 <= cp && 0x03a1 >= cp)
		|| (0x03a3 <= cp && 0x03ab >= cp) || (0x0410 <= cp && 0x042f >= cp) {
		// lowercase block sits 32 code points above the uppercase block
		cp += 32
	} else if 0x0400 <= cp && 0x040f >= cp {
		cp += 80
	} else if (0x0100 <= cp && 0x012f >= cp) || (0x0132 <= cp && 0x0137 >= cp)
		|| (0x014a <= cp && 0x0177 >= cp) || (0x0182 <= cp && 0x0185 >= cp)
		|| (0x01a0 <= cp && 0x01a5 >= cp) || (0x01de <= cp && 0x01ef >= cp)
		|| (0x01f8 <= cp && 0x021f >= cp) || (0x0222 <= cp && 0x0233 >= cp)
		|| (0x0246 <= cp && 0x024f >= cp) || (0x03d8 <= cp && 0x03ef >= cp)
		|| (0x0460 <= cp && 0x0481 >= cp) || (0x048a <= cp && 0x04bf >= cp)
		|| (0x04d0 <= cp && 0x04ff >= cp) {
		// pairs stored as (even = uppercase, odd = lowercase)
		cp |= 0x1
	} else if (0x0139 <= cp && 0x0148 >= cp) || (0x0179 <= cp && 0x017e >= cp)
		|| (0x01af <= cp && 0x01b0 >= cp) || (0x01b3 <= cp && 0x01b6 >= cp)
		|| (0x01cd <= cp && 0x01dc >= cp) || (0x04c1 <= cp && 0x04ce >= cp) {
		// pairs stored as (odd = uppercase, even = lowercase)
		cp += 1
		cp &= ~0x1
	} else if (0x0531 <= cp && 0x0556 >= cp) || (0x10A0 <= cp && 0x10C5 >= cp) {
		// ARMENIAN or GEORGIAN
		cp += 0x30
	} else if ((0x1E00 <= cp && 0x1E94 >= cp) || (0x1EA0 <= cp && 0x1EF8 >= cp)) && cp & 1 == 0 {
		// LATIN CAPITAL LETTER
		cp += 1
	} else if 0x24B6 <= cp && 0x24CF >= cp {
		// CIRCLED LATIN
		cp += 0x1a
	} else if 0xFF21 <= cp && 0xFF3A >= cp {
		// FULLWIDTH LATIN CAPITAL (U+FF21 'A' maps to U+FF41 'a')
		cp += 0x20
	} else if (0x1F08 <= cp && 0x1F0F >= cp) || (0x1F18 <= cp && 0x1F1D >= cp)
		|| (0x1F28 <= cp && 0x1F2F >= cp) || (0x1F38 <= cp && 0x1F3F >= cp)
		|| (0x1F48 <= cp && 0x1F4D >= cp) || (0x1F68 <= cp && 0x1F6F >= cp)
		|| (0x1F88 <= cp && 0x1F8F >= cp) || (0x1F98 <= cp && 0x1F9F >= cp)
		|| (0x1FA8 <= cp && 0x1FAF >= cp) {
		// GREEK
		cp -= 8
	} else {
		// irregular single mappings
		match cp {
			0x0178 { cp = 0x00ff }
			0x0243 { cp = 0x0180 }
			0x018e { cp = 0x01dd }
			0x023d { cp = 0x019a }
			0x0220 { cp = 0x019e }
			0x01b7 { cp = 0x0292 }
			0x01c4 { cp = 0x01c6 }
			0x01c7 { cp = 0x01c9 }
			0x01ca { cp = 0x01cc }
			0x01f1 { cp = 0x01f3 }
			0x01f7 { cp = 0x01bf }
			0x0187 { cp = 0x0188 }
			0x018b { cp = 0x018c }
			0x0191 { cp = 0x0192 }
			0x0198 { cp = 0x0199 }
			0x01a7 { cp = 0x01a8 }
			0x01ac { cp = 0x01ad }
			0x01b8 { cp = 0x01b9 }
			0x01bc { cp = 0x01bd }
			0x01f4 { cp = 0x01f5 }
			0x023b { cp = 0x023c }
			0x0241 { cp = 0x0242 }
			0x03fd { cp = 0x037b }
			0x03fe { cp = 0x037c }
			0x03ff { cp = 0x037d }
			0x037f { cp = 0x03f3 }
			0x0386 { cp = 0x03ac }
			0x0388 { cp = 0x03ad }
			0x0389 { cp = 0x03ae }
			0x038a { cp = 0x03af }
			0x038c { cp = 0x03cc }
			0x038e { cp = 0x03cd }
			0x038f { cp = 0x03ce }
			0x0370 { cp = 0x0371 }
			0x0372 { cp = 0x0373 }
			0x0376 { cp = 0x0377 }
			0x03f4 { cp = 0x03b8 }
			0x03cf { cp = 0x03d7 }
			0x03f9 { cp = 0x03f2 }
			0x03f7 { cp = 0x03f8 }
			0x03fa { cp = 0x03fb }
			// CYRILLIC PALOCHKA
			0x04c0 { cp = 0x04cf }
			// GREEK
			0x1F59 { cp = 0x1F51 }
			0x1F5B { cp = 0x1F53 }
			0x1F5D { cp = 0x1F55 }
			0x1F5F { cp = 0x1F57 }
			0x1FB8 { cp = 0x1FB0 }
			0x1FB9 { cp = 0x1FB1 }
			0x1FD8 { cp = 0x1FD0 }
			0x1FD9 { cp = 0x1FD1 }
			0x1FE8 { cp = 0x1FE0 }
			0x1FE9 { cp = 0x1FE1 }
			else {}
		}
	}

	return cp
}
297
// utf8_to_upper maps the unicode code point `in_cp` to its uppercase form.
// Code points that are not covered by one of the ranges or single mappings
// below are returned unchanged.
fn utf8_to_upper(in_cp int) int {
	mut cp := in_cp
	if (0x0061 <= cp && 0x007a >= cp) || (0x00e0 <= cp && 0x00f6 >= cp)
		|| (0x00f8 <= cp && 0x00fe >= cp) || (0x03b1 <= cp && 0x03c1 >= cp)
		|| (0x03c3 <= cp && 0x03cb >= cp) || (0x0430 <= cp && 0x044f >= cp) {
		// uppercase block sits 32 code points below the lowercase block
		cp -= 32
	} else if 0x0450 <= cp && 0x045f >= cp {
		cp -= 80
	} else if (0x0100 <= cp && 0x012f >= cp) || (0x0132 <= cp && 0x0137 >= cp)
		|| (0x014a <= cp && 0x0177 >= cp) || (0x0182 <= cp && 0x0185 >= cp)
		|| (0x01a0 <= cp && 0x01a5 >= cp) || (0x01de <= cp && 0x01ef >= cp)
		|| (0x01f8 <= cp && 0x021f >= cp) || (0x0222 <= cp && 0x0233 >= cp)
		|| (0x0246 <= cp && 0x024f >= cp) || (0x03d8 <= cp && 0x03ef >= cp)
		|| (0x0460 <= cp && 0x0481 >= cp) || (0x048a <= cp && 0x04bf >= cp)
		|| (0x04d0 <= cp && 0x04ff >= cp) {
		// pairs stored as (even = uppercase, odd = lowercase)
		cp &= ~0x1
	} else if (0x0139 <= cp && 0x0148 >= cp) || (0x0179 <= cp && 0x017e >= cp)
		|| (0x01af <= cp && 0x01b0 >= cp) || (0x01b3 <= cp && 0x01b6 >= cp)
		|| (0x01cd <= cp && 0x01dc >= cp) || (0x04c1 <= cp && 0x04ce >= cp) {
		// pairs stored as (odd = uppercase, even = lowercase)
		cp -= 1
		cp |= 0x1
	} else if (0x0561 <= cp && 0x0586 >= cp) || (0x10D0 <= cp && 0x10F5 >= cp) {
		// ARMENIAN or GEORGIAN
		cp -= 0x30
	} else if ((0x1E01 <= cp && 0x1E95 >= cp) || (0x1EA1 <= cp && 0x1EF9 >= cp)) && cp & 1 == 1 {
		// LATIN SMALL LETTER
		cp -= 1
	} else if 0x24D0 <= cp && 0x24E9 >= cp {
		// CIRCLED LATIN
		cp -= 0x1a
	} else if 0xFF41 <= cp && 0xFF5A >= cp {
		// FULLWIDTH LATIN SMALL (U+FF41 'a' maps to U+FF21 'A')
		cp -= 0x20
	} else if (0x1F00 <= cp && 0x1F07 >= cp) || (0x1F10 <= cp && 0x1F15 >= cp)
		|| (0x1F20 <= cp && 0x1F27 >= cp) || (0x1F30 <= cp && 0x1F37 >= cp)
		|| (0x1F40 <= cp && 0x1F45 >= cp) || (0x1F60 <= cp && 0x1F67 >= cp)
		|| (0x1F80 <= cp && 0x1F87 >= cp) || (0x1F90 <= cp && 0x1F97 >= cp)
		|| (0x1FA0 <= cp && 0x1FA7 >= cp) {
		// GREEK
		cp += 8
	} else {
		// irregular single mappings
		match cp {
			0x00ff { cp = 0x0178 }
			0x0180 { cp = 0x0243 }
			0x01dd { cp = 0x018e }
			0x019a { cp = 0x023d }
			0x019e { cp = 0x0220 }
			0x0292 { cp = 0x01b7 }
			0x01c6 { cp = 0x01c4 }
			0x01c9 { cp = 0x01c7 }
			0x01cc { cp = 0x01ca }
			0x01f3 { cp = 0x01f1 }
			0x01bf { cp = 0x01f7 }
			0x0188 { cp = 0x0187 }
			0x018c { cp = 0x018b }
			0x0192 { cp = 0x0191 }
			0x0199 { cp = 0x0198 }
			0x01a8 { cp = 0x01a7 }
			0x01ad { cp = 0x01ac }
			0x01b9 { cp = 0x01b8 }
			0x01bd { cp = 0x01bc }
			0x01f5 { cp = 0x01f4 }
			0x023c { cp = 0x023b }
			0x0242 { cp = 0x0241 }
			0x037b { cp = 0x03fd }
			0x037c { cp = 0x03fe }
			0x037d { cp = 0x03ff }
			0x03f3 { cp = 0x037f }
			0x03ac { cp = 0x0386 }
			0x03ad { cp = 0x0388 }
			0x03ae { cp = 0x0389 }
			0x03af { cp = 0x038a }
			0x03cc { cp = 0x038c }
			0x03cd { cp = 0x038e }
			0x03ce { cp = 0x038f }
			0x0371 { cp = 0x0370 }
			0x0373 { cp = 0x0372 }
			0x0377 { cp = 0x0376 }
			0x03d1 { cp = 0x0398 }
			0x03d7 { cp = 0x03cf }
			0x03f2 { cp = 0x03f9 }
			0x03f8 { cp = 0x03f7 }
			0x03fb { cp = 0x03fa }
			// CYRILLIC PALOCHKA
			0x04cf { cp = 0x04c0 }
			// GREEK
			0x1F51 { cp = 0x1F59 }
			0x1F53 { cp = 0x1F5B }
			0x1F55 { cp = 0x1F5D }
			0x1F57 { cp = 0x1F5F }
			0x1FB0 { cp = 0x1FB8 }
			0x1FB1 { cp = 0x1FB9 }
			0x1FD0 { cp = 0x1FD8 }
			0x1FD1 { cp = 0x1FD9 }
			0x1FE0 { cp = 0x1FE8 }
			0x1FE1 { cp = 0x1FE9 }
			else {}
		}
	}

	return cp
}
399
// convert_case converts the letter case of every character of `s`
// and returns the result as a new string.
//
// if upper_flag == true then convert lowercase ==> uppercase
// if upper_flag == false then convert uppercase ==> lowercase
//
// One and two/three byte characters are converted; four byte characters are
// copied unchanged. A trailing multi-byte sequence that is cut off by the end
// of the string is copied unchanged as well, so that no byte outside of `s`
// is read and no byte outside of the result buffer is written (bounds checks
// are disabled in this function).
@[direct_array_access]
fn convert_case(s string, upper_flag bool) string {
	// nothing to convert; also avoids touching s[0] of an empty string
	if s.len == 0 {
		return ''
	}

	mut index := 0
	mut str_res := unsafe { malloc_noscan(s.len + 1) }

	for index < s.len {
		full_len := utf8_char_len(s[index])
		// clamp a sequence that would run past the end of the input
		ch_len := if index + full_len > s.len { s.len - index } else { full_len }
		complete := ch_len == full_len

		if complete && ch_len == 1 {
			mut c := s[index]
			if upper_flag {
				// Clear bit 0x20 of ASCII lowercase to convert to uppercase.
				if c >= 0x61 && c <= 0x7a {
					c &= 0xdf
				}
			} else {
				// Set bit 0x20 of ASCII uppercase to convert to lowercase.
				if c >= 0x41 && c <= 0x5a {
					c |= 0x20
				}
			}
			unsafe {
				str_res[index] = c
			}
		} else if complete && (ch_len == 2 || ch_len == 3) {
			// pack the raw bytes of the sequence into one word
			mut lword := 0
			for i := 0; i < ch_len; i++ {
				lword = int(u32(lword) << 8 | u32(s[index + i]))
			}

			// extract the code point
			// 2 byte utf-8, byte format: 110xxxxx 10xxxxxx
			// 3 byte utf-8, byte format: 1110xxxx 10xxxxxx 10xxxxxx
			res := if ch_len == 2 {
				((lword & 0x1f00) >> 2) | (lword & 0x3f)
			} else {
				((lword & 0x0f0000) >> 4) | ((lword & 0x3f00) >> 2) | (lword & 0x3f)
			}

			tab_char := if upper_flag { utf8_to_upper(res) } else { utf8_to_lower(res) }

			// re-encode with the same number of bytes as the source character;
			// the mapping tables are expected to keep the encoded length
			if ch_len == 2 {
				ch0 := u8((tab_char >> 6) & 0x1f) | 0xc0 // 110x xxxx
				ch1 := u8((tab_char >> 0) & 0x3f) | 0x80 // 10xx xxxx
				unsafe {
					str_res[index + 0] = ch0
					str_res[index + 1] = ch1
				}
			} else {
				ch0 := u8((tab_char >> 12) & 0x0f) | 0xe0 // 1110 xxxx
				ch1 := u8((tab_char >> 6) & 0x3f) | 0x80 // 10xx xxxx
				ch2 := u8((tab_char >> 0) & 0x3f) | 0x80 // 10xx xxxx
				unsafe {
					str_res[index + 0] = ch0
					str_res[index + 1] = ch1
					str_res[index + 2] = ch2
				}
			}
		} else {
			// 4 byte characters (TODO: convert if needed) and truncated
			// sequences: simply copy the bytes
			for i in 0 .. ch_len {
				unsafe {
					str_res[index + i] = s[index + i]
				}
			}
		}

		index += ch_len
	}

	// for c compatibility set the ending 0
	unsafe {
		str_res[s.len] = 0
		return tos(str_res, s.len)
	}
}
523
// find_punct_in_table looks for `in_code` in the sorted table `in_table`
// using a binary search. It returns the index of the matching entry
// (converted to a rune), or rune(-1) when the code is not in the table
// or the table is empty.
@[direct_array_access]
fn find_punct_in_table(in_code rune, in_table []rune) rune {
	// bounds checks are disabled here, so never probe an empty table
	if in_table.len == 0 {
		return rune(-1)
	}

	mut lo := 0
	mut hi := in_table.len
	mut probe := 0

	for {
		candidate := in_table[probe]

		if candidate == in_code {
			return rune(probe)
		}
		if candidate > in_code {
			hi = probe
		} else {
			lo = probe
		}

		// the remaining window holds only the already probed entry
		if hi - lo <= 1 {
			break
		}
		probe = (lo + hi) >> 1
	}

	return rune(-1)
}
554
555// Unicode punctuation chars
556//
557// source: http://www.unicode.org/faq/punctuation_symbols.html
558
// Western punctuation marks, sorted by code point so that the table
// can be searched with a binary search.
// Each entry: code point, // character name, glyph
const unicode_punct_western = [
	// Basic Latin
	rune(0x0021), // EXCLAMATION MARK !
	rune(0x0022), // QUOTATION MARK "
	rune(0x0027), // APOSTROPHE '
	rune(0x002A), // ASTERISK *
	rune(0x002C), // COMMA ,
	rune(0x002E), // FULL STOP .
	rune(0x002F), // SOLIDUS /
	rune(0x003A), // COLON :
	rune(0x003B), // SEMICOLON ;
	rune(0x003F), // QUESTION MARK ?
	// Latin-1 Supplement
	rune(0x00A1), // INVERTED EXCLAMATION MARK ¡
	rune(0x00A7), // SECTION SIGN §
	rune(0x00B6), // PILCROW SIGN ¶
	rune(0x00B7), // MIDDLE DOT ·
	rune(0x00BF), // INVERTED QUESTION MARK ¿
	// Greek
	rune(0x037E), // GREEK QUESTION MARK ;
	rune(0x0387), // GREEK ANO TELEIA ·
	// Armenian
	rune(0x055A), // ARMENIAN APOSTROPHE ՚
	rune(0x055B), // ARMENIAN EMPHASIS MARK ՛
	rune(0x055C), // ARMENIAN EXCLAMATION MARK ՜
	rune(0x055D), // ARMENIAN COMMA ՝
	rune(0x055E), // ARMENIAN QUESTION MARK ՞
	rune(0x055F), // ARMENIAN ABBREVIATION MARK ՟
	rune(0x0589), // ARMENIAN FULL STOP ։
	// Hebrew
	rune(0x05C0), // HEBREW PUNCTUATION PASEQ ׀
	rune(0x05C3), // HEBREW PUNCTUATION SOF PASUQ ׃
	rune(0x05C6), // HEBREW PUNCTUATION NUN HAFUKHA ׆
	rune(0x05F3), // HEBREW PUNCTUATION GERESH ׳
	rune(0x05F4), // HEBREW PUNCTUATION GERSHAYIM ״
]
592
593// Unicode Characters in the 'Punctuation, Other' Category
594// Character Name Browser Image
595const unicode_punct = [
596 rune(0x0021), // EXCLAMATION MARK !
597 0x0022, // QUOTATION MARK "
598 0x0023, // NUMBER SIGN #
599 0x0025, // PERCENT SIGN %
600 0x0026, // AMPERSAND &
601 0x0027, // APOSTROPHE '
602 0x002A, // ASTERISK *
603 0x002C, // COMMA ,
604 0x002E, // FULL STOP .
605 0x002F, // SOLIDUS /
606 0x003A, // COLON :
607 0x003B, // SEMICOLON ;
608 0x003F, // QUESTION MARK ?
609 0x0040, // COMMERCIAL AT @
610 0x005C, // REVERSE SOLIDUS \
611 0x00A1, // INVERTED EXCLAMATION MARK ¡
612 0x00A7, // SECTION SIGN §
613 0x00B6, // PILCROW SIGN ¶
614 0x00B7, // MIDDLE DOT ·
615 0x00BF, // INVERTED QUESTION MARK ¿
616 0x037E, // GREEK QUESTION MARK ;
617 0x0387, // GREEK ANO TELEIA ·
618 0x055A, // ARMENIAN APOSTROPHE ՚
619 0x055B, // ARMENIAN EMPHASIS MARK ՛
620 0x055C, // ARMENIAN EXCLAMATION MARK ՜
621 0x055D, // ARMENIAN COMMA ՝
622 0x055E, // ARMENIAN QUESTION MARK ՞
623 0x055F, // ARMENIAN ABBREVIATION MARK ՟
624 0x0589, // ARMENIAN FULL STOP ։
625 0x05C0, // HEBREW PUNCTUATION PASEQ ׀
626 0x05C3, // HEBREW PUNCTUATION SOF PASUQ ׃
627 0x05C6, // HEBREW PUNCTUATION NUN HAFUKHA ׆
628 0x05F3, // HEBREW PUNCTUATION GERESH ׳
629 0x05F4, // HEBREW PUNCTUATION GERSHAYIM ״
630 0x0609, // ARABIC-INDIC PER MILLE SIGN ؉
631 0x060A, // ARABIC-INDIC PER TEN THOUSAND SIGN ؊
632 0x060C, // ARABIC COMMA ،
633 0x060D, // ARABIC DATE SEPARATOR ؍
634 0x061B, // ARABIC SEMICOLON ؛
635 0x061E, // ARABIC TRIPLE DOT PUNCTUATION MARK ؞
636 0x061F, // ARABIC QUESTION MARK ؟
637 0x066A, // ARABIC PERCENT SIGN ٪
638 0x066B, // ARABIC DECIMAL SEPARATOR ٫
639 0x066C, // ARABIC THOUSANDS SEPARATOR ٬
640 0x066D, // ARABIC FIVE POINTED STAR ٭
641 0x06D4, // ARABIC FULL STOP ۔
642 0x0700, // SYRIAC END OF PARAGRAPH ܀
643 0x0701, // SYRIAC SUPRALINEAR FULL STOP ܁
644 0x0702, // SYRIAC SUBLINEAR FULL STOP ܂
645 0x0703, // SYRIAC SUPRALINEAR COLON ܃
646 0x0704, // SYRIAC SUBLINEAR COLON ܄
647 0x0705, // SYRIAC HORIZONTAL COLON ܅
648 0x0706, // SYRIAC COLON SKEWED LEFT ܆
649 0x0707, // SYRIAC COLON SKEWED RIGHT ܇
650 0x0708, // SYRIAC SUPRALINEAR COLON SKEWED LEFT ܈
651 0x0709, // SYRIAC SUBLINEAR COLON SKEWED RIGHT ܉
652 0x070A, // SYRIAC CONTRACTION ܊
653 0x070B, // SYRIAC HARKLEAN OBELUS ܋
654 0x070C, // SYRIAC HARKLEAN METOBELUS ܌
655 0x070D, // SYRIAC HARKLEAN ASTERISCUS ܍
656 0x07F7, // NKO SYMBOL GBAKURUNEN ߷
657 0x07F8, // NKO COMMA ߸
658 0x07F9, // NKO EXCLAMATION MARK ߹
659 0x0830, // SAMARITAN PUNCTUATION NEQUDAA ࠰
660 0x0831, // SAMARITAN PUNCTUATION AFSAAQ ࠱
661 0x0832, // SAMARITAN PUNCTUATION ANGED ࠲
662 0x0833, // SAMARITAN PUNCTUATION BAU ࠳
663 0x0834, // SAMARITAN PUNCTUATION ATMAAU ࠴
664 0x0835, // SAMARITAN PUNCTUATION SHIYYAALAA ࠵
665 0x0836, // SAMARITAN ABBREVIATION MARK ࠶
666 0x0837, // SAMARITAN PUNCTUATION MELODIC QITSA ࠷
667 0x0838, // SAMARITAN PUNCTUATION ZIQAA ࠸
668 0x0839, // SAMARITAN PUNCTUATION QITSA ࠹
669 0x083A, // SAMARITAN PUNCTUATION ZAEF ࠺
670 0x083B, // SAMARITAN PUNCTUATION TURU ࠻
671 0x083C, // SAMARITAN PUNCTUATION ARKAANU ࠼
672 0x083D, // SAMARITAN PUNCTUATION SOF MASHFAAT ࠽
673 0x083E, // SAMARITAN PUNCTUATION ANNAAU ࠾
674 0x085E, // MANDAIC PUNCTUATION ࡞
675 0x0964, // DEVANAGARI DANDA ।
676 0x0965, // DEVANAGARI DOUBLE DANDA ॥
677 0x0970, // DEVANAGARI ABBREVIATION SIGN ॰
678 0x09FD, // BENGALI ABBREVIATION SIGN ৽
679 0x0A76, // GURMUKHI ABBREVIATION SIGN ੶
680 0x0AF0, // GUJARATI ABBREVIATION SIGN ૰
681 0x0C77, // TELUGU SIGN SIDDHAM ౷
682 0x0C84, // KANNADA SIGN SIDDHAM ಄
683 0x0DF4, // SINHALA PUNCTUATION KUNDDALIYA ෴
684 0x0E4F, // THAI CHARACTER FONGMAN ๏
685 0x0E5A, // THAI CHARACTER ANGKHANKHU ๚
686 0x0E5B, // THAI CHARACTER KHOMUT ๛
687 0x0F04, // TIBETAN MARK INITIAL YIG MGO MDUN MA ༄
688 0x0F05, // TIBETAN MARK CLOSING YIG MGO SGAB MA ༅
689 0x0F06, // TIBETAN MARK CARET YIG MGO PHUR SHAD MA ༆
690 0x0F07, // TIBETAN MARK YIG MGO TSHEG SHAD MA ༇
691 0x0F08, // TIBETAN MARK SBRUL SHAD ༈
692 0x0F09, // TIBETAN MARK BSKUR YIG MGO ༉
693 0x0F0A, // TIBETAN MARK BKA- SHOG YIG MGO ༊
694 0x0F0B, // TIBETAN MARK INTERSYLLABIC TSHEG ་
695 0x0F0C, // TIBETAN MARK DELIMITER TSHEG BSTAR ༌
696 0x0F0D, // TIBETAN MARK SHAD །
697 0x0F0E, // TIBETAN MARK NYIS SHAD ༎
698 0x0F0F, // TIBETAN MARK TSHEG SHAD ༏
699 0x0F10, // TIBETAN MARK NYIS TSHEG SHAD ༐
700 0x0F11, // TIBETAN MARK RIN CHEN SPUNGS SHAD ༑
701 0x0F12, // TIBETAN MARK RGYA GRAM SHAD ༒
702 0x0F14, // TIBETAN MARK GTER TSHEG ༔
703 0x0F85, // TIBETAN MARK PALUTA ྅
704 0x0FD0, // TIBETAN MARK BSKA- SHOG GI MGO RGYAN ࿐
705 0x0FD1, // TIBETAN MARK MNYAM YIG GI MGO RGYAN ࿑
706 0x0FD2, // TIBETAN MARK NYIS TSHEG ࿒
707 0x0FD3, // TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA ࿓
708 0x0FD4, // TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA ࿔
709 0x0FD9, // TIBETAN MARK LEADING MCHAN RTAGS ࿙
710 0x0FDA, // TIBETAN MARK TRAILING MCHAN RTAGS ࿚
711 0x104A, // MYANMAR SIGN LITTLE SECTION ၊
712 0x104B, // MYANMAR SIGN SECTION ။
713 0x104C, // MYANMAR SYMBOL LOCATIVE ၌
714 0x104D, // MYANMAR SYMBOL COMPLETED ၍
715 0x104E, // MYANMAR SYMBOL AFOREMENTIONED ၎
716 0x104F, // MYANMAR SYMBOL GENITIVE ၏
717 0x10FB, // GEORGIAN PARAGRAPH SEPARATOR ჻
718 0x1360, // ETHIOPIC SECTION MARK ፠
719 0x1361, // ETHIOPIC WORDSPACE ፡
720 0x1362, // ETHIOPIC FULL STOP ።
721 0x1363, // ETHIOPIC COMMA ፣
722 0x1364, // ETHIOPIC SEMICOLON ፤
723 0x1365, // ETHIOPIC COLON ፥
724 0x1366, // ETHIOPIC PREFACE COLON ፦
725 0x1367, // ETHIOPIC QUESTION MARK ፧
726 0x1368, // ETHIOPIC PARAGRAPH SEPARATOR ፨
727 0x166E, // CANADIAN SYLLABICS FULL STOP ᙮
728 0x16EB, // RUNIC SINGLE PUNCTUATION ᛫
729 0x16EC, // RUNIC MULTIPLE PUNCTUATION ᛬
730 0x16ED, // RUNIC CROSS PUNCTUATION ᛭
731 0x1735, // PHILIPPINE SINGLE PUNCTUATION ᜵
732 0x1736, // PHILIPPINE DOUBLE PUNCTUATION ᜶
733 0x17D4, // KHMER SIGN KHAN ។
734 0x17D5, // KHMER SIGN BARIYOOSAN ៕
735 0x17D6, // KHMER SIGN CAMNUC PII KUUH ៖
736 0x17D8, // KHMER SIGN BEYYAL ៘
737 0x17D9, // KHMER SIGN PHNAEK MUAN ៙
738 0x17DA, // KHMER SIGN KOOMUUT ៚
739 0x1800, // MONGOLIAN BIRGA ᠀
740 0x1801, // MONGOLIAN ELLIPSIS ᠁
741 0x1802, // MONGOLIAN COMMA ᠂
742 0x1803, // MONGOLIAN FULL STOP ᠃
743 0x1804, // MONGOLIAN COLON ᠄
744 0x1805, // MONGOLIAN FOUR DOTS ᠅
745 0x1807, // MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER ᠇
746 0x1808, // MONGOLIAN MANCHU COMMA ᠈
747 0x1809, // MONGOLIAN MANCHU FULL STOP ᠉
748 0x180A, // MONGOLIAN NIRUGU ᠊
749 0x1944, // LIMBU EXCLAMATION MARK ᥄
750 0x1945, // LIMBU QUESTION MARK ᥅
751 0x1A1E, // BUGINESE PALLAWA ᨞
752 0x1A1F, // BUGINESE END OF SECTION ᨟
753 0x1AA0, // TAI THAM SIGN WIANG ᪠
754 0x1AA1, // TAI THAM SIGN WIANGWAAK ᪡
755 0x1AA2, // TAI THAM SIGN SAWAN ᪢
756 0x1AA3, // TAI THAM SIGN KEOW ᪣
757 0x1AA4, // TAI THAM SIGN HOY ᪤
758 0x1AA5, // TAI THAM SIGN DOKMAI ᪥
759 0x1AA6, // TAI THAM SIGN REVERSED ROTATED RANA ᪦
760 0x1AA8, // TAI THAM SIGN KAAN ᪨
761 0x1AA9, // TAI THAM SIGN KAANKUU ᪩
762 0x1AAA, // TAI THAM SIGN SATKAAN ᪪
763 0x1AAB, // TAI THAM SIGN SATKAANKUU ᪫
764 0x1AAC, // TAI THAM SIGN HANG ᪬
765 0x1AAD, // TAI THAM SIGN CAANG ᪭
766 0x1B5A, // BALINESE PANTI ᭚
767 0x1B5B, // BALINESE PAMADA ᭛
768 0x1B5C, // BALINESE WINDU ᭜
769 0x1B5D, // BALINESE CARIK PAMUNGKAH ᭝
770 0x1B5E, // BALINESE CARIK SIKI ᭞
771 0x1B5F, // BALINESE CARIK PAREREN ᭟
772 0x1B60, // BALINESE PAMENENG ᭠
773 0x1BFC, // BATAK SYMBOL BINDU NA METEK ᯼
774 0x1BFD, // BATAK SYMBOL BINDU PINARBORAS ᯽
775 0x1BFE, // BATAK SYMBOL BINDU JUDUL ᯾
776 0x1BFF, // BATAK SYMBOL BINDU PANGOLAT ᯿
777 0x1C3B, // LEPCHA PUNCTUATION TA-ROL ᰻
778 0x1C3C, // LEPCHA PUNCTUATION NYET THYOOM TA-ROL ᰼
779 0x1C3D, // LEPCHA PUNCTUATION CER-WA ᰽
780 0x1C3E, // LEPCHA PUNCTUATION TSHOOK CER-WA ᰾
781 0x1C3F, // LEPCHA PUNCTUATION TSHOOK ᰿
782 0x1C7E, // OL CHIKI PUNCTUATION MUCAAD ᱾
783 0x1C7F, // OL CHIKI PUNCTUATION DOUBLE MUCAAD ᱿
784 0x1CC0, // SUNDANESE PUNCTUATION BINDU SURYA ᳀
785 0x1CC1, // SUNDANESE PUNCTUATION BINDU PANGLONG ᳁
786 0x1CC2, // SUNDANESE PUNCTUATION BINDU PURNAMA ᳂
787 0x1CC3, // SUNDANESE PUNCTUATION BINDU CAKRA ᳃
788 0x1CC4, // SUNDANESE PUNCTUATION BINDU LEU SATANGA ᳄
789 0x1CC5, // SUNDANESE PUNCTUATION BINDU KA SATANGA ᳅
790 0x1CC6, // SUNDANESE PUNCTUATION BINDU DA SATANGA ᳆
791 0x1CC7, // SUNDANESE PUNCTUATION BINDU BA SATANGA ᳇
792 0x1CD3, // VEDIC SIGN NIHSHVASA ᳓
793 0x2016, // DOUBLE VERTICAL LINE ‖
794 0x2017, // DOUBLE LOW LINE ‗
795 0x2020, // DAGGER †
796 0x2021, // DOUBLE DAGGER ‡
797 0x2022, // BULLET •
798 0x2023, // TRIANGULAR BULLET ‣
799 0x2024, // ONE DOT LEADER ․
800 0x2025, // TWO DOT LEADER ‥
801 0x2026, // HORIZONTAL ELLIPSIS …
802 0x2027, // HYPHENATION POINT ‧
803 0x2030, // PER MILLE SIGN ‰
804 0x2031, // PER TEN THOUSAND SIGN ‱
805 0x2032, // PRIME ′
806 0x2033, // DOUBLE PRIME ″
807 0x2034, // TRIPLE PRIME ‴
808 0x2035, // REVERSED PRIME ‵
809 0x2036, // REVERSED DOUBLE PRIME ‶
810 0x2037, // REVERSED TRIPLE PRIME ‷
811 0x2038, // CARET ‸
812 0x203B, // REFERENCE MARK ※
813 0x203C, // DOUBLE EXCLAMATION MARK ‼
814 0x203D, // INTERROBANG ‽
815 0x203E, // OVERLINE ‾
816 0x2041, // CARET INSERTION POINT ⁁
817 0x2042, // ASTERISM ⁂
818 0x2043, // HYPHEN BULLET ⁃
819 0x2047, // DOUBLE QUESTION MARK ⁇
820 0x2048, // QUESTION EXCLAMATION MARK ⁈
821 0x2049, // EXCLAMATION QUESTION MARK ⁉
822 0x204A, // TIRONIAN SIGN ET ⁊
823 0x204B, // REVERSED PILCROW SIGN ⁋
824 0x204C, // BLACK LEFTWARDS BULLET ⁌
825 0x204D, // BLACK RIGHTWARDS BULLET ⁍
826 0x204E, // LOW ASTERISK ⁎
827 0x204F, // REVERSED SEMICOLON ⁏
828 0x2050, // CLOSE UP ⁐
829 0x2051, // TWO ASTERISKS ALIGNED VERTICALLY ⁑
830 0x2053, // SWUNG DASH ⁓
831 0x2055, // FLOWER PUNCTUATION MARK ⁕
832 0x2056, // THREE DOT PUNCTUATION ⁖
833 0x2057, // QUADRUPLE PRIME ⁗
834 0x2058, // FOUR DOT PUNCTUATION ⁘
835 0x2059, // FIVE DOT PUNCTUATION ⁙
836 0x205A, // TWO DOT PUNCTUATION ⁚
837 0x205B, // FOUR DOT MARK ⁛
838 0x205C, // DOTTED CROSS ⁜
839 0x205D, // TRICOLON ⁝
840 0x205E, // VERTICAL FOUR DOTS ⁞
841 0x2CF9, // COPTIC OLD NUBIAN FULL STOP ⳹
842 0x2CFA, // COPTIC OLD NUBIAN DIRECT QUESTION MARK ⳺
843 0x2CFB, // COPTIC OLD NUBIAN INDIRECT QUESTION MARK ⳻
844 0x2CFC, // COPTIC OLD NUBIAN VERSE DIVIDER ⳼
845 0x2CFE, // COPTIC FULL STOP ⳾
846 0x2CFF, // COPTIC MORPHOLOGICAL DIVIDER ⳿
847 0x2D70, // TIFINAGH SEPARATOR MARK ⵰
848 0x2E00, // RIGHT ANGLE SUBSTITUTION MARKER ⸀
849 0x2E01, // RIGHT ANGLE DOTTED SUBSTITUTION MARKER ⸁
850 0x2E06, // RAISED INTERPOLATION MARKER ⸆
851 0x2E07, // RAISED DOTTED INTERPOLATION MARKER ⸇
852 0x2E08, // DOTTED TRANSPOSITION MARKER ⸈
853 0x2E0B, // RAISED SQUARE ⸋
854 0x2E0E, // EDITORIAL CORONIS ⸎
855 0x2E0F, // PARAGRAPHOS ⸏
856 0x2E10, // FORKED PARAGRAPHOS ⸐
857 0x2E11, // REVERSED FORKED PARAGRAPHOS ⸑
858 0x2E12, // HYPODIASTOLE ⸒
859 0x2E13, // DOTTED OBELOS ⸓
860 0x2E14, // DOWNWARDS ANCORA ⸔
861 0x2E15, // UPWARDS ANCORA ⸕
862 0x2E16, // DOTTED RIGHT-POINTING ANGLE ⸖
863 0x2E18, // INVERTED INTERROBANG ⸘
864 0x2E19, // PALM BRANCH ⸙
865 0x2E1B, // TILDE WITH RING ABOVE ⸛
866 0x2E1E, // TILDE WITH DOT ABOVE ⸞
867 0x2E1F, // TILDE WITH DOT BELOW ⸟
868 0x2E2A, // TWO DOTS OVER ONE DOT PUNCTUATION ⸪
869 0x2E2B, // ONE DOT OVER TWO DOTS PUNCTUATION ⸫
870 0x2E2C, // SQUARED FOUR DOT PUNCTUATION ⸬
871 0x2E2D, // FIVE DOT MARK ⸭
872 0x2E2E, // REVERSED QUESTION MARK ⸮
873 0x2E30, // RING POINT ⸰
874 0x2E31, // WORD SEPARATOR MIDDLE DOT ⸱
875 0x2E32, // TURNED COMMA ⸲
876 0x2E33, // RAISED DOT ⸳
877 0x2E34, // RAISED COMMA ⸴
878 0x2E35, // TURNED SEMICOLON ⸵
879 0x2E36, // DAGGER WITH LEFT GUARD ⸶
880 0x2E37, // DAGGER WITH RIGHT GUARD ⸷
881 0x2E38, // TURNED DAGGER ⸸
882 0x2E39, // TOP HALF SECTION SIGN ⸹
883 0x2E3C, // STENOGRAPHIC FULL STOP ⸼
884 0x2E3D, // VERTICAL SIX DOTS ⸽
885 0x2E3E, // WIGGLY VERTICAL LINE ⸾
886 0x2E3F, // CAPITULUM ⸿
887 0x2E41, // REVERSED COMMA ⹁
888 0x2E43, // DASH WITH LEFT UPTURN ⹃
889 0x2E44, // DOUBLE SUSPENSION MARK ⹄
890 0x2E45, // INVERTED LOW KAVYKA ⹅
891 0x2E46, // INVERTED LOW KAVYKA WITH KAVYKA ABOVE ⹆
892 0x2E47, // LOW KAVYKA ⹇
893 0x2E48, // LOW KAVYKA WITH DOT ⹈
894 0x2E49, // DOUBLE STACKED COMMA ⹉
895 0x2E4A, // DOTTED SOLIDUS ⹊
896 0x2E4B, // TRIPLE DAGGER ⹋
897 0x2E4C, // MEDIEVAL COMMA ⹌
898 0x2E4D, // PARAGRAPHUS MARK ⹍
899 0x2E4E, // PUNCTUS ELEVATUS MARK ⹎
900 0x2E4F, // CORNISH VERSE DIVIDER ⹏
901 0x3001, // IDEOGRAPHIC COMMA 、
902 0x3002, // IDEOGRAPHIC FULL STOP 。
903 0x3003, // DITTO MARK 〃
904 0x303D, // PART ALTERNATION MARK 〽
905 0x30FB, // KATAKANA MIDDLE DOT ・
906 0xA4FE, // LISU PUNCTUATION COMMA ꓾
907 0xA4FF, // LISU PUNCTUATION FULL STOP ꓿
908 0xA60D, // VAI COMMA ꘍
909 0xA60E, // VAI FULL STOP ꘎
910 0xA60F, // VAI QUESTION MARK ꘏
911 0xA673, // SLAVONIC ASTERISK ꙳
912 0xA67E, // CYRILLIC KAVYKA ꙾
913 0xA6F2, // BAMUM NJAEMLI ꛲
914 0xA6F3, // BAMUM FULL STOP ꛳
915 0xA6F4, // BAMUM COLON ꛴
916 0xA6F5, // BAMUM COMMA ꛵
917 0xA6F6, // BAMUM SEMICOLON ꛶
918 0xA6F7, // BAMUM QUESTION MARK ꛷
919 0xA874, // PHAGS-PA SINGLE HEAD MARK ꡴
920 0xA875, // PHAGS-PA DOUBLE HEAD MARK ꡵
921 0xA876, // PHAGS-PA MARK SHAD ꡶
922 0xA877, // PHAGS-PA MARK DOUBLE SHAD ꡷
923 0xA8CE, // SAURASHTRA DANDA ꣎
924 0xA8CF, // SAURASHTRA DOUBLE DANDA ꣏
925 0xA8F8, // DEVANAGARI SIGN PUSHPIKA ꣸
926 0xA8F9, // DEVANAGARI GAP FILLER ꣹
927 0xA8FA, // DEVANAGARI CARET ꣺
928 0xA8FC, // DEVANAGARI SIGN SIDDHAM ꣼
929 0xA92E, // KAYAH LI SIGN CWI ꤮
930 0xA92F, // KAYAH LI SIGN SHYA ꤯
931 0xA95F, // REJANG SECTION MARK ꥟
932 0xA9C1, // JAVANESE LEFT RERENGGAN ꧁
933 0xA9C2, // JAVANESE RIGHT RERENGGAN ꧂
934 0xA9C3, // JAVANESE PADA ANDAP ꧃
935 0xA9C4, // JAVANESE PADA MADYA ꧄
936 0xA9C5, // JAVANESE PADA LUHUR ꧅
937 0xA9C6, // JAVANESE PADA WINDU ꧆
938 0xA9C7, // JAVANESE PADA PANGKAT ꧇
939 0xA9C8, // JAVANESE PADA LINGSA ꧈
940 0xA9C9, // JAVANESE PADA LUNGSI ꧉
941 0xA9CA, // JAVANESE PADA ADEG ꧊
942 0xA9CB, // JAVANESE PADA ADEG ADEG ꧋
943 0xA9CC, // JAVANESE PADA PISELEH ꧌
944 0xA9CD, // JAVANESE TURNED PADA PISELEH ꧍
945 0xA9DE, // JAVANESE PADA TIRTA TUMETES ꧞
946 0xA9DF, // JAVANESE PADA ISEN-ISEN ꧟
947 0xAA5C, // CHAM PUNCTUATION SPIRAL ꩜
948 0xAA5D, // CHAM PUNCTUATION DANDA ꩝
949 0xAA5E, // CHAM PUNCTUATION DOUBLE DANDA ꩞
950 0xAA5F, // CHAM PUNCTUATION TRIPLE DANDA ꩟
951 0xAADE, // TAI VIET SYMBOL HO HOI ꫞
952 0xAADF, // TAI VIET SYMBOL KOI KOI ꫟
953 0xAAF0, // MEETEI MAYEK CHEIKHAN ꫰
954 0xAAF1, // MEETEI MAYEK AHANG KHUDAM ꫱
955 0xABEB, // MEETEI MAYEK CHEIKHEI ꯫
956 0xFE10, // PRESENTATION FORM FOR VERTICAL COMMA ︐
957 0xFE11, // PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA ︑
958 0xFE12, // PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP ︒
959 0xFE13, // PRESENTATION FORM FOR VERTICAL COLON ︓
960 0xFE14, // PRESENTATION FORM FOR VERTICAL SEMICOLON ︔
961 0xFE15, // PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK ︕
962 0xFE16, // PRESENTATION FORM FOR VERTICAL QUESTION MARK ︖
963 0xFE19, // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS ︙
964 0xFE30, // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER ︰
965 0xFE45, // SESAME DOT ﹅
966 0xFE46, // WHITE SESAME DOT ﹆
967 0xFE49, // DASHED OVERLINE ﹉
968 0xFE4A, // CENTRELINE OVERLINE ﹊
969 0xFE4B, // WAVY OVERLINE ﹋
970 0xFE4C, // DOUBLE WAVY OVERLINE ﹌
971 0xFE50, // SMALL COMMA ﹐
972 0xFE51, // SMALL IDEOGRAPHIC COMMA ﹑
973 0xFE52, // SMALL FULL STOP ﹒
974 0xFE54, // SMALL SEMICOLON ﹔
975 0xFE55, // SMALL COLON ﹕
976 0xFE56, // SMALL QUESTION MARK ﹖
977 0xFE57, // SMALL EXCLAMATION MARK ﹗
978 0xFE5F, // SMALL NUMBER SIGN ﹟
979 0xFE60, // SMALL AMPERSAND ﹠
980 0xFE61, // SMALL ASTERISK ﹡
981 0xFE68, // SMALL REVERSE SOLIDUS ﹨
982 0xFE6A, // SMALL PERCENT SIGN ﹪
983 0xFE6B, // SMALL COMMERCIAL AT ﹫
984 0xFF01, // FULLWIDTH EXCLAMATION MARK !
985 0xFF02, // FULLWIDTH QUOTATION MARK "
986 0xFF03, // FULLWIDTH NUMBER SIGN #
987 0xFF05, // FULLWIDTH PERCENT SIGN %
988 0xFF06, // FULLWIDTH AMPERSAND &
989 0xFF07, // FULLWIDTH APOSTROPHE '
990 0xFF0A, // FULLWIDTH ASTERISK *
991 0xFF0C, // FULLWIDTH COMMA ,
992 0xFF0E, // FULLWIDTH FULL STOP .
993 0xFF0F, // FULLWIDTH SOLIDUS /
994 0xFF1A, // FULLWIDTH COLON :
995 0xFF1B, // FULLWIDTH SEMICOLON ;
996 0xFF1F, // FULLWIDTH QUESTION MARK ?
997 0xFF20, // FULLWIDTH COMMERCIAL AT @
998 0xFF3C, // FULLWIDTH REVERSE SOLIDUS \
999 0xFF61, // HALFWIDTH IDEOGRAPHIC FULL STOP 。
1000 0xFF64, // HALFWIDTH IDEOGRAPHIC COMMA 、
1001 0xFF65, // HALFWIDTH KATAKANA MIDDLE DOT ・
1002 0x10100, // AEGEAN WORD SEPARATOR LINE 𐄀
1003 0x10101, // AEGEAN WORD SEPARATOR DOT 𐄁
1004 0x10102, // AEGEAN CHECK MARK 𐄂
1005 0x1039F, // UGARITIC WORD DIVIDER 𐎟
1006 0x103D0, // OLD PERSIAN WORD DIVIDER 𐏐
1007 0x1056F, // CAUCASIAN ALBANIAN CITATION MARK 𐕯
1008 0x10857, // IMPERIAL ARAMAIC SECTION SIGN 𐡗
1009 0x1091F, // PHOENICIAN WORD SEPARATOR 𐤟
1010 0x1093F, // LYDIAN TRIANGULAR MARK 𐤿
1011 0x10A50, // KHAROSHTHI PUNCTUATION DOT 𐩐
1012 0x10A51, // KHAROSHTHI PUNCTUATION SMALL CIRCLE 𐩑
1013 0x10A52, // KHAROSHTHI PUNCTUATION CIRCLE 𐩒
1014 0x10A53, // KHAROSHTHI PUNCTUATION CRESCENT BAR 𐩓
1015 0x10A54, // KHAROSHTHI PUNCTUATION MANGALAM 𐩔
1016 0x10A55, // KHAROSHTHI PUNCTUATION LOTUS 𐩕
1017 0x10A56, // KHAROSHTHI PUNCTUATION DANDA 𐩖
1018 0x10A57, // KHAROSHTHI PUNCTUATION DOUBLE DANDA 𐩗
1019 0x10A58, // KHAROSHTHI PUNCTUATION LINES 𐩘
1020 0x10A7F, // OLD SOUTH ARABIAN NUMERIC INDICATOR 𐩿
1021 0x10AF0, // MANICHAEAN PUNCTUATION STAR 𐫰
1022 0x10AF1, // MANICHAEAN PUNCTUATION FLEURON 𐫱
1023 0x10AF2, // MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT 𐫲
1024 0x10AF3, // MANICHAEAN PUNCTUATION DOT WITHIN DOT 𐫳
1025 0x10AF4, // MANICHAEAN PUNCTUATION DOT 𐫴
1026 0x10AF5, // MANICHAEAN PUNCTUATION TWO DOTS 𐫵
1027 0x10AF6, // MANICHAEAN PUNCTUATION LINE FILLER 𐫶
1028 0x10B39, // AVESTAN ABBREVIATION MARK 𐬹
1029 0x10B3A, // TINY TWO DOTS OVER ONE DOT PUNCTUATION 𐬺
1030 0x10B3B, // SMALL TWO DOTS OVER ONE DOT PUNCTUATION 𐬻
1031 0x10B3C, // LARGE TWO DOTS OVER ONE DOT PUNCTUATION 𐬼
1032 0x10B3D, // LARGE ONE DOT OVER TWO DOTS PUNCTUATION 𐬽
1033 0x10B3E, // LARGE TWO RINGS OVER ONE RING PUNCTUATION 𐬾
1034 0x10B3F, // LARGE ONE RING OVER TWO RINGS PUNCTUATION 𐬿
1035 0x10B99, // PSALTER PAHLAVI SECTION MARK 𐮙
1036 0x10B9A, // PSALTER PAHLAVI TURNED SECTION MARK 𐮚
1037 0x10B9B, // PSALTER PAHLAVI FOUR DOTS WITH CROSS 𐮛
1038 0x10B9C, // PSALTER PAHLAVI FOUR DOTS WITH DOT 𐮜
1039 0x10F55, // SOGDIAN PUNCTUATION TWO VERTICAL BARS 𐽕
1040 0x10F56, // SOGDIAN PUNCTUATION TWO VERTICAL BARS WITH DOTS 𐽖
1041 0x10F57, // SOGDIAN PUNCTUATION CIRCLE WITH DOT 𐽗
1042 0x10F58, // SOGDIAN PUNCTUATION TWO CIRCLES WITH DOTS 𐽘
1043 0x10F59, // SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT 𐽙
1044 0x11047, // BRAHMI DANDA 𑁇
1045 0x11048, // BRAHMI DOUBLE DANDA 𑁈
1046 0x11049, // BRAHMI PUNCTUATION DOT 𑁉
1047 0x1104A, // BRAHMI PUNCTUATION DOUBLE DOT 𑁊
1048 0x1104B, // BRAHMI PUNCTUATION LINE 𑁋
1049 0x1104C, // BRAHMI PUNCTUATION CRESCENT BAR 𑁌
1050 0x1104D, // BRAHMI PUNCTUATION LOTUS 𑁍
1051 0x110BB, // KAITHI ABBREVIATION SIGN 𑂻
1052 0x110BC, // KAITHI ENUMERATION SIGN 𑂼
1053 0x110BE, // KAITHI SECTION MARK 𑂾
1054 0x110BF, // KAITHI DOUBLE SECTION MARK 𑂿
1055 0x110C0, // KAITHI DANDA 𑃀
1056 0x110C1, // KAITHI DOUBLE DANDA 𑃁
1057 0x11140, // CHAKMA SECTION MARK 𑅀
1058 0x11141, // CHAKMA DANDA 𑅁
1059 0x11142, // CHAKMA DOUBLE DANDA 𑅂
1060 0x11143, // CHAKMA QUESTION MARK 𑅃
1061 0x11174, // MAHAJANI ABBREVIATION SIGN 𑅴
1062 0x11175, // MAHAJANI SECTION MARK 𑅵
1063 0x111C5, // SHARADA DANDA 𑇅
1064 0x111C6, // SHARADA DOUBLE DANDA 𑇆
1065 0x111C7, // SHARADA ABBREVIATION SIGN 𑇇
1066 0x111C8, // SHARADA SEPARATOR 𑇈
1067 0x111CD, // SHARADA SUTRA MARK 𑇍
1068 0x111DB, // SHARADA SIGN SIDDHAM 𑇛
1069 0x111DD, // SHARADA CONTINUATION SIGN 𑇝
1070 0x111DE, // SHARADA SECTION MARK-1 𑇞
1071 0x111DF, // SHARADA SECTION MARK-2 𑇟
1072 0x11238, // KHOJKI DANDA 𑈸
1073 0x11239, // KHOJKI DOUBLE DANDA 𑈹
1074 0x1123A, // KHOJKI WORD SEPARATOR 𑈺
1075 0x1123B, // KHOJKI SECTION MARK 𑈻
1076 0x1123C, // KHOJKI DOUBLE SECTION MARK 𑈼
1077 0x1123D, // KHOJKI ABBREVIATION SIGN 𑈽
1078 0x112A9, // MULTANI SECTION MARK 𑊩
1079 0x1144B, // NEWA DANDA 𑑋
1080 0x1144C, // NEWA DOUBLE DANDA 𑑌
1081 0x1144D, // NEWA COMMA 𑑍
1082 0x1144E, // NEWA GAP FILLER 𑑎
1083 0x1144F, // NEWA ABBREVIATION SIGN 𑑏
1084 0x1145B, // NEWA PLACEHOLDER MARK 𑑛
1085 0x1145D, // NEWA INSERTION SIGN 𑑝
1086 0x114C6, // TIRHUTA ABBREVIATION SIGN 𑓆
1087 0x115C1, // SIDDHAM SIGN SIDDHAM 𑗁
1088 0x115C2, // SIDDHAM DANDA 𑗂
1089 0x115C3, // SIDDHAM DOUBLE DANDA 𑗃
1090 0x115C4, // SIDDHAM SEPARATOR DOT 𑗄
1091 0x115C5, // SIDDHAM SEPARATOR BAR 𑗅
1092 0x115C6, // SIDDHAM REPETITION MARK-1 𑗆
1093 0x115C7, // SIDDHAM REPETITION MARK-2 𑗇
1094 0x115C8, // SIDDHAM REPETITION MARK-3 𑗈
1095 0x115C9, // SIDDHAM END OF TEXT MARK 𑗉
1096 0x115CA, // SIDDHAM SECTION MARK WITH TRIDENT AND U-SHAPED ORNAMENTS 𑗊
1097 0x115CB, // SIDDHAM SECTION MARK WITH TRIDENT AND DOTTED CRESCENTS 𑗋
1098 0x115CC, // SIDDHAM SECTION MARK WITH RAYS AND DOTTED CRESCENTS 𑗌
1099 0x115CD, // SIDDHAM SECTION MARK WITH RAYS AND DOTTED DOUBLE CRESCENTS 𑗍
1100 0x115CE, // SIDDHAM SECTION MARK WITH RAYS AND DOTTED TRIPLE CRESCENTS 𑗎
1101 0x115CF, // SIDDHAM SECTION MARK DOUBLE RING 𑗏
1102 0x115D0, // SIDDHAM SECTION MARK DOUBLE RING WITH RAYS 𑗐
1103 0x115D1, // SIDDHAM SECTION MARK WITH DOUBLE CRESCENTS 𑗑
1104 0x115D2, // SIDDHAM SECTION MARK WITH TRIPLE CRESCENTS 𑗒
1105 0x115D3, // SIDDHAM SECTION MARK WITH QUADRUPLE CRESCENTS 𑗓
1106 0x115D4, // SIDDHAM SECTION MARK WITH SEPTUPLE CRESCENTS 𑗔
1107 0x115D5, // SIDDHAM SECTION MARK WITH CIRCLES AND RAYS 𑗕
1108 0x115D6, // SIDDHAM SECTION MARK WITH CIRCLES AND TWO ENCLOSURES 𑗖
1109 0x115D7, // SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES 𑗗
1110 0x11641, // MODI DANDA 𑙁
1111 0x11642, // MODI DOUBLE DANDA 𑙂
1112 0x11643, // MODI ABBREVIATION SIGN 𑙃
1113 0x11660, // MONGOLIAN BIRGA WITH ORNAMENT 𑙠
1114 0x11661, // MONGOLIAN ROTATED BIRGA 𑙡
1115 0x11662, // MONGOLIAN DOUBLE BIRGA WITH ORNAMENT 𑙢
1116 0x11663, // MONGOLIAN TRIPLE BIRGA WITH ORNAMENT 𑙣
1117 0x11664, // MONGOLIAN BIRGA WITH DOUBLE ORNAMENT 𑙤
1118 0x11665, // MONGOLIAN ROTATED BIRGA WITH ORNAMENT 𑙥
1119 0x11666, // MONGOLIAN ROTATED BIRGA WITH DOUBLE ORNAMENT 𑙦
1120 0x11667, // MONGOLIAN INVERTED BIRGA 𑙧
1121 0x11668, // MONGOLIAN INVERTED BIRGA WITH DOUBLE ORNAMENT 𑙨
1122 0x11669, // MONGOLIAN SWIRL BIRGA 𑙩
1123 0x1166A, // MONGOLIAN SWIRL BIRGA WITH ORNAMENT 𑙪
1124 0x1166B, // MONGOLIAN SWIRL BIRGA WITH DOUBLE ORNAMENT 𑙫
1125 0x1166C, // MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 𑙬
1126 0x1173C, // AHOM SIGN SMALL SECTION 𑜼
1127 0x1173D, // AHOM SIGN SECTION 𑜽
1128 0x1173E, // AHOM SIGN RULAI 𑜾
1129 0x1183B, // DOGRA ABBREVIATION SIGN 𑠻
1130 0x119E2, // NANDINAGARI SIGN SIDDHAM 𑧢
1131 0x11A3F, // ZANABAZAR SQUARE INITIAL HEAD MARK 𑨿
1132 0x11A40, // ZANABAZAR SQUARE CLOSING HEAD MARK 𑩀
1133 0x11A41, // ZANABAZAR SQUARE MARK TSHEG 𑩁
1134 0x11A42, // ZANABAZAR SQUARE MARK SHAD 𑩂
1135 0x11A43, // ZANABAZAR SQUARE MARK DOUBLE SHAD 𑩃
1136 0x11A44, // ZANABAZAR SQUARE MARK LONG TSHEG 𑩄
1137 0x11A45, // ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK 𑩅
1138 0x11A46, // ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK 𑩆
1139 0x11A9A, // SOYOMBO MARK TSHEG 𑪚
1140 0x11A9B, // SOYOMBO MARK SHAD 𑪛
1141 0x11A9C, // SOYOMBO MARK DOUBLE SHAD 𑪜
1142 0x11A9E, // SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME 𑪞
1143 0x11A9F, // SOYOMBO HEAD MARK WITH MOON AND SUN AND FLAME 𑪟
1144 0x11AA0, // SOYOMBO HEAD MARK WITH MOON AND SUN 𑪠
1145 0x11AA1, // SOYOMBO TERMINAL MARK-1 𑪡
1146 0x11AA2, // SOYOMBO TERMINAL MARK-2 𑪢
1147 0x11C41, // BHAIKSUKI DANDA 𑱁
1148 0x11C42, // BHAIKSUKI DOUBLE DANDA 𑱂
1149 0x11C43, // BHAIKSUKI WORD SEPARATOR 𑱃
1150 0x11C44, // BHAIKSUKI GAP FILLER-1 𑱄
1151 0x11C45, // BHAIKSUKI GAP FILLER-2 𑱅
1152 0x11C70, // MARCHEN HEAD MARK 𑱰
1153 0x11C71, // MARCHEN MARK SHAD 𑱱
1154 0x11EF7, // MAKASAR PASSIMBANG 𑻷
1155 0x11EF8, // MAKASAR END OF SECTION 𑻸
1156 0x11FFF, // TAMIL PUNCTUATION END OF TEXT 𑿿
1157 0x12470, // CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER 𒑰
1158 0x12471, // CUNEIFORM PUNCTUATION SIGN VERTICAL COLON 𒑱
1159 0x12472, // CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON 𒑲
1160 0x12473, // CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 𒑳
1161 0x12474, // CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON 𒑴
1162 0x16A6E, // MRO DANDA 𖩮
1163 0x16A6F, // MRO DOUBLE DANDA 𖩯
1164 0x16AF5, // BASSA VAH FULL STOP 𖫵
1165 0x16B37, // PAHAWH HMONG SIGN VOS THOM 𖬷
1166 0x16B38, // PAHAWH HMONG SIGN VOS TSHAB CEEB 𖬸
1167 0x16B39, // PAHAWH HMONG SIGN CIM CHEEM 𖬹
1168 0x16B3A, // PAHAWH HMONG SIGN VOS THIAB 𖬺
1169 0x16B3B, // PAHAWH HMONG SIGN VOS FEEM 𖬻
1170 0x16B44, // PAHAWH HMONG SIGN XAUS 𖭄
1171 0x16E97, // MEDEFAIDRIN COMMA 𖺗
1172 0x16E98, // MEDEFAIDRIN FULL STOP 𖺘
1173 0x16E99, // MEDEFAIDRIN SYMBOL AIVA 𖺙
1174 0x16E9A, // MEDEFAIDRIN EXCLAMATION OH 𖺚
1175 0x16FE2, // OLD CHINESE HOOK MARK 𖿢
1176 0x1BC9F, // DUPLOYAN PUNCTUATION CHINOOK FULL STOP 𛲟
1177 0x1DA87, // SIGNWRITING COMMA 𝪇
1178 0x1DA88, // SIGNWRITING FULL STOP 𝪈
1179 0x1DA89, // SIGNWRITING SEMICOLON 𝪉
1180 0x1DA8A, // SIGNWRITING COLON 𝪊
1181 0x1DA8B, // SIGNWRITING PARENTHESIS 𝪋
1182 0x1E95E, // ADLAM INITIAL EXCLAMATION MARK 𞥞
1183 0x1E95F, // ADLAM INITIAL QUESTION MARK
1184]
1185