| 1 | // Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license that can be found in the LICENSE file. |
| 3 | module utf8 |
| 4 | |
| 5 | // Bit flags describing Unicode character properties, used for fast type lookup in `props`; flags may be OR-ed together. |
| 6 | const p_c = 1 // a control character. |
| 7 | |
| 8 | const p_p = 2 // a punctuation character. |
| 9 | |
| 10 | const p_n = 4 // a numeral. |
| 11 | |
| 12 | const p_s = 8 // a symbolic character. |
| 13 | |
| 14 | const p_z = 16 // a spacing character. |
| 15 | |
| 16 | const p_lu = 32 // an upper-case letter. |
| 17 | |
| 18 | const p_ll = 64 // a lower-case letter. |
| 19 | |
| 20 | const p_pr = 128 // a printable character according to Go's definition. |
| 21 | |
| 22 | const p_g = p_pr | p_z // a graphical character according to the Unicode definition. |
| 23 | |
| 24 | const p_lo = p_lu | p_ll // a letter that is neither upper nor lower case (both letter bits set). |
| 25 | |
| 26 | const p_l_mask = p_lo |
| 27 | |
| 28 | const props = [ |
| 29 | p_c |
| 30 | //'\x00' (index 0; one entry per code point, in code-point order, through 'ÿ') |
| 31 | p_c |
| 32 | //'\x01' |
| 33 | p_c |
| 34 | //'\x02' |
| 35 | p_c |
| 36 | //'\x03' |
| 37 | p_c |
| 38 | //'\x04' |
| 39 | p_c |
| 40 | //'\x05' |
| 41 | p_c |
| 42 | //'\x06' |
| 43 | p_c |
| 44 | //'\a' |
| 45 | p_c |
| 46 | //'\b' |
| 47 | p_c |
| 48 | //'\t' |
| 49 | p_c |
| 50 | //'\n' |
| 51 | p_c |
| 52 | //'\v' |
| 53 | p_c |
| 54 | //'\f' |
| 55 | p_c |
| 56 | //'\r' |
| 57 | p_c |
| 58 | //'\x0e' |
| 59 | p_c |
| 60 | //'\x0f' |
| 61 | p_c |
| 62 | //'\x10' |
| 63 | p_c |
| 64 | //'\x11' |
| 65 | p_c |
| 66 | //'\x12' |
| 67 | p_c |
| 68 | //'\x13' |
| 69 | p_c |
| 70 | //'\x14' |
| 71 | p_c |
| 72 | //'\x15' |
| 73 | p_c |
| 74 | //'\x16' |
| 75 | p_c |
| 76 | //'\x17' |
| 77 | p_c |
| 78 | //'\x18' |
| 79 | p_c |
| 80 | //'\x19' |
| 81 | p_c |
| 82 | //'\x1a' |
| 83 | p_c |
| 84 | //'\x1b' |
| 85 | p_c |
| 86 | //'\x1c' |
| 87 | p_c |
| 88 | //'\x1d' |
| 89 | p_c |
| 90 | //'\x1e' |
| 91 | p_c |
| 92 | //'\x1f' |
| 93 | p_z | p_pr |
| 94 | //' ' (space: spacing and printable) |
| 95 | p_p | p_pr |
| 96 | //'!' |
| 97 | p_p | p_pr |
| 98 | //'"' |
| 99 | p_p | p_pr |
| 100 | //'#' |
| 101 | p_s | p_pr |
| 102 | //'$' |
| 103 | p_p | p_pr |
| 104 | //'%' |
| 105 | p_p | p_pr |
| 106 | //'&' |
| 107 | p_p | p_pr |
| 108 | //'\'' |
| 109 | p_p | p_pr |
| 110 | //'(' |
| 111 | p_p | p_pr |
| 112 | //')' |
| 113 | p_p | p_pr |
| 114 | //'*' |
| 115 | p_s | p_pr |
| 116 | //'+' |
| 117 | p_p | p_pr |
| 118 | //',' |
| 119 | p_p | p_pr |
| 120 | //'-' |
| 121 | p_p | p_pr |
| 122 | //'.' |
| 123 | p_p | p_pr |
| 124 | //'/' |
| 125 | p_n | p_pr |
| 126 | //'0' |
| 127 | p_n | p_pr |
| 128 | //'1' |
| 129 | p_n | p_pr |
| 130 | //'2' |
| 131 | p_n | p_pr |
| 132 | //'3' |
| 133 | p_n | p_pr |
| 134 | //'4' |
| 135 | p_n | p_pr |
| 136 | //'5' |
| 137 | p_n | p_pr |
| 138 | //'6' |
| 139 | p_n | p_pr |
| 140 | //'7' |
| 141 | p_n | p_pr |
| 142 | //'8' |
| 143 | p_n | p_pr |
| 144 | //'9' |
| 145 | p_p | p_pr |
| 146 | //':' |
| 147 | p_p | p_pr |
| 148 | //';' |
| 149 | p_s | p_pr |
| 150 | //'<' |
| 151 | p_s | p_pr |
| 152 | //'=' |
| 153 | p_s | p_pr |
| 154 | //'>' |
| 155 | p_p | p_pr |
| 156 | //'?' |
| 157 | p_p | p_pr |
| 158 | //'@' |
| 159 | p_lu | p_pr |
| 160 | //'A' |
| 161 | p_lu | p_pr |
| 162 | //'B' |
| 163 | p_lu | p_pr |
| 164 | //'C' |
| 165 | p_lu | p_pr |
| 166 | //'D' |
| 167 | p_lu | p_pr |
| 168 | //'E' |
| 169 | p_lu | p_pr |
| 170 | //'F' |
| 171 | p_lu | p_pr |
| 172 | //'G' |
| 173 | p_lu | p_pr |
| 174 | //'H' |
| 175 | p_lu | p_pr |
| 176 | //'I' |
| 177 | p_lu | p_pr |
| 178 | //'J' |
| 179 | p_lu | p_pr |
| 180 | //'K' |
| 181 | p_lu | p_pr |
| 182 | //'L' |
| 183 | p_lu | p_pr |
| 184 | //'M' |
| 185 | p_lu | p_pr |
| 186 | //'N' |
| 187 | p_lu | p_pr |
| 188 | //'O' |
| 189 | p_lu | p_pr |
| 190 | //'P' |
| 191 | p_lu | p_pr |
| 192 | //'Q' |
| 193 | p_lu | p_pr |
| 194 | //'R' |
| 195 | p_lu | p_pr |
| 196 | //'S' |
| 197 | p_lu | p_pr |
| 198 | //'T' |
| 199 | p_lu | p_pr |
| 200 | //'U' |
| 201 | p_lu | p_pr |
| 202 | //'V' |
| 203 | p_lu | p_pr |
| 204 | //'W' |
| 205 | p_lu | p_pr |
| 206 | //'X' |
| 207 | p_lu | p_pr |
| 208 | //'Y' |
| 209 | p_lu | p_pr |
| 210 | //'Z' |
| 211 | p_p | p_pr |
| 212 | //'[' |
| 213 | p_p | p_pr |
| 214 | //'\\' |
| 215 | p_p | p_pr |
| 216 | //']' |
| 217 | p_s | p_pr |
| 218 | //'^' |
| 219 | p_p | p_pr |
| 220 | //'_' |
| 221 | p_s | p_pr |
| 222 | //'`' |
| 223 | p_ll | p_pr |
| 224 | //'a' |
| 225 | p_ll | p_pr |
| 226 | //'b' |
| 227 | p_ll | p_pr |
| 228 | //'c' |
| 229 | p_ll | p_pr |
| 230 | //'d' |
| 231 | p_ll | p_pr |
| 232 | //'e' |
| 233 | p_ll | p_pr |
| 234 | //'f' |
| 235 | p_ll | p_pr |
| 236 | //'g' |
| 237 | p_ll | p_pr |
| 238 | //'h' |
| 239 | p_ll | p_pr |
| 240 | //'i' |
| 241 | p_ll | p_pr |
| 242 | //'j' |
| 243 | p_ll | p_pr |
| 244 | //'k' |
| 245 | p_ll | p_pr |
| 246 | //'l' |
| 247 | p_ll | p_pr |
| 248 | //'m' |
| 249 | p_ll | p_pr |
| 250 | //'n' |
| 251 | p_ll | p_pr |
| 252 | //'o' |
| 253 | p_ll | p_pr |
| 254 | //'p' |
| 255 | p_ll | p_pr |
| 256 | //'q' |
| 257 | p_ll | p_pr |
| 258 | //'r' |
| 259 | p_ll | p_pr |
| 260 | //'s' |
| 261 | p_ll | p_pr |
| 262 | //'t' |
| 263 | p_ll | p_pr |
| 264 | //'u' |
| 265 | p_ll | p_pr |
| 266 | //'v' |
| 267 | p_ll | p_pr |
| 268 | //'w' |
| 269 | p_ll | p_pr |
| 270 | //'x' |
| 271 | p_ll | p_pr |
| 272 | //'y' |
| 273 | p_ll | p_pr |
| 274 | //'z' |
| 275 | p_p | p_pr |
| 276 | //'{' |
| 277 | p_s | p_pr |
| 278 | //'|' |
| 279 | p_p | p_pr |
| 280 | //'}' |
| 281 | p_s | p_pr |
| 282 | //'~' |
| 283 | p_c |
| 284 | //'\u007f' |
| 285 | p_c |
| 286 | //'\u0080' |
| 287 | p_c |
| 288 | //'\u0081' |
| 289 | p_c |
| 290 | //'\u0082' |
| 291 | p_c |
| 292 | //'\u0083' |
| 293 | p_c |
| 294 | //'\u0084' |
| 295 | p_c |
| 296 | //'\u0085' |
| 297 | p_c |
| 298 | //'\u0086' |
| 299 | p_c |
| 300 | //'\u0087' |
| 301 | p_c |
| 302 | //'\u0088' |
| 303 | p_c |
| 304 | //'\u0089' |
| 305 | p_c |
| 306 | //'\u008a' |
| 307 | p_c |
| 308 | //'\u008b' |
| 309 | p_c |
| 310 | //'\u008c' |
| 311 | p_c |
| 312 | //'\u008d' |
| 313 | p_c |
| 314 | //'\u008e' |
| 315 | p_c |
| 316 | //'\u008f' |
| 317 | p_c |
| 318 | //'\u0090' |
| 319 | p_c |
| 320 | //'\u0091' |
| 321 | p_c |
| 322 | //'\u0092' |
| 323 | p_c |
| 324 | //'\u0093' |
| 325 | p_c |
| 326 | //'\u0094' |
| 327 | p_c |
| 328 | //'\u0095' |
| 329 | p_c |
| 330 | //'\u0096' |
| 331 | p_c |
| 332 | //'\u0097' |
| 333 | p_c |
| 334 | //'\u0098' |
| 335 | p_c |
| 336 | //'\u0099' |
| 337 | p_c |
| 338 | //'\u009a' |
| 339 | p_c |
| 340 | //'\u009b' |
| 341 | p_c |
| 342 | //'\u009c' |
| 343 | p_c |
| 344 | //'\u009d' |
| 345 | p_c |
| 346 | //'\u009e' |
| 347 | p_c |
| 348 | //'\u009f' |
| 349 | p_z |
| 350 | //'\u00a0' (no-break space: spacing only, p_pr deliberately not set) |
| 351 | p_p | p_pr |
| 352 | //'¡' |
| 353 | p_s | p_pr |
| 354 | //'¢' |
| 355 | p_s | p_pr |
| 356 | //'£' |
| 357 | p_s | p_pr |
| 358 | //'¤' |
| 359 | p_s | p_pr |
| 360 | //'¥' |
| 361 | p_s | p_pr |
| 362 | //'¦' |
| 363 | p_p | p_pr |
| 364 | //'§' |
| 365 | p_s | p_pr |
| 366 | //'¨' |
| 367 | p_s | p_pr |
| 368 | //'©' |
| 369 | p_lo | p_pr |
| 370 | //'ª' |
| 371 | p_p | p_pr |
| 372 | //'«' |
| 373 | p_s | p_pr |
| 374 | //'¬' |
| 375 | 0 |
| 376 | //'\u00ad' (soft hyphen: no property flags set) |
| 377 | p_s | p_pr |
| 378 | //'®' |
| 379 | p_s | p_pr |
| 380 | //'¯' |
| 381 | p_s | p_pr |
| 382 | //'°' |
| 383 | p_s | p_pr |
| 384 | //'±' |
| 385 | p_n | p_pr |
| 386 | //'²' |
| 387 | p_n | p_pr |
| 388 | //'³' |
| 389 | p_s | p_pr |
| 390 | //'´' |
| 391 | p_ll | p_pr |
| 392 | //'µ' |
| 393 | p_p | p_pr |
| 394 | //'¶' |
| 395 | p_p | p_pr |
| 396 | //'·' |
| 397 | p_s | p_pr |
| 398 | //'¸' |
| 399 | p_n | p_pr |
| 400 | //'¹' |
| 401 | p_lo | p_pr |
| 402 | //'º' |
| 403 | p_p | p_pr |
| 404 | //'»' |
| 405 | p_n | p_pr |
| 406 | //'¼' |
| 407 | p_n | p_pr |
| 408 | //'½' |
| 409 | p_n | p_pr |
| 410 | //'¾' |
| 411 | p_p | p_pr |
| 412 | //'¿' |
| 413 | p_lu | p_pr |
| 414 | //'À' |
| 415 | p_lu | p_pr |
| 416 | //'Á' |
| 417 | p_lu | p_pr |
| 418 | //'Â' |
| 419 | p_lu | p_pr |
| 420 | //'Ã' |
| 421 | p_lu | p_pr |
| 422 | //'Ä' |
| 423 | p_lu | p_pr |
| 424 | //'Å' |
| 425 | p_lu | p_pr |
| 426 | //'Æ' |
| 427 | p_lu | p_pr |
| 428 | //'Ç' |
| 429 | p_lu | p_pr |
| 430 | //'È' |
| 431 | p_lu | p_pr |
| 432 | //'É' |
| 433 | p_lu | p_pr |
| 434 | //'Ê' |
| 435 | p_lu | p_pr |
| 436 | //'Ë' |
| 437 | p_lu | p_pr |
| 438 | //'Ì' |
| 439 | p_lu | p_pr |
| 440 | //'Í' |
| 441 | p_lu | p_pr |
| 442 | //'Î' |
| 443 | p_lu | p_pr |
| 444 | //'Ï' |
| 445 | p_lu | p_pr |
| 446 | //'Ð' |
| 447 | p_lu | p_pr |
| 448 | //'Ñ' |
| 449 | p_lu | p_pr |
| 450 | //'Ò' |
| 451 | p_lu | p_pr |
| 452 | //'Ó' |
| 453 | p_lu | p_pr |
| 454 | //'Ô' |
| 455 | p_lu | p_pr |
| 456 | //'Õ' |
| 457 | p_lu | p_pr |
| 458 | //'Ö' |
| 459 | p_s | p_pr |
| 460 | //'×' |
| 461 | p_lu | p_pr |
| 462 | //'Ø' |
| 463 | p_lu | p_pr |
| 464 | //'Ù' |
| 465 | p_lu | p_pr |
| 466 | //'Ú' |
| 467 | p_lu | p_pr |
| 468 | //'Û' |
| 469 | p_lu | p_pr |
| 470 | //'Ü' |
| 471 | p_lu | p_pr |
| 472 | //'Ý' |
| 473 | p_lu | p_pr |
| 474 | //'Þ' |
| 475 | p_ll | p_pr |
| 476 | //'ß' |
| 477 | p_ll | p_pr |
| 478 | //'à' |
| 479 | p_ll | p_pr |
| 480 | //'á' |
| 481 | p_ll | p_pr |
| 482 | //'â' |
| 483 | p_ll | p_pr |
| 484 | //'ã' |
| 485 | p_ll | p_pr |
| 486 | //'ä' |
| 487 | p_ll | p_pr |
| 488 | //'å' |
| 489 | p_ll | p_pr |
| 490 | //'æ' |
| 491 | p_ll | p_pr |
| 492 | //'ç' |
| 493 | p_ll | p_pr |
| 494 | //'è' |
| 495 | p_ll | p_pr |
| 496 | //'é' |
| 497 | p_ll | p_pr |
| 498 | //'ê' |
| 499 | p_ll | p_pr |
| 500 | //'ë' |
| 501 | p_ll | p_pr |
| 502 | //'ì' |
| 503 | p_ll | p_pr |
| 504 | //'í' |
| 505 | p_ll | p_pr |
| 506 | //'î' |
| 507 | p_ll | p_pr |
| 508 | //'ï' |
| 509 | p_ll | p_pr |
| 510 | //'ð' |
| 511 | p_ll | p_pr |
| 512 | //'ñ' |
| 513 | p_ll | p_pr |
| 514 | //'ò' |
| 515 | p_ll | p_pr |
| 516 | //'ó' |
| 517 | p_ll | p_pr |
| 518 | //'ô' |
| 519 | p_ll | p_pr |
| 520 | //'õ' |
| 521 | p_ll | p_pr |
| 522 | //'ö' |
| 523 | p_s | p_pr |
| 524 | //'÷' |
| 525 | p_ll | p_pr |
| 526 | //'ø' |
| 527 | p_ll | p_pr |
| 528 | //'ù' |
| 529 | p_ll | p_pr |
| 530 | //'ú' |
| 531 | p_ll | p_pr |
| 532 | //'û' |
| 533 | p_ll | p_pr |
| 534 | //'ü' |
| 535 | p_ll | p_pr |
| 536 | //'ý' |
| 537 | p_ll | p_pr |
| 538 | //'þ' |
| 539 | p_ll | p_pr |
| 540 | //'ÿ' (index 0xff, the last of the 256 entries) |
| 541 | ]! |
| 542 | |
| 543 | // These tables are based on Go's tables: https://cs.opensource.google/go/go/+/refs/tags/go1.17.1:src/unicode/tables.go. |
| 544 | // There is no need to work out a code point's Unicode type (such as letter) yourself. max_latin_1 is the last Latin-1 code point. |
| 545 | const max_latin_1 = rune(0x00ff) |
| 546 | |
| 547 | // letter_table lists the code-point ranges of Unicode category L (letters): r16 holds ranges up to 0xffff, r32 those from 0x10000; each entry is presumably {lo, hi, stride} as in Go's RangeTable (confirm against the Range16/Range32 declarations). |
| 548 | const letter_table = RangeTable{ |
| 549 | r16: [ |
| 550 | Range16{0x0041, 0x005a, 1}, |
| 551 | Range16{0x0061, 0x007a, 1}, |
| 552 | Range16{0x00aa, 0x00b5, 11}, |
| 553 | Range16{0x00ba, 0x00c0, 6}, |
| 554 | Range16{0x00c1, 0x00d6, 1}, |
| 555 | Range16{0x00d8, 0x00f6, 1}, |
| 556 | Range16{0x00f8, 0x02c1, 1}, |
| 557 | Range16{0x02c6, 0x02d1, 1}, |
| 558 | Range16{0x02e0, 0x02e4, 1}, |
| 559 | Range16{0x02ec, 0x02ee, 2}, |
| 560 | Range16{0x0370, 0x0374, 1}, |
| 561 | Range16{0x0376, 0x0377, 1}, |
| 562 | Range16{0x037a, 0x037d, 1}, |
| 563 | Range16{0x037f, 0x0386, 7}, |
| 564 | Range16{0x0388, 0x038a, 1}, |
| 565 | Range16{0x038c, 0x038e, 2}, |
| 566 | Range16{0x038f, 0x03a1, 1}, |
| 567 | Range16{0x03a3, 0x03f5, 1}, |
| 568 | Range16{0x03f7, 0x0481, 1}, |
| 569 | Range16{0x048a, 0x052f, 1}, |
| 570 | Range16{0x0531, 0x0556, 1}, |
| 571 | Range16{0x0559, 0x0560, 7}, |
| 572 | Range16{0x0561, 0x0588, 1}, |
| 573 | Range16{0x05d0, 0x05ea, 1}, |
| 574 | Range16{0x05ef, 0x05f2, 1}, |
| 575 | Range16{0x0620, 0x064a, 1}, |
| 576 | Range16{0x066e, 0x066f, 1}, |
| 577 | Range16{0x0671, 0x06d3, 1}, |
| 578 | Range16{0x06d5, 0x06e5, 16}, |
| 579 | Range16{0x06e6, 0x06ee, 8}, |
| 580 | Range16{0x06ef, 0x06fa, 11}, |
| 581 | Range16{0x06fb, 0x06fc, 1}, |
| 582 | Range16{0x06ff, 0x0710, 17}, |
| 583 | Range16{0x0712, 0x072f, 1}, |
| 584 | Range16{0x074d, 0x07a5, 1}, |
| 585 | Range16{0x07b1, 0x07ca, 25}, |
| 586 | Range16{0x07cb, 0x07ea, 1}, |
| 587 | Range16{0x07f4, 0x07f5, 1}, |
| 588 | Range16{0x07fa, 0x0800, 6}, |
| 589 | Range16{0x0801, 0x0815, 1}, |
| 590 | Range16{0x081a, 0x0824, 10}, |
| 591 | Range16{0x0828, 0x0840, 24}, |
| 592 | Range16{0x0841, 0x0858, 1}, |
| 593 | Range16{0x0860, 0x086a, 1}, |
| 594 | Range16{0x08a0, 0x08b4, 1}, |
| 595 | Range16{0x08b6, 0x08c7, 1}, |
| 596 | Range16{0x0904, 0x0939, 1}, |
| 597 | Range16{0x093d, 0x0950, 19}, |
| 598 | Range16{0x0958, 0x0961, 1}, |
| 599 | Range16{0x0971, 0x0980, 1}, |
| 600 | Range16{0x0985, 0x098c, 1}, |
| 601 | Range16{0x098f, 0x0990, 1}, |
| 602 | Range16{0x0993, 0x09a8, 1}, |
| 603 | Range16{0x09aa, 0x09b0, 1}, |
| 604 | Range16{0x09b2, 0x09b6, 4}, |
| 605 | Range16{0x09b7, 0x09b9, 1}, |
| 606 | Range16{0x09bd, 0x09ce, 17}, |
| 607 | Range16{0x09dc, 0x09dd, 1}, |
| 608 | Range16{0x09df, 0x09e1, 1}, |
| 609 | Range16{0x09f0, 0x09f1, 1}, |
| 610 | Range16{0x09fc, 0x0a05, 9}, |
| 611 | Range16{0x0a06, 0x0a0a, 1}, |
| 612 | Range16{0x0a0f, 0x0a10, 1}, |
| 613 | Range16{0x0a13, 0x0a28, 1}, |
| 614 | Range16{0x0a2a, 0x0a30, 1}, |
| 615 | Range16{0x0a32, 0x0a33, 1}, |
| 616 | Range16{0x0a35, 0x0a36, 1}, |
| 617 | Range16{0x0a38, 0x0a39, 1}, |
| 618 | Range16{0x0a59, 0x0a5c, 1}, |
| 619 | Range16{0x0a5e, 0x0a72, 20}, |
| 620 | Range16{0x0a73, 0x0a74, 1}, |
| 621 | Range16{0x0a85, 0x0a8d, 1}, |
| 622 | Range16{0x0a8f, 0x0a91, 1}, |
| 623 | Range16{0x0a93, 0x0aa8, 1}, |
| 624 | Range16{0x0aaa, 0x0ab0, 1}, |
| 625 | Range16{0x0ab2, 0x0ab3, 1}, |
| 626 | Range16{0x0ab5, 0x0ab9, 1}, |
| 627 | Range16{0x0abd, 0x0ad0, 19}, |
| 628 | Range16{0x0ae0, 0x0ae1, 1}, |
| 629 | Range16{0x0af9, 0x0b05, 12}, |
| 630 | Range16{0x0b06, 0x0b0c, 1}, |
| 631 | Range16{0x0b0f, 0x0b10, 1}, |
| 632 | Range16{0x0b13, 0x0b28, 1}, |
| 633 | Range16{0x0b2a, 0x0b30, 1}, |
| 634 | Range16{0x0b32, 0x0b33, 1}, |
| 635 | Range16{0x0b35, 0x0b39, 1}, |
| 636 | Range16{0x0b3d, 0x0b5c, 31}, |
| 637 | Range16{0x0b5d, 0x0b5f, 2}, |
| 638 | Range16{0x0b60, 0x0b61, 1}, |
| 639 | Range16{0x0b71, 0x0b83, 18}, |
| 640 | Range16{0x0b85, 0x0b8a, 1}, |
| 641 | Range16{0x0b8e, 0x0b90, 1}, |
| 642 | Range16{0x0b92, 0x0b95, 1}, |
| 643 | Range16{0x0b99, 0x0b9a, 1}, |
| 644 | Range16{0x0b9c, 0x0b9e, 2}, |
| 645 | Range16{0x0b9f, 0x0ba3, 4}, |
| 646 | Range16{0x0ba4, 0x0ba8, 4}, |
| 647 | Range16{0x0ba9, 0x0baa, 1}, |
| 648 | Range16{0x0bae, 0x0bb9, 1}, |
| 649 | Range16{0x0bd0, 0x0c05, 53}, |
| 650 | Range16{0x0c06, 0x0c0c, 1}, |
| 651 | Range16{0x0c0e, 0x0c10, 1}, |
| 652 | Range16{0x0c12, 0x0c28, 1}, |
| 653 | Range16{0x0c2a, 0x0c39, 1}, |
| 654 | Range16{0x0c3d, 0x0c58, 27}, |
| 655 | Range16{0x0c59, 0x0c5a, 1}, |
| 656 | Range16{0x0c60, 0x0c61, 1}, |
| 657 | Range16{0x0c80, 0x0c85, 5}, |
| 658 | Range16{0x0c86, 0x0c8c, 1}, |
| 659 | Range16{0x0c8e, 0x0c90, 1}, |
| 660 | Range16{0x0c92, 0x0ca8, 1}, |
| 661 | Range16{0x0caa, 0x0cb3, 1}, |
| 662 | Range16{0x0cb5, 0x0cb9, 1}, |
| 663 | Range16{0x0cbd, 0x0cde, 33}, |
| 664 | Range16{0x0ce0, 0x0ce1, 1}, |
| 665 | Range16{0x0cf1, 0x0cf2, 1}, |
| 666 | Range16{0x0d04, 0x0d0c, 1}, |
| 667 | Range16{0x0d0e, 0x0d10, 1}, |
| 668 | Range16{0x0d12, 0x0d3a, 1}, |
| 669 | Range16{0x0d3d, 0x0d4e, 17}, |
| 670 | Range16{0x0d54, 0x0d56, 1}, |
| 671 | Range16{0x0d5f, 0x0d61, 1}, |
| 672 | Range16{0x0d7a, 0x0d7f, 1}, |
| 673 | Range16{0x0d85, 0x0d96, 1}, |
| 674 | Range16{0x0d9a, 0x0db1, 1}, |
| 675 | Range16{0x0db3, 0x0dbb, 1}, |
| 676 | Range16{0x0dbd, 0x0dc0, 3}, |
| 677 | Range16{0x0dc1, 0x0dc6, 1}, |
| 678 | Range16{0x0e01, 0x0e30, 1}, |
| 679 | Range16{0x0e32, 0x0e33, 1}, |
| 680 | Range16{0x0e40, 0x0e46, 1}, |
| 681 | Range16{0x0e81, 0x0e82, 1}, |
| 682 | Range16{0x0e84, 0x0e86, 2}, |
| 683 | Range16{0x0e87, 0x0e8a, 1}, |
| 684 | Range16{0x0e8c, 0x0ea3, 1}, |
| 685 | Range16{0x0ea5, 0x0ea7, 2}, |
| 686 | Range16{0x0ea8, 0x0eb0, 1}, |
| 687 | Range16{0x0eb2, 0x0eb3, 1}, |
| 688 | Range16{0x0ebd, 0x0ec0, 3}, |
| 689 | Range16{0x0ec1, 0x0ec4, 1}, |
| 690 | Range16{0x0ec6, 0x0edc, 22}, |
| 691 | Range16{0x0edd, 0x0edf, 1}, |
| 692 | Range16{0x0f00, 0x0f40, 64}, |
| 693 | Range16{0x0f41, 0x0f47, 1}, |
| 694 | Range16{0x0f49, 0x0f6c, 1}, |
| 695 | Range16{0x0f88, 0x0f8c, 1}, |
| 696 | Range16{0x1000, 0x102a, 1}, |
| 697 | Range16{0x103f, 0x1050, 17}, |
| 698 | Range16{0x1051, 0x1055, 1}, |
| 699 | Range16{0x105a, 0x105d, 1}, |
| 700 | Range16{0x1061, 0x1065, 4}, |
| 701 | Range16{0x1066, 0x106e, 8}, |
| 702 | Range16{0x106f, 0x1070, 1}, |
| 703 | Range16{0x1075, 0x1081, 1}, |
| 704 | Range16{0x108e, 0x10a0, 18}, |
| 705 | Range16{0x10a1, 0x10c5, 1}, |
| 706 | Range16{0x10c7, 0x10cd, 6}, |
| 707 | Range16{0x10d0, 0x10fa, 1}, |
| 708 | Range16{0x10fc, 0x1248, 1}, |
| 709 | Range16{0x124a, 0x124d, 1}, |
| 710 | Range16{0x1250, 0x1256, 1}, |
| 711 | Range16{0x1258, 0x125a, 2}, |
| 712 | Range16{0x125b, 0x125d, 1}, |
| 713 | Range16{0x1260, 0x1288, 1}, |
| 714 | Range16{0x128a, 0x128d, 1}, |
| 715 | Range16{0x1290, 0x12b0, 1}, |
| 716 | Range16{0x12b2, 0x12b5, 1}, |
| 717 | Range16{0x12b8, 0x12be, 1}, |
| 718 | Range16{0x12c0, 0x12c2, 2}, |
| 719 | Range16{0x12c3, 0x12c5, 1}, |
| 720 | Range16{0x12c8, 0x12d6, 1}, |
| 721 | Range16{0x12d8, 0x1310, 1}, |
| 722 | Range16{0x1312, 0x1315, 1}, |
| 723 | Range16{0x1318, 0x135a, 1}, |
| 724 | Range16{0x1380, 0x138f, 1}, |
| 725 | Range16{0x13a0, 0x13f5, 1}, |
| 726 | Range16{0x13f8, 0x13fd, 1}, |
| 727 | Range16{0x1401, 0x166c, 1}, |
| 728 | Range16{0x166f, 0x167f, 1}, |
| 729 | Range16{0x1681, 0x169a, 1}, |
| 730 | Range16{0x16a0, 0x16ea, 1}, |
| 731 | Range16{0x16f1, 0x16f8, 1}, |
| 732 | Range16{0x1700, 0x170c, 1}, |
| 733 | Range16{0x170e, 0x1711, 1}, |
| 734 | Range16{0x1720, 0x1731, 1}, |
| 735 | Range16{0x1740, 0x1751, 1}, |
| 736 | Range16{0x1760, 0x176c, 1}, |
| 737 | Range16{0x176e, 0x1770, 1}, |
| 738 | Range16{0x1780, 0x17b3, 1}, |
| 739 | Range16{0x17d7, 0x17dc, 5}, |
| 740 | Range16{0x1820, 0x1878, 1}, |
| 741 | Range16{0x1880, 0x1884, 1}, |
| 742 | Range16{0x1887, 0x18a8, 1}, |
| 743 | Range16{0x18aa, 0x18b0, 6}, |
| 744 | Range16{0x18b1, 0x18f5, 1}, |
| 745 | Range16{0x1900, 0x191e, 1}, |
| 746 | Range16{0x1950, 0x196d, 1}, |
| 747 | Range16{0x1970, 0x1974, 1}, |
| 748 | Range16{0x1980, 0x19ab, 1}, |
| 749 | Range16{0x19b0, 0x19c9, 1}, |
| 750 | Range16{0x1a00, 0x1a16, 1}, |
| 751 | Range16{0x1a20, 0x1a54, 1}, |
| 752 | Range16{0x1aa7, 0x1b05, 94}, |
| 753 | Range16{0x1b06, 0x1b33, 1}, |
| 754 | Range16{0x1b45, 0x1b4b, 1}, |
| 755 | Range16{0x1b83, 0x1ba0, 1}, |
| 756 | Range16{0x1bae, 0x1baf, 1}, |
| 757 | Range16{0x1bba, 0x1be5, 1}, |
| 758 | Range16{0x1c00, 0x1c23, 1}, |
| 759 | Range16{0x1c4d, 0x1c4f, 1}, |
| 760 | Range16{0x1c5a, 0x1c7d, 1}, |
| 761 | Range16{0x1c80, 0x1c88, 1}, |
| 762 | Range16{0x1c90, 0x1cba, 1}, |
| 763 | Range16{0x1cbd, 0x1cbf, 1}, |
| 764 | Range16{0x1ce9, 0x1cec, 1}, |
| 765 | Range16{0x1cee, 0x1cf3, 1}, |
| 766 | Range16{0x1cf5, 0x1cf6, 1}, |
| 767 | Range16{0x1cfa, 0x1d00, 6}, |
| 768 | Range16{0x1d01, 0x1dbf, 1}, |
| 769 | Range16{0x1e00, 0x1f15, 1}, |
| 770 | Range16{0x1f18, 0x1f1d, 1}, |
| 771 | Range16{0x1f20, 0x1f45, 1}, |
| 772 | Range16{0x1f48, 0x1f4d, 1}, |
| 773 | Range16{0x1f50, 0x1f57, 1}, |
| 774 | Range16{0x1f59, 0x1f5f, 2}, |
| 775 | Range16{0x1f60, 0x1f7d, 1}, |
| 776 | Range16{0x1f80, 0x1fb4, 1}, |
| 777 | Range16{0x1fb6, 0x1fbc, 1}, |
| 778 | Range16{0x1fbe, 0x1fc2, 4}, |
| 779 | Range16{0x1fc3, 0x1fc4, 1}, |
| 780 | Range16{0x1fc6, 0x1fcc, 1}, |
| 781 | Range16{0x1fd0, 0x1fd3, 1}, |
| 782 | Range16{0x1fd6, 0x1fdb, 1}, |
| 783 | Range16{0x1fe0, 0x1fec, 1}, |
| 784 | Range16{0x1ff2, 0x1ff4, 1}, |
| 785 | Range16{0x1ff6, 0x1ffc, 1}, |
| 786 | Range16{0x2071, 0x207f, 14}, |
| 787 | Range16{0x2090, 0x209c, 1}, |
| 788 | Range16{0x2102, 0x2107, 5}, |
| 789 | Range16{0x210a, 0x2113, 1}, |
| 790 | Range16{0x2115, 0x2119, 4}, |
| 791 | Range16{0x211a, 0x211d, 1}, |
| 792 | Range16{0x2124, 0x212a, 2}, |
| 793 | Range16{0x212b, 0x212d, 1}, |
| 794 | Range16{0x212f, 0x2139, 1}, |
| 795 | Range16{0x213c, 0x213f, 1}, |
| 796 | Range16{0x2145, 0x2149, 1}, |
| 797 | Range16{0x214e, 0x2183, 53}, |
| 798 | Range16{0x2184, 0x2c00, 2684}, |
| 799 | Range16{0x2c01, 0x2c2e, 1}, |
| 800 | Range16{0x2c30, 0x2c5e, 1}, |
| 801 | Range16{0x2c60, 0x2ce4, 1}, |
| 802 | Range16{0x2ceb, 0x2cee, 1}, |
| 803 | Range16{0x2cf2, 0x2cf3, 1}, |
| 804 | Range16{0x2d00, 0x2d25, 1}, |
| 805 | Range16{0x2d27, 0x2d2d, 6}, |
| 806 | Range16{0x2d30, 0x2d67, 1}, |
| 807 | Range16{0x2d6f, 0x2d80, 17}, |
| 808 | Range16{0x2d81, 0x2d96, 1}, |
| 809 | Range16{0x2da0, 0x2da6, 1}, |
| 810 | Range16{0x2da8, 0x2dae, 1}, |
| 811 | Range16{0x2db0, 0x2db6, 1}, |
| 812 | Range16{0x2db8, 0x2dbe, 1}, |
| 813 | Range16{0x2dc0, 0x2dc6, 1}, |
| 814 | Range16{0x2dc8, 0x2dce, 1}, |
| 815 | Range16{0x2dd0, 0x2dd6, 1}, |
| 816 | Range16{0x2dd8, 0x2dde, 1}, |
| 817 | Range16{0x2e2f, 0x3005, 470}, |
| 818 | Range16{0x3006, 0x3031, 43}, |
| 819 | Range16{0x3032, 0x3035, 1}, |
| 820 | Range16{0x303b, 0x303c, 1}, |
| 821 | Range16{0x3041, 0x3096, 1}, |
| 822 | Range16{0x309d, 0x309f, 1}, |
| 823 | Range16{0x30a1, 0x30fa, 1}, |
| 824 | Range16{0x30fc, 0x30ff, 1}, |
| 825 | Range16{0x3105, 0x312f, 1}, |
| 826 | Range16{0x3131, 0x318e, 1}, |
| 827 | Range16{0x31a0, 0x31bf, 1}, |
| 828 | Range16{0x31f0, 0x31ff, 1}, |
| 829 | Range16{0x3400, 0x4dbf, 1}, |
| 830 | Range16{0x4e00, 0x9ffc, 1}, |
| 831 | Range16{0xa000, 0xa48c, 1}, |
| 832 | Range16{0xa4d0, 0xa4fd, 1}, |
| 833 | Range16{0xa500, 0xa60c, 1}, |
| 834 | Range16{0xa610, 0xa61f, 1}, |
| 835 | Range16{0xa62a, 0xa62b, 1}, |
| 836 | Range16{0xa640, 0xa66e, 1}, |
| 837 | Range16{0xa67f, 0xa69d, 1}, |
| 838 | Range16{0xa6a0, 0xa6e5, 1}, |
| 839 | Range16{0xa717, 0xa71f, 1}, |
| 840 | Range16{0xa722, 0xa788, 1}, |
| 841 | Range16{0xa78b, 0xa7bf, 1}, |
| 842 | Range16{0xa7c2, 0xa7ca, 1}, |
| 843 | Range16{0xa7f5, 0xa801, 1}, |
| 844 | Range16{0xa803, 0xa805, 1}, |
| 845 | Range16{0xa807, 0xa80a, 1}, |
| 846 | Range16{0xa80c, 0xa822, 1}, |
| 847 | Range16{0xa840, 0xa873, 1}, |
| 848 | Range16{0xa882, 0xa8b3, 1}, |
| 849 | Range16{0xa8f2, 0xa8f7, 1}, |
| 850 | Range16{0xa8fb, 0xa8fd, 2}, |
| 851 | Range16{0xa8fe, 0xa90a, 12}, |
| 852 | Range16{0xa90b, 0xa925, 1}, |
| 853 | Range16{0xa930, 0xa946, 1}, |
| 854 | Range16{0xa960, 0xa97c, 1}, |
| 855 | Range16{0xa984, 0xa9b2, 1}, |
| 856 | Range16{0xa9cf, 0xa9e0, 17}, |
| 857 | Range16{0xa9e1, 0xa9e4, 1}, |
| 858 | Range16{0xa9e6, 0xa9ef, 1}, |
| 859 | Range16{0xa9fa, 0xa9fe, 1}, |
| 860 | Range16{0xaa00, 0xaa28, 1}, |
| 861 | Range16{0xaa40, 0xaa42, 1}, |
| 862 | Range16{0xaa44, 0xaa4b, 1}, |
| 863 | Range16{0xaa60, 0xaa76, 1}, |
| 864 | Range16{0xaa7a, 0xaa7e, 4}, |
| 865 | Range16{0xaa7f, 0xaaaf, 1}, |
| 866 | Range16{0xaab1, 0xaab5, 4}, |
| 867 | Range16{0xaab6, 0xaab9, 3}, |
| 868 | Range16{0xaaba, 0xaabd, 1}, |
| 869 | Range16{0xaac0, 0xaac2, 2}, |
| 870 | Range16{0xaadb, 0xaadd, 1}, |
| 871 | Range16{0xaae0, 0xaaea, 1}, |
| 872 | Range16{0xaaf2, 0xaaf4, 1}, |
| 873 | Range16{0xab01, 0xab06, 1}, |
| 874 | Range16{0xab09, 0xab0e, 1}, |
| 875 | Range16{0xab11, 0xab16, 1}, |
| 876 | Range16{0xab20, 0xab26, 1}, |
| 877 | Range16{0xab28, 0xab2e, 1}, |
| 878 | Range16{0xab30, 0xab5a, 1}, |
| 879 | Range16{0xab5c, 0xab69, 1}, |
| 880 | Range16{0xab70, 0xabe2, 1}, |
| 881 | Range16{0xac00, 0xd7a3, 1}, |
| 882 | Range16{0xd7b0, 0xd7c6, 1}, |
| 883 | Range16{0xd7cb, 0xd7fb, 1}, |
| 884 | Range16{0xf900, 0xfa6d, 1}, |
| 885 | Range16{0xfa70, 0xfad9, 1}, |
| 886 | Range16{0xfb00, 0xfb06, 1}, |
| 887 | Range16{0xfb13, 0xfb17, 1}, |
| 888 | Range16{0xfb1d, 0xfb1f, 2}, |
| 889 | Range16{0xfb20, 0xfb28, 1}, |
| 890 | Range16{0xfb2a, 0xfb36, 1}, |
| 891 | Range16{0xfb38, 0xfb3c, 1}, |
| 892 | Range16{0xfb3e, 0xfb40, 2}, |
| 893 | Range16{0xfb41, 0xfb43, 2}, |
| 894 | Range16{0xfb44, 0xfb46, 2}, |
| 895 | Range16{0xfb47, 0xfbb1, 1}, |
| 896 | Range16{0xfbd3, 0xfd3d, 1}, |
| 897 | Range16{0xfd50, 0xfd8f, 1}, |
| 898 | Range16{0xfd92, 0xfdc7, 1}, |
| 899 | Range16{0xfdf0, 0xfdfb, 1}, |
| 900 | Range16{0xfe70, 0xfe74, 1}, |
| 901 | Range16{0xfe76, 0xfefc, 1}, |
| 902 | Range16{0xff21, 0xff3a, 1}, |
| 903 | Range16{0xff41, 0xff5a, 1}, |
| 904 | Range16{0xff66, 0xffbe, 1}, |
| 905 | Range16{0xffc2, 0xffc7, 1}, |
| 906 | Range16{0xffca, 0xffcf, 1}, |
| 907 | Range16{0xffd2, 0xffd7, 1}, |
| 908 | Range16{0xffda, 0xffdc, 1}, |
| 909 | ] |
| 910 | r32: [ |
| 911 | Range32{0x10000, 0x1000b, 1}, |
| 912 | Range32{0x1000d, 0x10026, 1}, |
| 913 | Range32{0x10028, 0x1003a, 1}, |
| 914 | Range32{0x1003c, 0x1003d, 1}, |
| 915 | Range32{0x1003f, 0x1004d, 1}, |
| 916 | Range32{0x10050, 0x1005d, 1}, |
| 917 | Range32{0x10080, 0x100fa, 1}, |
| 918 | Range32{0x10280, 0x1029c, 1}, |
| 919 | Range32{0x102a0, 0x102d0, 1}, |
| 920 | Range32{0x10300, 0x1031f, 1}, |
| 921 | Range32{0x1032d, 0x10340, 1}, |
| 922 | Range32{0x10342, 0x10349, 1}, |
| 923 | Range32{0x10350, 0x10375, 1}, |
| 924 | Range32{0x10380, 0x1039d, 1}, |
| 925 | Range32{0x103a0, 0x103c3, 1}, |
| 926 | Range32{0x103c8, 0x103cf, 1}, |
| 927 | Range32{0x10400, 0x1049d, 1}, |
| 928 | Range32{0x104b0, 0x104d3, 1}, |
| 929 | Range32{0x104d8, 0x104fb, 1}, |
| 930 | Range32{0x10500, 0x10527, 1}, |
| 931 | Range32{0x10530, 0x10563, 1}, |
| 932 | Range32{0x10600, 0x10736, 1}, |
| 933 | Range32{0x10740, 0x10755, 1}, |
| 934 | Range32{0x10760, 0x10767, 1}, |
| 935 | Range32{0x10800, 0x10805, 1}, |
| 936 | Range32{0x10808, 0x1080a, 2}, |
| 937 | Range32{0x1080b, 0x10835, 1}, |
| 938 | Range32{0x10837, 0x10838, 1}, |
| 939 | Range32{0x1083c, 0x1083f, 3}, |
| 940 | Range32{0x10840, 0x10855, 1}, |
| 941 | Range32{0x10860, 0x10876, 1}, |
| 942 | Range32{0x10880, 0x1089e, 1}, |
| 943 | Range32{0x108e0, 0x108f2, 1}, |
| 944 | Range32{0x108f4, 0x108f5, 1}, |
| 945 | Range32{0x10900, 0x10915, 1}, |
| 946 | Range32{0x10920, 0x10939, 1}, |
| 947 | Range32{0x10980, 0x109b7, 1}, |
| 948 | Range32{0x109be, 0x109bf, 1}, |
| 949 | Range32{0x10a00, 0x10a10, 16}, |
| 950 | Range32{0x10a11, 0x10a13, 1}, |
| 951 | Range32{0x10a15, 0x10a17, 1}, |
| 952 | Range32{0x10a19, 0x10a35, 1}, |
| 953 | Range32{0x10a60, 0x10a7c, 1}, |
| 954 | Range32{0x10a80, 0x10a9c, 1}, |
| 955 | Range32{0x10ac0, 0x10ac7, 1}, |
| 956 | Range32{0x10ac9, 0x10ae4, 1}, |
| 957 | Range32{0x10b00, 0x10b35, 1}, |
| 958 | Range32{0x10b40, 0x10b55, 1}, |
| 959 | Range32{0x10b60, 0x10b72, 1}, |
| 960 | Range32{0x10b80, 0x10b91, 1}, |
| 961 | Range32{0x10c00, 0x10c48, 1}, |
| 962 | Range32{0x10c80, 0x10cb2, 1}, |
| 963 | Range32{0x10cc0, 0x10cf2, 1}, |
| 964 | Range32{0x10d00, 0x10d23, 1}, |
| 965 | Range32{0x10e80, 0x10ea9, 1}, |
| 966 | Range32{0x10eb0, 0x10eb1, 1}, |
| 967 | Range32{0x10f00, 0x10f1c, 1}, |
| 968 | Range32{0x10f27, 0x10f30, 9}, |
| 969 | Range32{0x10f31, 0x10f45, 1}, |
| 970 | Range32{0x10fb0, 0x10fc4, 1}, |
| 971 | Range32{0x10fe0, 0x10ff6, 1}, |
| 972 | Range32{0x11003, 0x11037, 1}, |
| 973 | Range32{0x11083, 0x110af, 1}, |
| 974 | Range32{0x110d0, 0x110e8, 1}, |
| 975 | Range32{0x11103, 0x11126, 1}, |
| 976 | Range32{0x11144, 0x11147, 3}, |
| 977 | Range32{0x11150, 0x11172, 1}, |
| 978 | Range32{0x11176, 0x11183, 13}, |
| 979 | Range32{0x11184, 0x111b2, 1}, |
| 980 | Range32{0x111c1, 0x111c4, 1}, |
| 981 | Range32{0x111da, 0x111dc, 2}, |
| 982 | Range32{0x11200, 0x11211, 1}, |
| 983 | Range32{0x11213, 0x1122b, 1}, |
| 984 | Range32{0x11280, 0x11286, 1}, |
| 985 | Range32{0x11288, 0x1128a, 2}, |
| 986 | Range32{0x1128b, 0x1128d, 1}, |
| 987 | Range32{0x1128f, 0x1129d, 1}, |
| 988 | Range32{0x1129f, 0x112a8, 1}, |
| 989 | Range32{0x112b0, 0x112de, 1}, |
| 990 | Range32{0x11305, 0x1130c, 1}, |
| 991 | Range32{0x1130f, 0x11310, 1}, |
| 992 | Range32{0x11313, 0x11328, 1}, |
| 993 | Range32{0x1132a, 0x11330, 1}, |
| 994 | Range32{0x11332, 0x11333, 1}, |
| 995 | Range32{0x11335, 0x11339, 1}, |
| 996 | Range32{0x1133d, 0x11350, 19}, |
| 997 | Range32{0x1135d, 0x11361, 1}, |
| 998 | Range32{0x11400, 0x11434, 1}, |
| 999 | Range32{0x11447, 0x1144a, 1}, |
| 1000 | Range32{0x1145f, 0x11461, 1}, |
| 1001 | Range32{0x11480, 0x114af, 1}, |
| 1002 | Range32{0x114c4, 0x114c5, 1}, |
| 1003 | Range32{0x114c7, 0x11580, 185}, |
| 1004 | Range32{0x11581, 0x115ae, 1}, |
| 1005 | Range32{0x115d8, 0x115db, 1}, |
| 1006 | Range32{0x11600, 0x1162f, 1}, |
| 1007 | Range32{0x11644, 0x11680, 60}, |
| 1008 | Range32{0x11681, 0x116aa, 1}, |
| 1009 | Range32{0x116b8, 0x11700, 72}, |
| 1010 | Range32{0x11701, 0x1171a, 1}, |
| 1011 | Range32{0x11800, 0x1182b, 1}, |
| 1012 | Range32{0x118a0, 0x118df, 1}, |
| 1013 | Range32{0x118ff, 0x11906, 1}, |
| 1014 | Range32{0x11909, 0x1190c, 3}, |
| 1015 | Range32{0x1190d, 0x11913, 1}, |
| 1016 | Range32{0x11915, 0x11916, 1}, |
| 1017 | Range32{0x11918, 0x1192f, 1}, |
| 1018 | Range32{0x1193f, 0x11941, 2}, |
| 1019 | Range32{0x119a0, 0x119a7, 1}, |
| 1020 | Range32{0x119aa, 0x119d0, 1}, |
| 1021 | Range32{0x119e1, 0x119e3, 2}, |
| 1022 | Range32{0x11a00, 0x11a0b, 11}, |
| 1023 | Range32{0x11a0c, 0x11a32, 1}, |
| 1024 | Range32{0x11a3a, 0x11a50, 22}, |
| 1025 | Range32{0x11a5c, 0x11a89, 1}, |
| 1026 | Range32{0x11a9d, 0x11ac0, 35}, |
| 1027 | Range32{0x11ac1, 0x11af8, 1}, |
| 1028 | Range32{0x11c00, 0x11c08, 1}, |
| 1029 | Range32{0x11c0a, 0x11c2e, 1}, |
| 1030 | Range32{0x11c40, 0x11c72, 50}, |
| 1031 | Range32{0x11c73, 0x11c8f, 1}, |
| 1032 | Range32{0x11d00, 0x11d06, 1}, |
| 1033 | Range32{0x11d08, 0x11d09, 1}, |
| 1034 | Range32{0x11d0b, 0x11d30, 1}, |
| 1035 | Range32{0x11d46, 0x11d60, 26}, |
| 1036 | Range32{0x11d61, 0x11d65, 1}, |
| 1037 | Range32{0x11d67, 0x11d68, 1}, |
| 1038 | Range32{0x11d6a, 0x11d89, 1}, |
| 1039 | Range32{0x11d98, 0x11ee0, 328}, |
| 1040 | Range32{0x11ee1, 0x11ef2, 1}, |
| 1041 | Range32{0x11fb0, 0x12000, 80}, |
| 1042 | Range32{0x12001, 0x12399, 1}, |
| 1043 | Range32{0x12480, 0x12543, 1}, |
| 1044 | Range32{0x13000, 0x1342e, 1}, |
| 1045 | Range32{0x14400, 0x14646, 1}, |
| 1046 | Range32{0x16800, 0x16a38, 1}, |
| 1047 | Range32{0x16a40, 0x16a5e, 1}, |
| 1048 | Range32{0x16ad0, 0x16aed, 1}, |
| 1049 | Range32{0x16b00, 0x16b2f, 1}, |
| 1050 | Range32{0x16b40, 0x16b43, 1}, |
| 1051 | Range32{0x16b63, 0x16b77, 1}, |
| 1052 | Range32{0x16b7d, 0x16b8f, 1}, |
| 1053 | Range32{0x16e40, 0x16e7f, 1}, |
| 1054 | Range32{0x16f00, 0x16f4a, 1}, |
| 1055 | Range32{0x16f50, 0x16f93, 67}, |
| 1056 | Range32{0x16f94, 0x16f9f, 1}, |
| 1057 | Range32{0x16fe0, 0x16fe1, 1}, |
| 1058 | Range32{0x16fe3, 0x17000, 29}, |
| 1059 | Range32{0x17001, 0x187f7, 1}, |
| 1060 | Range32{0x18800, 0x18cd5, 1}, |
| 1061 | Range32{0x18d00, 0x18d08, 1}, |
| 1062 | Range32{0x1b000, 0x1b11e, 1}, |
| 1063 | Range32{0x1b150, 0x1b152, 1}, |
| 1064 | Range32{0x1b164, 0x1b167, 1}, |
| 1065 | Range32{0x1b170, 0x1b2fb, 1}, |
| 1066 | Range32{0x1bc00, 0x1bc6a, 1}, |
| 1067 | Range32{0x1bc70, 0x1bc7c, 1}, |
| 1068 | Range32{0x1bc80, 0x1bc88, 1}, |
| 1069 | Range32{0x1bc90, 0x1bc99, 1}, |
| 1070 | Range32{0x1d400, 0x1d454, 1}, |
| 1071 | Range32{0x1d456, 0x1d49c, 1}, |
| 1072 | Range32{0x1d49e, 0x1d49f, 1}, |
| 1073 | Range32{0x1d4a2, 0x1d4a5, 3}, |
| 1074 | Range32{0x1d4a6, 0x1d4a9, 3}, |
| 1075 | Range32{0x1d4aa, 0x1d4ac, 1}, |
| 1076 | Range32{0x1d4ae, 0x1d4b9, 1}, |
| 1077 | Range32{0x1d4bb, 0x1d4bd, 2}, |
| 1078 | Range32{0x1d4be, 0x1d4c3, 1}, |
| 1079 | Range32{0x1d4c5, 0x1d505, 1}, |
| 1080 | Range32{0x1d507, 0x1d50a, 1}, |
| 1081 | Range32{0x1d50d, 0x1d514, 1}, |
| 1082 | Range32{0x1d516, 0x1d51c, 1}, |
| 1083 | Range32{0x1d51e, 0x1d539, 1}, |
| 1084 | Range32{0x1d53b, 0x1d53e, 1}, |
| 1085 | Range32{0x1d540, 0x1d544, 1}, |
| 1086 | Range32{0x1d546, 0x1d54a, 4}, |
| 1087 | Range32{0x1d54b, 0x1d550, 1}, |
| 1088 | Range32{0x1d552, 0x1d6a5, 1}, |
| 1089 | Range32{0x1d6a8, 0x1d6c0, 1}, |
| 1090 | Range32{0x1d6c2, 0x1d6da, 1}, |
| 1091 | Range32{0x1d6dc, 0x1d6fa, 1}, |
| 1092 | Range32{0x1d6fc, 0x1d714, 1}, |
| 1093 | Range32{0x1d716, 0x1d734, 1}, |
| 1094 | Range32{0x1d736, 0x1d74e, 1}, |
| 1095 | Range32{0x1d750, 0x1d76e, 1}, |
| 1096 | Range32{0x1d770, 0x1d788, 1}, |
| 1097 | Range32{0x1d78a, 0x1d7a8, 1}, |
| 1098 | Range32{0x1d7aa, 0x1d7c2, 1}, |
| 1099 | Range32{0x1d7c4, 0x1d7cb, 1}, |
| 1100 | Range32{0x1e100, 0x1e12c, 1}, |
| 1101 | Range32{0x1e137, 0x1e13d, 1}, |
| 1102 | Range32{0x1e14e, 0x1e2c0, 370}, |
| 1103 | Range32{0x1e2c1, 0x1e2eb, 1}, |
| 1104 | Range32{0x1e800, 0x1e8c4, 1}, |
| 1105 | Range32{0x1e900, 0x1e943, 1}, |
| 1106 | Range32{0x1e94b, 0x1ee00, 1205}, |
| 1107 | Range32{0x1ee01, 0x1ee03, 1}, |
| 1108 | Range32{0x1ee05, 0x1ee1f, 1}, |
| 1109 | Range32{0x1ee21, 0x1ee22, 1}, |
| 1110 | Range32{0x1ee24, 0x1ee27, 3}, |
| 1111 | Range32{0x1ee29, 0x1ee32, 1}, |
| 1112 | Range32{0x1ee34, 0x1ee37, 1}, |
| 1113 | Range32{0x1ee39, 0x1ee3b, 2}, |
| 1114 | Range32{0x1ee42, 0x1ee47, 5}, |
| 1115 | Range32{0x1ee49, 0x1ee4d, 2}, |
| 1116 | Range32{0x1ee4e, 0x1ee4f, 1}, |
| 1117 | Range32{0x1ee51, 0x1ee52, 1}, |
| 1118 | Range32{0x1ee54, 0x1ee57, 3}, |
| 1119 | Range32{0x1ee59, 0x1ee61, 2}, |
| 1120 | Range32{0x1ee62, 0x1ee64, 2}, |
| 1121 | Range32{0x1ee67, 0x1ee6a, 1}, |
| 1122 | Range32{0x1ee6c, 0x1ee72, 1}, |
| 1123 | Range32{0x1ee74, 0x1ee77, 1}, |
| 1124 | Range32{0x1ee79, 0x1ee7c, 1}, |
| 1125 | Range32{0x1ee7e, 0x1ee80, 2}, |
| 1126 | Range32{0x1ee81, 0x1ee89, 1}, |
| 1127 | Range32{0x1ee8b, 0x1ee9b, 1}, |
| 1128 | Range32{0x1eea1, 0x1eea3, 1}, |
| 1129 | Range32{0x1eea5, 0x1eea9, 1}, |
| 1130 | Range32{0x1eeab, 0x1eebb, 1}, |
| 1131 | Range32{0x20000, 0x2a6dd, 1}, |
| 1132 | Range32{0x2a700, 0x2b734, 1}, |
| 1133 | Range32{0x2b740, 0x2b81d, 1}, |
| 1134 | Range32{0x2b820, 0x2cea1, 1}, |
| 1135 | Range32{0x2ceb0, 0x2ebe0, 1}, |
| 1136 | Range32{0x2f800, 0x2fa1d, 1}, |
| 1137 | Range32{0x30000, 0x3134a, 1}, |
| 1138 | ] |
| 1139 | latin_offset: 6 |
| 1140 | } |
| 1141 | |
// white_space_table lists the code points treated as white space.
// Besides spacing characters it also contains the control characters
// U+0009..U+000D and U+0085, so it is wider than general category Z alone.
//
// Each entry is {lo, hi, stride}: every `stride`-th code point from `lo`
// up to and including `hi` is a member.
const white_space_table = RangeTable{
	r16: [
		Range16{0x0009, 0x000d, 1}, // U+0009..U+000D
		Range16{0x0020, 0x0085, 101}, // U+0020 and U+0085
		Range16{0x00a0, 0x1680, 5600}, // U+00A0 and U+1680
		Range16{0x2000, 0x200a, 1}, // U+2000..U+200A
		Range16{0x2028, 0x2029, 1}, // U+2028 and U+2029
		Range16{0x202f, 0x205f, 48}, // U+202F and U+205F
		Range16{0x3000, 0x3000, 1}, // U+3000
	]
	// No white space code point lies above U+FFFF.
	r32: []Range32{}
	// The first two r16 entries end at or below U+00FF; is_excluding_latin skips them.
	latin_offset: 2
}
| 1156 | |
// number_table lists the code points in Unicode general category N (numbers).
//
// Each entry is {lo, hi, stride}: every `stride`-th code point from `lo`
// up to and including `hi` is a member. Code points that fit in 16 bits
// live in `r16`, the remaining ones in `r32`. Both lists are sorted by
// ascending `lo`.
const number_table = RangeTable{
	r16: [
		Range16{0x0030, 0x0039, 1},
		Range16{0x00b2, 0x00b3, 1},
		Range16{0x00b9, 0x00bc, 3},
		Range16{0x00bd, 0x00be, 1},
		Range16{0x0660, 0x0669, 1},
		Range16{0x06f0, 0x06f9, 1},
		Range16{0x07c0, 0x07c9, 1},
		Range16{0x0966, 0x096f, 1},
		Range16{0x09e6, 0x09ef, 1},
		Range16{0x09f4, 0x09f9, 1},
		Range16{0x0a66, 0x0a6f, 1},
		Range16{0x0ae6, 0x0aef, 1},
		Range16{0x0b66, 0x0b6f, 1},
		Range16{0x0b72, 0x0b77, 1},
		Range16{0x0be6, 0x0bf2, 1},
		Range16{0x0c66, 0x0c6f, 1},
		Range16{0x0c78, 0x0c7e, 1},
		Range16{0x0ce6, 0x0cef, 1},
		Range16{0x0d58, 0x0d5e, 1},
		Range16{0x0d66, 0x0d78, 1},
		Range16{0x0de6, 0x0def, 1},
		Range16{0x0e50, 0x0e59, 1},
		Range16{0x0ed0, 0x0ed9, 1},
		Range16{0x0f20, 0x0f33, 1},
		Range16{0x1040, 0x1049, 1},
		Range16{0x1090, 0x1099, 1},
		Range16{0x1369, 0x137c, 1},
		Range16{0x16ee, 0x16f0, 1},
		Range16{0x17e0, 0x17e9, 1},
		Range16{0x17f0, 0x17f9, 1},
		Range16{0x1810, 0x1819, 1},
		Range16{0x1946, 0x194f, 1},
		Range16{0x19d0, 0x19da, 1},
		Range16{0x1a80, 0x1a89, 1},
		Range16{0x1a90, 0x1a99, 1},
		Range16{0x1b50, 0x1b59, 1},
		Range16{0x1bb0, 0x1bb9, 1},
		Range16{0x1c40, 0x1c49, 1},
		Range16{0x1c50, 0x1c59, 1},
		Range16{0x2070, 0x2074, 4},
		Range16{0x2075, 0x2079, 1},
		Range16{0x2080, 0x2089, 1},
		Range16{0x2150, 0x2182, 1},
		Range16{0x2185, 0x2189, 1},
		Range16{0x2460, 0x249b, 1},
		Range16{0x24ea, 0x24ff, 1},
		Range16{0x2776, 0x2793, 1},
		Range16{0x2cfd, 0x3007, 778},
		Range16{0x3021, 0x3029, 1},
		Range16{0x3038, 0x303a, 1},
		Range16{0x3192, 0x3195, 1},
		Range16{0x3220, 0x3229, 1},
		Range16{0x3248, 0x324f, 1},
		Range16{0x3251, 0x325f, 1},
		Range16{0x3280, 0x3289, 1},
		Range16{0x32b1, 0x32bf, 1},
		Range16{0xa620, 0xa629, 1},
		Range16{0xa6e6, 0xa6ef, 1},
		Range16{0xa830, 0xa835, 1},
		Range16{0xa8d0, 0xa8d9, 1},
		Range16{0xa900, 0xa909, 1},
		Range16{0xa9d0, 0xa9d9, 1},
		Range16{0xa9f0, 0xa9f9, 1},
		Range16{0xaa50, 0xaa59, 1},
		Range16{0xabf0, 0xabf9, 1},
		Range16{0xff10, 0xff19, 1},
	]
	r32: [
		Range32{0x10107, 0x10133, 1},
		Range32{0x10140, 0x10178, 1},
		Range32{0x1018a, 0x1018b, 1},
		Range32{0x102e1, 0x102fb, 1},
		Range32{0x10320, 0x10323, 1},
		Range32{0x10341, 0x1034a, 9},
		Range32{0x103d1, 0x103d5, 1},
		Range32{0x104a0, 0x104a9, 1},
		Range32{0x10858, 0x1085f, 1},
		Range32{0x10879, 0x1087f, 1},
		Range32{0x108a7, 0x108af, 1},
		Range32{0x108fb, 0x108ff, 1},
		Range32{0x10916, 0x1091b, 1},
		Range32{0x109bc, 0x109bd, 1},
		Range32{0x109c0, 0x109cf, 1},
		Range32{0x109d2, 0x109ff, 1},
		Range32{0x10a40, 0x10a48, 1},
		Range32{0x10a7d, 0x10a7e, 1},
		Range32{0x10a9d, 0x10a9f, 1},
		Range32{0x10aeb, 0x10aef, 1},
		Range32{0x10b58, 0x10b5f, 1},
		Range32{0x10b78, 0x10b7f, 1},
		Range32{0x10ba9, 0x10baf, 1},
		Range32{0x10cfa, 0x10cff, 1},
		Range32{0x10d30, 0x10d39, 1},
		Range32{0x10e60, 0x10e7e, 1},
		Range32{0x10f1d, 0x10f26, 1},
		Range32{0x10f51, 0x10f54, 1},
		Range32{0x10fc5, 0x10fcb, 1},
		Range32{0x11052, 0x1106f, 1},
		Range32{0x110f0, 0x110f9, 1},
		Range32{0x11136, 0x1113f, 1},
		Range32{0x111d0, 0x111d9, 1},
		Range32{0x111e1, 0x111f4, 1},
		Range32{0x112f0, 0x112f9, 1},
		Range32{0x11450, 0x11459, 1},
		Range32{0x114d0, 0x114d9, 1},
		Range32{0x11650, 0x11659, 1},
		Range32{0x116c0, 0x116c9, 1},
		Range32{0x11730, 0x1173b, 1},
		Range32{0x118e0, 0x118f2, 1},
		Range32{0x11950, 0x11959, 1},
		Range32{0x11c50, 0x11c6c, 1},
		Range32{0x11d50, 0x11d59, 1},
		Range32{0x11da0, 0x11da9, 1},
		Range32{0x11fc0, 0x11fd4, 1},
		Range32{0x12400, 0x1246e, 1},
		Range32{0x16a60, 0x16a69, 1},
		Range32{0x16b50, 0x16b59, 1},
		Range32{0x16b5b, 0x16b61, 1},
		Range32{0x16e80, 0x16e96, 1},
		Range32{0x1d2e0, 0x1d2f3, 1},
		Range32{0x1d360, 0x1d378, 1},
		Range32{0x1d7ce, 0x1d7ff, 1},
		Range32{0x1e140, 0x1e149, 1},
		Range32{0x1e2f0, 0x1e2f9, 1},
		Range32{0x1e8c7, 0x1e8cf, 1},
		Range32{0x1e950, 0x1e959, 1},
		Range32{0x1ec71, 0x1ecab, 1},
		Range32{0x1ecad, 0x1ecaf, 1},
		Range32{0x1ecb1, 0x1ecb4, 1},
		Range32{0x1ed01, 0x1ed2d, 1},
		Range32{0x1ed2f, 0x1ed3d, 1},
		Range32{0x1f100, 0x1f10c, 1},
		Range32{0x1fbf0, 0x1fbf9, 1},
	]
	// The first four r16 entries end at or below U+00FF; is_excluding_latin skips them.
	latin_offset: 4
}
| 1296 | |
// RangeTable describes a set of code points as two lists of ranges,
// split by the width of the bounds.
struct RangeTable {
pub:
	// Ranges whose bounds fit in 16 bits.
	r16 []Range16
	// Ranges whose bounds need 32 bits.
	r32 []Range32
	// Number of leading `r16` entries that is_excluding_latin leaves out of its search.
	latin_offset int
}
| 1303 | |
// Range16 is one range of 16-bit code points: `lo`, `lo + stride`,
// `lo + 2 * stride`, ... up to and including `hi`.
struct Range16 {
pub:
	// First code point of the range.
	lo u16
	// Last code point of the range (inclusive).
	hi u16
	// Distance between consecutive members; 1 means every code point in lo..hi.
	stride u16
}
| 1310 | |
// Range32 is one range of 32-bit code points: `lo`, `lo + stride`,
// `lo + 2 * stride`, ... up to and including `hi`.
struct Range32 {
pub:
	// First code point of the range.
	lo u32
	// Last code point of the range (inclusive).
	hi u32
	// Distance between consecutive members; 1 means every code point in lo..hi.
	stride u32
}
| 1317 | |
// is_excluding_latin reports whether rune `r` is a member of `table`,
// ignoring the first `table.latin_offset` entries of `table.r16`.
//
// Runes up to the highest 16-bit bound are looked up in the remaining
// `r16` entries; anything at or above the first 32-bit range is looked up
// in `r32`. Everything else is reported as not a member.
// NOTE(review): callers presumably classify Latin-1 runes through a
// separate fast path before calling this - confirm against the call sites.
fn is_excluding_latin(table &RangeTable, r rune) bool {
	r16 := &table.r16
	off := table.latin_offset
	// `hi` is an inclusive bound, so the comparison must be `<=`. With `<`,
	// a rune equal to the last 16-bit `hi` (e.g. U+3000 in the white space
	// table, U+FF19 in the number table) was never searched.
	if r16.len > off && u32(r) <= u32((*r16)[r16.len - 1].hi) {
		// The guard above ensures `r` fits in 16 bits, so the cast is lossless.
		return is_16((*r16)[off..], u16(r))
	}
	r32 := &table.r32
	if r32.len > 0 && r >= rune((*r32)[0].lo) {
		return is_32(*r32, u32(r))
	}
	return false
}
| 1331 | |
// linear_max is the largest range-list length for which is_16 and is_32
// will consider a linear scan instead of a binary search.
const linear_max = 18
| 1333 | |
// is_16 reports whether the 16-bit code point `r` belongs to one of `ranges`.
//
// `ranges` must be sorted by ascending `lo` and must not overlap; both
// search strategies below rely on that ordering.
fn is_16(ranges []Range16, r u16) bool {
	// Short list and small code point: scan from the front.
	// (`max_latin_1` is declared outside this part of the file.)
	if ranges.len <= linear_max && r <= max_latin_1 {
		for entry in ranges {
			if r < entry.lo {
				// All later entries start even higher, so `r` cannot match.
				break
			}
			if r <= entry.hi {
				return entry.stride == 1 || (r - entry.lo) % entry.stride == 0
			}
		}
		return false
	}

	// Otherwise bisect the half-open window [first, past_end).
	mut first := 0
	mut past_end := ranges.len
	for first < past_end {
		mid := first + (past_end - first) / 2
		entry := ranges[mid]
		if r < entry.lo {
			past_end = mid
		} else if r > entry.hi {
			first = mid + 1
		} else {
			// lo <= r <= hi: membership now depends only on the stride.
			return entry.stride == 1 || (r - entry.lo) % entry.stride == 0
		}
	}

	return false
}
| 1364 | |
// is_32 reports whether the 32-bit code point `r` belongs to one of `ranges`.
//
// `ranges` must be sorted by ascending `lo` and must not overlap; both
// search strategies below rely on that ordering.
fn is_32(ranges []Range32, r u32) bool {
	// Short list and small code point: scan from the front.
	// (`max_latin_1` is declared outside this part of the file.)
	if ranges.len <= linear_max && r <= max_latin_1 {
		for entry in ranges {
			if r < entry.lo {
				// All later entries start even higher, so `r` cannot match.
				break
			}
			if r <= entry.hi {
				return entry.stride == 1 || (r - entry.lo) % entry.stride == 0
			}
		}
		return false
	}

	// Otherwise bisect the half-open window [first, past_end).
	mut first := 0
	mut past_end := ranges.len
	for first < past_end {
		mid := first + (past_end - first) / 2
		entry := ranges[mid]
		if r < entry.lo {
			past_end = mid
		} else if r > entry.hi {
			first = mid + 1
		} else {
			// lo <= r <= hi: membership now depends only on the stride.
			return entry.stride == 1 || (r - entry.lo) % entry.stride == 0
		}
	}

	return false
}
| 1395 | |