| 1 | module builtin |
| 2 | |
| 3 | // This file contains Unicode grapheme cluster property tables used by string.graphemes() |
| 4 | // and utf8_str_visible_length(). The data is derived from Unicode 13.0.0 |
| 5 | // GraphemeBreakProperty.txt and emoji-data.txt. |
| 6 | |
// GraphemeBreakProperty classifies a single rune for grapheme cluster segmentation.
// The value for a rune is computed by grapheme_break_property() and consumed by
// should_break_grapheme(), GraphemeState.push() and utf8_rune_visible_width().
enum GraphemeBreakProperty {
	// Fallback for runes that match none of the checks in grapheme_break_property().
	other
	// Carriage return (`\r`).
	cr
	// Line feed (`\n`).
	lf
	// Runes listed in grapheme_control_ranges.
	control
	// Runes listed in grapheme_extend_ranges.
	extend
	// Runes in 0x1f1e6..0x1f1ff; consecutive ones are paired via GraphemeState.ri_count.
	regional_indicator
	// Runes listed in grapheme_prepend_ranges.
	prepend
	// Runes listed in grapheme_spacing_mark_ranges.
	spacing_mark
	// Hangul leading jamo: 0x1100..0x115f and 0xa960..0xa97c.
	l
	// Hangul vowel jamo: 0x1160..0x11a7 and 0xd7b0..0xd7c6.
	v
	// Hangul trailing jamo: 0x11a8..0x11ff and 0xd7cb..0xd7fb.
	t
	// Precomposed Hangul syllable in 0xac00..0xd7a3 whose offset from 0xac00 is a multiple of 28.
	lv
	// Every other precomposed Hangul syllable in 0xac00..0xd7a3.
	lvt
	// Zero width joiner (0x200d).
	zwj
}
| 23 | |
| 24 | const grapheme_control_ranges = '00000000090000000b0000000c0000000e0000001f0000007f0000009f000000ad000000ad0000001c0600001c0600000e1800000e1800000b2000000b2000000e2000000f200000282000002820000029200000292000002a2000002e20000060200000642000006520000065200000662000006f200000fffe0000fffe0000f0ff0000f8ff0000f9ff0000fbff00003034010038340100a0bc0100a3bc010073d101007ad1010000000e0000000e0001000e0001000e0002000e001f000e0080000e00ff000e00f0010e00ff0f0e00' |
| 25 | |
| 26 | const grapheme_extend_ranges = |
| 27 | '000300006f0300008304000087040000880400008904000091050000bd050000bf050000bf050000c1050000c2050000c4050000c5050000c7050000c7050000100600001a0600004b0600005f0600007006000070060000d6060000dc060000df060000e4060000e7060000e8060000ea060000ed0600001107000011070000300700004a070000a6070000b0070000eb070000f3070000fd070000fd07000016080000190800001b080000230800002508000027080000290800002d080000590800005b080000d3080000e1080000e3080000020900003a0900003a0900003c0900003c09000041090000480900004d0900004d090000510900005709000062090000630900008109000081090000bc090000bc090000be090000be090000c1090000c4090000cd090000cd090000d7090000d7090000e2090000e3090000fe090000fe090000010a0000020a00003c0a00003c0a0000410a0000420a0000470a0000480a00004b0a00004d0a0000510a0000510a0000700a0000710a0000750a0000750a0000810a0000820a0000bc0a0000bc0a0000c10a0000c50a0000c70a0000c80a0000cd0a0000cd0a0000e20a0000e30a0000fa0a0000ff0a0000010b0000010b00003c0b00003c0b00003e0b00003e0b00003f0b00003f0b0000410b0000440b00004d0b00004d0b0000550b0000560b0000570b0000570b0000620b0000630b0000820b0000820b0000be0b0000be0b0000c00b0000c00b0000cd0b0000cd0b0000d70b0000d70b0000000c0000000c0000040c0000040c00003e0c0000400c0000460c0000480c00004a0c00004d0c0000550c0000560c0000620c0000630c0000810c0000810c0000bc0c0000bc0c0000bf0c0000bf0c0000c20c0000c20c0000c60c0000c60c0000cc0c0000cd0c0000d50c0000d60c0000e20c0000e30c0000000d0000010d00003b0d00003c0d00003e0d00003e0d0000410d0000440d00004d0d00004d0d0000570d0000570d0000620d0000630d0000810d0000810d0000ca0d0000ca0d0000cf0d0000cf0d0000d20d0000d40d0000d60d0000d60d0000df0d0000df0d0000310e0000310e0000340e00003a0e0000470e00004e0e0000b10e0000b10e0000b40e0000bc0e0000c80e0000cd0e0000180f0000190f0000350f0000350f0000370f0000370f0000390f0000390f0000710f00007e0f0000800f0000840f0000860f0000870f00008d0f0000970f0000990f0000bc0f0000c60f0000c60f00002d100000301000003210000037100000391000003a1000003d1000003e10000058100000591000005e100000601000007110000074100000821000008210000085100000861000008d1000008d1000009d100000
9d1000005d1300005f1300001217000014170000321700003417000052170000531700007217000073170000b4170000b5170000b7170000bd170000c6170000c6170000c9170000d3170000dd170000dd1700000b1800000d1800008518000086180000a9180000a9180000201900002219000027190000281900003219000032190000391900003b190000171a0000181a00001b1a00001b1a0000561a0000561a0000581a00005e1a0000601a0000601a0000621a0000621a0000651a00006c1a0000731a00007c1a00007f1a00007f1a0000b01a0000bd1a0000be1a0000be1a0000bf1a0000c01a0000001b0000031b0000341b0000341b0000351b0000351b0000361b00003a1b00003c1b00003c1b0000421b0000421b00006b1b0000731b0000801b0000811b0000a21b0000a51b0000a81b0000a91b0000ab1b0000ad1b0000e61b0000e61b0000e81b0000e91b0000ed1b0000ed1b0000ef1b0000f11b00002c1c0000331c0000361c0000371c0000d01c0000d21c0000d41c0000e01c0000e21c0000e81c0000ed1c0000ed1c0000f41c0000f41c0000f81c0000f91c0000c01d0000f91d0000fb1d0000ff1d00000c2000000c200000d0200000dc200000dd200000e0200000e1200000e1200000e2200000e4200000e5200000f0200000ef2c0000f12c00007f2d00007f2d0000e02d0000ff2d00002a3000002d3000002e3000002f300000993000009a3000006fa600006fa60000' + |
| 28 | '70a6000072a6000074a600007da600009ea600009fa60000f0a60000f1a6000002a8000002a8000006a8000006a800000ba800000ba8000025a8000026a800002ca800002ca80000c4a80000c5a80000e0a80000f1a80000ffa80000ffa8000026a900002da9000047a9000051a9000080a9000082a90000b3a90000b3a90000b6a90000b9a90000bca90000bda90000e5a90000e5a9000029aa00002eaa000031aa000032aa000035aa000036aa000043aa000043aa00004caa00004caa00007caa00007caa0000b0aa0000b0aa0000b2aa0000b4aa0000b7aa0000b8aa0000beaa0000bfaa0000c1aa0000c1aa0000ecaa0000edaa0000f6aa0000f6aa0000e5ab0000e5ab0000e8ab0000e8ab0000edab0000edab00001efb00001efb000000fe00000ffe000020fe00002ffe00009eff00009fff0000fd010100fd010100e0020100e0020100760301007a030100010a0100030a0100050a0100060a01000c0a01000f0a0100380a01003a0a01003f0a01003f0a0100e50a0100e60a0100240d0100270d0100ab0e0100ac0e0100460f0100500f0100011001000110010038100100461001007f10010081100100b3100100b6100100b9100100ba1001000011010002110100271101002b1101002d1101003411010073110100731101008011010081110100b6110100be110100c9110100cc110100cf110100cf1101002f12010031120100341201003412010036120100371201003e1201003e120100df120100df120100e3120100ea12010000130100011301003b1301003c1301003e1301003e13010040130100401301005713010057130100661301006c1301007013010074130100381401003f140100421401004414010046140100461401005e1401005e140100b0140100b0140100b3140100b8140100ba140100ba140100bd140100bd140100bf140100c0140100c2140100c3140100af150100af150100b2150100b5150100bc150100bd150100bf150100c0150100dc150100dd150100331601003a1601003d1601003d1601003f16010040160100ab160100ab160100ad160100ad160100b0160100b5160100b7160100b71601001d1701001f1701002217010025170100271701002b1701002f18010037180100391801003a18010030190100301901003b1901003c1901003e1901003e1901004319010043190100d4190100d7190100da190100db190100e0190100e0190100011a01000a1a0100331a0100381a01003b1a01003e1a0100471a0100471a0100511a0100561a0100591a01005b1a01008a1a0100961a0100981a0100991a0100301c0100361c0100381c01003d1c01003f1c01003f1c0100921c0100a71c0100aa1c0100b01c0100b21c0100
b31c0100b51c0100b61c0100311d0100361d01003a1d01003a1d01003c1d01003d1d01003f1d0100451d0100471d0100471d0100901d0100911d0100951d0100951d0100971d0100971d0100f31e0100f41e0100f06a0100f46a0100306b0100366b01004f6f01004f6f01008f6f0100926f0100e46f0100e46f01009dbc01009ebc010065d1010065d1010067d1010069d101006ed1010072d101007bd1010082d1010085d101008bd10100aad10100add1010042d2010044d2010000da010036da01003bda01006cda010075da010075da010084da010084da01009bda01009fda0100a1da0100afda010000e0010006e0010008e0010018e001001be0010021e0010023e0010024e0010026e001002ae0010030e1010036e10100ece20100efe20100d0e80100d6e8010044e901004ae90100fbf30100fff3010020000e007f000e0000010e00ef010e00' |
| 29 | |
| 30 | const grapheme_spacing_mark_ranges = '03090000030900003b0900003b0900003e09000040090000490900004c0900004e0900004f0900008209000083090000bf090000c0090000c7090000c8090000cb090000cc090000030a0000030a00003e0a0000400a0000830a0000830a0000be0a0000c00a0000c90a0000c90a0000cb0a0000cc0a0000020b0000030b0000400b0000400b0000470b0000480b00004b0b00004c0b0000bf0b0000bf0b0000c10b0000c20b0000c60b0000c80b0000ca0b0000cc0b0000010c0000030c0000410c0000440c0000820c0000830c0000be0c0000be0c0000c00c0000c10c0000c30c0000c40c0000c70c0000c80c0000ca0c0000cb0c0000020d0000030d00003f0d0000400d0000460d0000480d00004a0d00004c0d0000820d0000830d0000d00d0000d10d0000d80d0000de0d0000f20d0000f30d0000330e0000330e0000b30e0000b30e00003e0f00003f0f00007f0f00007f0f000031100000311000003b1000003c10000056100000571000008410000084100000b6170000b6170000be170000c5170000c7170000c81700002319000026190000291900002b19000030190000311900003319000038190000191a00001a1a0000551a0000551a0000571a0000571a00006d1a0000721a0000041b0000041b00003b1b00003b1b00003d1b0000411b0000431b0000441b0000821b0000821b0000a11b0000a11b0000a61b0000a71b0000aa1b0000aa1b0000e71b0000e71b0000ea1b0000ec1b0000ee1b0000ee1b0000f21b0000f31b0000241c00002b1c0000341c0000351c0000e11c0000e11c0000f71c0000f71c000023a8000024a8000027a8000027a8000080a8000081a80000b4a80000c3a8000052a9000053a9000083a9000083a90000b4a90000b5a90000baa90000bba90000bea90000c0a900002faa000030aa000033aa000034aa00004daa00004daa0000ebaa0000ebaa0000eeaa0000efaa0000f5aa0000f5aa0000e3ab0000e4ab0000e6ab0000e7ab0000e9ab0000eaab0000ecab0000ecab0000001001000010010002100100021001008210010082100100b0100100b2100100b7100100b81001002c1101002c11010045110100461101008211010082110100b3110100b5110100bf110100c0110100ce110100ce1101002c1201002e12010032120100331201003512010035120100e0120100e212010002130100031301003f1301003f130100411301004413010047130100481301004b1301004d1301006213010063130100351401003714010040140100411401004514010045140100b1140100b2140100b9140100b9140100bb140100bc140100be140100be140100c1140100c1140100b01
50100b1150100b8150100bb150100be150100be15010030160100321601003b1601003c1601003e1601003e160100ac160100ac160100ae160100af160100b6160100b6160100201701002117010026170100261701002c1801002e1801003818010038180100311901003519010037190100381901003d1901003d19010040190100401901004219010042190100d1190100d3190100dc190100df190100e4190100e4190100391a0100391a0100571a0100581a0100971a0100971a01002f1c01002f1c01003e1c01003e1c0100a91c0100a91c0100b11c0100b11c0100b41c0100b41c01008a1d01008e1d0100931d0100941d0100961d0100961d0100f51e0100f61e0100516f0100876f0100f06f0100f16f010066d1010066d101006dd101006dd10100' |
| 31 | |
| 32 | const grapheme_prepend_ranges = '0006000005060000dd060000dd0600000f0700000f070000e2080000e20800004e0d00004e0d0000bd100100bd100100cd100100cd100100c2110100c31101003f1901003f19010041190100411901003a1a01003a1a0100841a0100891a0100461d0100461d0100' |
| 33 | |
| 34 | const grapheme_extended_pictographic_ranges = |
| 35 | 'a9000000a9000000ae000000ae0000003c2000003c2000004920000049200000222100002221000039210000392100009421000099210000a9210000aa2100001a2300001b23000028230000282300008823000088230000cf230000cf230000e9230000ec230000ed230000ee230000ef230000ef230000f0230000f0230000f1230000f2230000f3230000f3230000f8230000fa230000c2240000c2240000aa250000ab250000b6250000b6250000c0250000c0250000fb250000fe2500000026000001260000022600000326000004260000042600000526000005260000072600000d2600000e2600000e2600000f2600001026000011260000112600001226000012260000142600001526000016260000172600001826000018260000192600001c2600001d2600001d2600001e2600001f2600002026000020260000212600002126000022260000232600002426000025260000262600002626000027260000292600002a2600002a2600002b2600002d2600002e2600002e2600002f2600002f260000302600003726000038260000392600003a2600003a2600003b2600003f26000040260000402600004126000041260000422600004226000043260000472600004826000053260000542600005e2600005f2600005f2600006026000060260000612600006226000063260000632600006426000064260000652600006626000067260000672600006826000068260000692600007a2600007b2600007b2600007c2600007d2600007e2600007e2600007f2600007f2600008026000085260000902600009126000092260000922600009326000093260000942600009426000095260000952600009626000097260000982600009826000099260000992600009a2600009a2600009b2600009c2600009d2600009f260000a0260000a1260000a2260000a6260000a7260000a7260000a8260000a9260000aa260000ab260000ac260000af260000b0260000b1260000b2260000bc260000bd260000be260000bf260000c3260000c4260000c5260000c6260000c7260000c8260000c8260000c9260000cd260000ce260000ce260000cf260000cf260000d0260000d0260000d1260000d1260000d2260000d2260000d3260000d3260000d4260000d4260000d5260000e8260000e9260000e9260000ea260000ea260000eb260000ef260000f0260000f1260000f2260000f3260000f4260000f4260000f5260000f5260000f6260000f6260000f7260000f9260000fa260000fa260000fb260000fc260000fd260000fd260000fe26000001270000022700000227000003270000042700000527000005270000082700000c2700000d2700000d2700000e270000
0e2700000f2700000f27000010270000112700001227000012270000142700001427000016270000162700001d2700001d270000212700002127000028270000282700003327000034270000442700004427000047270000472700004c2700004c2700004e2700004e270000532700005527000057270000572700006327000063270000642700006427000065270000672700009527000097270000a1270000a1270000b0270000b0270000bf270000bf2700003429000035290000052b0000072b00001b2b00001c2b0000502b0000502b0000552b0000552b000030300000303000003d3000003d3000009732000097320000993200009932000000f0010003f0010004f0010004f0010005f00100cef00100cff00100cff00100d0f00100fff001000df101000ff101002ff101002ff101006cf101006ff1010070f1010071f101007ef101007ff101008ef101008ef1010091f101009af10100adf10100e5f1010001f2010002f2010003f201000ff201001af201001af201002ff201002ff2010032f201003af201003cf201003ff2010049f201004ff2010050f2010051f2010052f20100fff2010000f301000cf301000df301000ef301000ff301000ff3010010f3010010f3010011f3010011f3010012f3010012f3010013f3010015f3010016f3010018f3010019f3010019f301001af301001af301001bf301001bf301001cf301001cf301001df301001ef301001ff3010020f30100' + |
| 36 | '21f3010021f3010022f3010023f3010024f301002cf301002df301002ff3010030f3010031f3010032f3010033f3010034f3010035f3010036f3010036f3010037f301004af301004bf301004bf301004cf301004ff3010050f3010050f3010051f301007bf301007cf301007cf301007df301007df301007ef301007ff3010080f3010093f3010094f3010095f3010096f3010097f3010098f3010098f3010099f301009bf301009cf301009df301009ef301009ff30100a0f30100c4f30100c5f30100c5f30100c6f30100c6f30100c7f30100c7f30100c8f30100c8f30100c9f30100c9f30100caf30100caf30100cbf30100cef30100cff30100d3f30100d4f30100dff30100e0f30100e3f30100e4f30100e4f30100e5f30100f0f30100f1f30100f2f30100f3f30100f3f30100f4f30100f4f30100f5f30100f5f30100f6f30100f6f30100f7f30100f7f30100f8f30100faf3010000f4010007f4010008f4010008f4010009f401000bf401000cf401000ef401000ff4010010f4010011f4010012f4010013f4010013f4010014f4010014f4010015f4010015f4010016f4010016f4010017f4010029f401002af401002af401002bf401003ef401003ff401003ff4010040f4010040f4010041f4010041f4010042f4010064f4010065f4010065f4010066f401006bf401006cf401006df401006ef40100acf40100adf40100adf40100aef40100b5f40100b6f40100b7f40100b8f40100ebf40100ecf40100edf40100eef40100eef40100eff40100eff40100f0f40100f4f40100f5f40100f5f40100f6f40100f7f40100f8f40100f8f40100f9f40100fcf40100fdf40100fdf40100fef40100fef40100fff4010002f5010003f5010003f5010004f5010007f5010008f5010008f5010009f5010009f501000af5010014f5010015f5010015f5010016f501002bf501002cf501002df501002ef501003df5010046f5010048f5010049f501004af501004bf501004ef501004ff501004ff5010050f501005bf501005cf5010067f5010068f501006ef501006ff5010070f5010071f5010072f5010073f5010079f501007af501007af501007bf5010086f5010087f5010087f5010088f5010089f501008af501008df501008ef501008ff5010090f5010090f5010091f5010094f5010095f5010096f5010097f50100a3f50100a4f50100a4f50100a5f50100a5f50100a6f50100a7f50100a8f50100a8f50100a9f50100b0f50100b1f50100b2f50100b3f50100bbf50100bcf50100bcf50100bdf50100c1f50100c2f50100c4f50100c5f50100d0f50100d1f50100d3f50100d4f50100dbf50100dcf50100def50100dff50100e0f50100e1f50100e1f50100e2f50100
e2f50100e3f50100e3f50100e4f50100e7f50100e8f50100e8f50100e9f50100eef50100eff50100eff50100f0f50100f2f50100f3f50100f3f50100f4f50100f9f50100faf50100faf50100fbf50100fff5010000f6010000f6010001f6010006f6010007f6010008f6010009f601000df601000ef601000ef601000ff601000ff6010010f6010010f6010011f6010011f6010012f6010014f6010015f6010015f6010016f6010016f6010017f6010017f6010018f6010018f6010019f6010019f601001af601001af601001bf601001bf601001cf601001ef601001ff601001ff6010020f6010025f6010026f6010027f6010028f601002bf601002cf601002cf601002df601002df601002ef601002ff6010030f6010033f6010034f6010034f6010035f6010035f6010036f6010036f6010037f6010040f6010041f6010044f6010045f601004ff6010080f6010080f6010081f6010082f6010083f6010085f6010086f6010086f6010087f6010087f6010088f6010088f6010089f6010089f601008af601008bf601008cf601008cf601008df601008df601008ef601008ef601008ff601008ff6010090f6010090f6010091f6010093f6010094f6010094f6010095f6010095f6010096f6010096f6010097f6010097f6010098f6010098f6010099f601009af601009bf60100a1f60100a2f60100a2f60100a3f60100a3f60100a4f60100a5f60100a6f60100a6f60100a7f60100adf60100' + |
| 37 | 'aef60100b1f60100b2f60100b2f60100b3f60100b5f60100b6f60100b6f60100b7f60100b8f60100b9f60100bef60100bff60100bff60100c0f60100c0f60100c1f60100c5f60100c6f60100caf60100cbf60100cbf60100ccf60100ccf60100cdf60100cff60100d0f60100d0f60100d1f60100d2f60100d3f60100d4f60100d5f60100d5f60100d6f60100d7f60100d8f60100dff60100e0f60100e5f60100e6f60100e8f60100e9f60100e9f60100eaf60100eaf60100ebf60100ecf60100edf60100eff60100f0f60100f0f60100f1f60100f2f60100f3f60100f3f60100f4f60100f6f60100f7f60100f8f60100f9f60100f9f60100faf60100faf60100fbf60100fcf60100fdf60100fff6010074f701007ff70100d5f70100dff70100e0f70100ebf70100ecf70100fff701000cf801000ff8010048f801004ff801005af801005ff8010088f801008ff80100aef80100fff801000cf901000cf901000df901000ff9010010f9010018f9010019f901001ef901001ff901001ff9010020f9010027f9010028f901002ff9010030f9010030f9010031f9010032f9010033f901003af901003cf901003ef901003ff901003ff9010040f9010045f9010047f901004bf901004cf901004cf901004df901004ff9010050f901005ef901005ff901006bf901006cf9010070f9010071f9010071f9010072f9010072f9010073f9010076f9010077f9010078f9010079f9010079f901007af901007af901007bf901007bf901007cf901007ff9010080f9010084f9010085f9010091f9010092f9010097f9010098f90100a2f90100a3f90100a4f90100a5f90100aaf90100abf90100adf90100aef90100aff90100b0f90100b9f90100baf90100bff90100c0f90100c0f90100c1f90100c2f90100c3f90100caf90100cbf90100cbf90100ccf90100ccf90100cdf90100cff90100d0f90100e6f90100e7f90100fff9010000fa01006ffa010070fa010073fa010074fa010074fa010075fa010077fa010078fa01007afa01007bfa01007ffa010080fa010082fa010083fa010086fa010087fa01008ffa010090fa010095fa010096fa0100a8fa0100a9fa0100affa0100b0fa0100b6fa0100b7fa0100bffa0100c0fa0100c2fa0100c3fa0100cffa0100d0fa0100d6fa0100d7fa0100fffa010000fc0100fdff0100' |
| 38 | |
// grapheme_hex_nibble converts one ASCII hexadecimal digit to its numeric value.
// Bytes up to `9` are treated as decimal digits; anything above is treated as a
// letter digit in either case. The input is not validated.
@[inline]
fn grapheme_hex_nibble(c u8) u32 {
	if c > `9` {
		// OR-ing with 0x20 folds `A`..`F` onto `a`..`f` before the offset is taken.
		return u32((c | 0x20) - `a` + 10)
	}
	return u32(c - `0`)
}
| 43 | |
// grapheme_hex_byte decodes the two hexadecimal digits at `ranges[i]` and
// `ranges[i + 1]` into one byte value; the first digit is the high nibble.
@[inline]
fn grapheme_hex_byte(ranges string, i int) u32 {
	high_nibble := grapheme_hex_nibble(ranges[i])
	low_nibble := grapheme_hex_nibble(ranges[i + 1])
	return (high_nibble << 4) | low_nibble
}
| 48 | |
// grapheme_range_value decodes the `value_idx`-th 32-bit value stored in `ranges`.
// Each value occupies 8 hexadecimal digits (4 bytes), with the least significant
// byte written first.
@[inline]
fn grapheme_range_value(ranges string, value_idx int) u32 {
	offset := value_idx * 8
	lowest := grapheme_hex_byte(ranges, offset)
	low_mid := grapheme_hex_byte(ranges, offset + 2)
	high_mid := grapheme_hex_byte(ranges, offset + 4)
	highest := grapheme_hex_byte(ranges, offset + 6)
	// Fold from the most significant byte down to the least significant one.
	return (((((highest << 8) | high_mid) << 8) | low_mid) << 8) | lowest
}
| 58 | |
// in_grapheme_ranges reports whether `r` lies inside any of the inclusive
// `[start, end]` pairs encoded in `ranges`. Every pair takes 16 hexadecimal
// digits (two values as decoded by grapheme_range_value). The lookup is a binary
// search, so it relies on the pairs being sorted and non-overlapping.
@[inline]
fn in_grapheme_ranges(r rune, ranges string) bool {
	code := u32(r)
	mut left := 0
	mut right := ranges.len / 16
	for left < right {
		pivot := left + (right - left) / 2
		range_start := grapheme_range_value(ranges, pivot * 2)
		range_end := grapheme_range_value(ranges, pivot * 2 + 1)
		if code < range_start {
			// Everything from the pivot onwards starts above the target.
			right = pivot
			continue
		}
		if code <= range_end {
			return true
		}
		// The pivot range, and all ranges before it, end below the target.
		left = pivot + 1
	}
	return false
}
| 78 | |
// grapheme_break_property returns the grapheme break class of `r`.
// Single code points and contiguous blocks are tested with plain comparisons;
// the remaining classes are looked up in the encoded range tables. All the
// hard-coded ranges are disjoint, so the order of those tests does not matter.
@[inline]
fn grapheme_break_property(r rune) GraphemeBreakProperty {
	cp := u32(r)
	if cp == 0x0d {
		return .cr
	}
	if cp == 0x0a {
		return .lf
	}
	if cp == 0x200d {
		return .zwj
	}
	if cp >= 0x1f1e6 && cp <= 0x1f1ff {
		return .regional_indicator
	}
	// Conjoining jamo block: leading, vowel and trailing jamo sit back to back.
	if cp >= 0x1100 && cp <= 0x11ff {
		if cp <= 0x115f {
			return .l
		}
		if cp <= 0x11a7 {
			return .v
		}
		return .t
	}
	// Additional leading jamo.
	if cp >= 0xa960 && cp <= 0xa97c {
		return .l
	}
	// Precomposed syllables: every 28th one has no trailing consonant.
	if cp >= 0xac00 && cp <= 0xd7a3 {
		if (cp - 0xac00) % 28 != 0 {
			return .lvt
		}
		return .lv
	}
	// Additional vowel and trailing jamo.
	if cp >= 0xd7b0 && cp <= 0xd7c6 {
		return .v
	}
	if cp >= 0xd7cb && cp <= 0xd7fb {
		return .t
	}
	// Table-driven classes, tested in this fixed order.
	if in_grapheme_ranges(r, grapheme_control_ranges) {
		return .control
	}
	if in_grapheme_ranges(r, grapheme_extend_ranges) {
		return .extend
	}
	if in_grapheme_ranges(r, grapheme_spacing_mark_ranges) {
		return .spacing_mark
	}
	if in_grapheme_ranges(r, grapheme_prepend_ranges) {
		return .prepend
	}
	return .other
}
| 119 | |
// is_extended_pictographic reports whether `r` falls inside one of the ranges
// encoded in grapheme_extended_pictographic_ranges. It is used to track the
// `Extended_Pictographic Extend* ZWJ x Extended_Pictographic` (GB11) sequence.
@[inline]
fn is_extended_pictographic(r rune) bool {
	return in_grapheme_ranges(r, grapheme_extended_pictographic_ranges)
}
| 124 | |
// GraphemeState carries the context of the grapheme cluster currently being
// scanned, i.e. what should_break_grapheme() needs to know about the runes
// already consumed.
struct GraphemeState {
mut:
	// Break property of the most recently consumed rune.
	prev_prop GraphemeBreakProperty
	// Number of consecutive regional indicator runes ending at the most recent
	// rune; reset to 0 by any other rune.
	ri_count int
	// Progress through `Extended_Pictographic Extend* ZWJ`:
	// 0 = not in such a sequence,
	// 1 = an extended pictographic rune, optionally followed by extend runes,
	// 2 = state 1 followed directly by a zero width joiner.
	extended_pictographic_state u8
}
| 131 | |
// grapheme_state_from_rune builds the state for a cluster that starts with `r`,
// whose break property is `prop`.
@[inline]
fn grapheme_state_from_rune(r rune, prop GraphemeBreakProperty) GraphemeState {
	mut initial := GraphemeState{
		prev_prop: prop
	}
	if prop == .regional_indicator {
		// The first rune already counts as one regional indicator.
		initial.ri_count = 1
	}
	if is_extended_pictographic(r) {
		initial.extended_pictographic_state = 1
	}
	return initial
}
| 140 | |
// push records that `r` (with break property `prop`) was appended to the
// current cluster, updating the regional indicator count and the progress
// through an `Extended_Pictographic Extend* ZWJ` sequence.
@[inline]
fn (mut gs GraphemeState) push(r rune, prop GraphemeBreakProperty) {
	gs.prev_prop = prop
	if prop == .regional_indicator {
		gs.ri_count++
	} else {
		gs.ri_count = 0
	}
	if is_extended_pictographic(r) {
		gs.extended_pictographic_state = 1
		return
	}
	if gs.extended_pictographic_state == 1 {
		if prop == .extend {
			// Keep the `Extended_Pictographic Extend*` tail alive for GB11.
			return
		}
		if prop == .zwj {
			gs.extended_pictographic_state = 2
			return
		}
	}
	// Anything else ends the sequence.
	gs.extended_pictographic_state = 0
}
| 155 | |
// should_break_grapheme reports whether a grapheme cluster boundary lies
// between the runes summarised by `gs` and the next rune `r`, whose break
// property is `prop`.
//
// The checks run in the rule order of UAX #29, so an earlier rule always wins
// over a later one. In particular GB5 (break before CR, LF and Control) is
// tested before GB9b (no break after Prepend): a Prepend rune followed by a
// line break or control character does not absorb that character.
@[inline]
fn should_break_grapheme(gs GraphemeState, r rune, prop GraphemeBreakProperty) bool {
	// GB3: CR x LF.
	if gs.prev_prop == .cr && prop == .lf {
		return false
	}
	// GB4: always break after CR, LF and Control.
	if gs.prev_prop in [.cr, .lf, .control] {
		return true
	}
	// GB5: always break before CR, LF and Control.
	if prop in [.cr, .lf, .control] {
		return true
	}
	// GB6, GB7, GB8: do not break Hangul syllable sequences.
	match gs.prev_prop {
		.l {
			if prop in [.l, .v, .lv, .lvt] {
				return false
			}
		}
		.lv, .v {
			if prop in [.v, .t] {
				return false
			}
		}
		.lvt, .t {
			if prop == .t {
				return false
			}
		}
		else {}
	}
	// GB9, GB9a: do not break before Extend, ZWJ or SpacingMark.
	if prop in [.extend, .zwj, .spacing_mark] {
		return false
	}
	// GB9b: do not break after Prepend.
	if gs.prev_prop == .prepend {
		return false
	}
	// GB11: Extended_Pictographic Extend* ZWJ x Extended_Pictographic.
	if gs.extended_pictographic_state == 2 && is_extended_pictographic(r) {
		return false
	}
	// GB12, GB13: regional indicators pair up, so only an odd count of
	// preceding indicators may absorb the next one.
	if gs.prev_prop == .regional_indicator && prop == .regional_indicator && gs.ri_count % 2 == 1 {
		return false
	}
	// GB999: otherwise break everywhere.
	return true
}
| 205 | |
// utf8_rune_visible_width returns the number of display columns attributed to
// `r`: 0 for runes that only modify or join a cluster (extend, zwj,
// spacing_mark, prepend), 2 for the hard-coded wide ranges below, 1 otherwise.
@[inline]
fn utf8_rune_visible_width(r rune, prop GraphemeBreakProperty) int {
	if prop in [.extend, .zwj, .spacing_mark, .prepend] {
		return 0
	}
	// Nothing below 0x1100 is treated as wide.
	if r < 0x1100 {
		return 1
	}
	// Keep the historical formatting behavior for common East Asian wide runes and emoji,
	// but apply it per grapheme cluster instead of per code point.
	is_wide := r <= 0x115f || r == 0x2329 || r == 0x232a
		|| (r >= 0x2e80 && r <= 0xa4cf && r != 0x303f) || (r >= 0xac00 && r <= 0xd7a3)
		|| (r >= 0xf900 && r <= 0xfaff) || (r >= 0xfe10 && r <= 0xfe19)
		|| (r >= 0xfe30 && r <= 0xfe6f) || (r >= 0xff00 && r <= 0xff60)
		|| (r >= 0xffe0 && r <= 0xffe6) || (r >= 0x1f300 && r <= 0x1f64f)
		|| (r >= 0x1f680 && r <= 0x1f6ff) || (r >= 0x1f900 && r <= 0x1f9ff)
		|| (r >= 0x1fa70 && r <= 0x1faff) || (r >= 0x20000 && r <= 0x3fffd)
	if is_wide {
		return 2
	}
	return 1
}
| 225 | |
// string_graphemes_impl splits `s` into its grapheme clusters, returned in
// order as separate strings. An empty input yields an empty array.
fn string_graphemes_impl(s string) []string {
	code_points := s.runes()
	if code_points.len == 0 {
		return []string{}
	}
	mut clusters := []string{cap: code_points.len}
	// Index of the first rune of the cluster currently being collected.
	mut cluster_start := 0
	mut state := grapheme_state_from_rune(code_points[0], grapheme_break_property(code_points[0]))
	for idx := 1; idx < code_points.len; idx++ {
		cp := code_points[idx]
		prop := grapheme_break_property(cp)
		if should_break_grapheme(state, cp, prop) {
			// Emit the finished cluster and start a new one at this rune.
			clusters << code_points[cluster_start..idx].string()
			cluster_start = idx
			state = grapheme_state_from_rune(cp, prop)
		} else {
			state.push(cp, prop)
		}
	}
	// The final cluster is never closed inside the loop.
	clusters << code_points[cluster_start..].string()
	return clusters
}
| 251 | |
// utf8_grapheme_visible_length returns the display width of `s`, computed as
// the sum over its grapheme clusters of the widest rune in each cluster (as
// reported by utf8_rune_visible_width). An empty string has width 0.
@[inline]
fn utf8_grapheme_visible_length(s string) int {
	code_points := s.runes()
	if code_points.len == 0 {
		return 0
	}
	head_prop := grapheme_break_property(code_points[0])
	mut state := grapheme_state_from_rune(code_points[0], head_prop)
	mut finished_width := 0
	// Width of the cluster still being scanned: the maximum rune width seen so far.
	mut current_width := utf8_rune_visible_width(code_points[0], head_prop)
	for idx := 1; idx < code_points.len; idx++ {
		cp := code_points[idx]
		prop := grapheme_break_property(cp)
		width := utf8_rune_visible_width(cp, prop)
		if should_break_grapheme(state, cp, prop) {
			// Close the previous cluster and start a new one with this rune.
			finished_width += current_width
			current_width = width
			state = grapheme_state_from_rune(cp, prop)
		} else {
			if width > current_width {
				current_width = width
			}
			state.push(cp, prop)
		}
	}
	return finished_width + current_width
}
| 278 | |