summary | refs | log | tree | commit | diff
path: root/doc/regexp/unicode_properties.rdoc
blob: a1d7ecc38041c7c670fe4689da8fe6b668c7f817 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
== \Regexps Based on Unicode Properties

The properties listed here are those supported by the current version of Ruby.
Older versions of Ruby may not support all of them.

=== POSIX brackets

- <tt>\p{ASCII}</tt>
- <tt>\p{Alnum}</tt>
- <tt>\p{Alphabetic}</tt>, <tt>\p{Alpha}</tt>
- <tt>\p{Blank}</tt>
- <tt>\p{Cntrl}</tt>
- <tt>\p{Digit}</tt>
- <tt>\p{Graph}</tt>
- <tt>\p{Lowercase}</tt>, <tt>\p{Lower}</tt>
- <tt>\p{Print}</tt>
- <tt>\p{Punct}</tt>
- <tt>\p{Space}</tt>
- <tt>\p{Uppercase}</tt>, <tt>\p{Upper}</tt>
- <tt>\p{Word}</tt>
- <tt>\p{XDigit}</tt>
- <tt>\p{XPosixPunct}</tt>

=== Special

- <tt>\p{Any}</tt>
- <tt>\p{Assigned}</tt>

=== Major and General Categories

- <tt>\p{Cased_Letter}</tt>, <tt>\p{LC}</tt>
- <tt>\p{Close_Punctuation}</tt>, <tt>\p{Pe}</tt>
- <tt>\p{Connector_Punctuation}</tt>, <tt>\p{Pc}</tt>
- <tt>\p{Control}</tt>, <tt>\p{Cc}</tt>
- <tt>\p{Currency_Symbol}</tt>, <tt>\p{Sc}</tt>
- <tt>\p{Dash_Punctuation}</tt>, <tt>\p{Pd}</tt>
- <tt>\p{Decimal_Number}</tt>, <tt>\p{Nd}</tt>
- <tt>\p{Enclosing_Mark}</tt>, <tt>\p{Me}</tt>
- <tt>\p{Final_Punctuation}</tt>, <tt>\p{Pf}</tt>
- <tt>\p{Format}</tt>, <tt>\p{Cf}</tt>
- <tt>\p{Initial_Punctuation}</tt>, <tt>\p{Pi}</tt>
- <tt>\p{Letter}</tt>, <tt>\p{L}</tt>
- <tt>\p{Letter_Number}</tt>, <tt>\p{Nl}</tt>
- <tt>\p{Line_Separator}</tt>, <tt>\p{Zl}</tt>
- <tt>\p{Lowercase_Letter}</tt>, <tt>\p{Ll}</tt>
- <tt>\p{Mark}</tt>, <tt>\p{M}</tt>
- <tt>\p{Math_Symbol}</tt>, <tt>\p{Sm}</tt>
- <tt>\p{Modifier_Letter}</tt>, <tt>\p{Lm}</tt>
- <tt>\p{Modifier_Symbol}</tt>, <tt>\p{Sk}</tt>
- <tt>\p{Nonspacing_Mark}</tt>, <tt>\p{Mn}</tt>
- <tt>\p{Number}</tt>, <tt>\p{N}</tt>
- <tt>\p{Open_Punctuation}</tt>, <tt>\p{Ps}</tt>
- <tt>\p{Other}</tt>, <tt>\p{C}</tt>
- <tt>\p{Other_Letter}</tt>, <tt>\p{Lo}</tt>
- <tt>\p{Other_Number}</tt>, <tt>\p{No}</tt>
- <tt>\p{Other_Punctuation}</tt>, <tt>\p{Po}</tt>
- <tt>\p{Other_Symbol}</tt>, <tt>\p{So}</tt>
- <tt>\p{Paragraph_Separator}</tt>, <tt>\p{Zp}</tt>
- <tt>\p{Private_Use}</tt>, <tt>\p{Co}</tt>
- <tt>\p{Punctuation}</tt>, <tt>\p{P}</tt>
- <tt>\p{Separator}</tt>, <tt>\p{Z}</tt>
- <tt>\p{Space_Separator}</tt>, <tt>\p{Zs}</tt>
- <tt>\p{Spacing_Mark}</tt>, <tt>\p{Mc}</tt>
- <tt>\p{Surrogate}</tt>, <tt>\p{Cs}</tt>
- <tt>\p{Symbol}</tt>, <tt>\p{S}</tt>
- <tt>\p{Titlecase_Letter}</tt>, <tt>\p{Lt}</tt>
- <tt>\p{Unassigned}</tt>, <tt>\p{Cn}</tt>
- <tt>\p{Uppercase_Letter}</tt>, <tt>\p{Lu}</tt>

=== Prop List

- <tt>\p{ASCII_Hex_Digit}</tt>, <tt>\p{AHex}</tt>
- <tt>\p{Bidi_Control}</tt>, <tt>\p{Bidi_C}</tt>
- <tt>\p{Dash}</tt>
- <tt>\p{Deprecated}</tt>, <tt>\p{Dep}</tt>
- <tt>\p{Diacritic}</tt>, <tt>\p{Dia}</tt>
- <tt>\p{Extender}</tt>, <tt>\p{Ext}</tt>
- <tt>\p{Hex_Digit}</tt>, <tt>\p{Hex}</tt>
- <tt>\p{Hyphen}</tt>
- <tt>\p{IDS_Binary_Operator}</tt>, <tt>\p{IDSB}</tt>
- <tt>\p{IDS_Trinary_Operator}</tt>, <tt>\p{IDST}</tt>
- <tt>\p{Ideographic}</tt>, <tt>\p{Ideo}</tt>
- <tt>\p{Join_Control}</tt>, <tt>\p{Join_C}</tt>
- <tt>\p{Logical_Order_Exception}</tt>, <tt>\p{LOE}</tt>
- <tt>\p{Noncharacter_Code_Point}</tt>, <tt>\p{NChar}</tt>
- <tt>\p{Other_Alphabetic}</tt>, <tt>\p{OAlpha}</tt>
- <tt>\p{Other_Default_Ignorable_Code_Point}</tt>, <tt>\p{ODI}</tt>
- <tt>\p{Other_Grapheme_Extend}</tt>, <tt>\p{OGr_Ext}</tt>
- <tt>\p{Other_ID_Continue}</tt>, <tt>\p{OIDC}</tt>
- <tt>\p{Other_ID_Start}</tt>, <tt>\p{OIDS}</tt>
- <tt>\p{Other_Lowercase}</tt>, <tt>\p{OLower}</tt>
- <tt>\p{Other_Math}</tt>, <tt>\p{OMath}</tt>
- <tt>\p{Other_Uppercase}</tt>, <tt>\p{OUpper}</tt>
- <tt>\p{Pattern_Syntax}</tt>, <tt>\p{Pat_Syn}</tt>
- <tt>\p{Pattern_White_Space}</tt>, <tt>\p{Pat_WS}</tt>
- <tt>\p{Prepended_Concatenation_Mark}</tt>, <tt>\p{PCM}</tt>
- <tt>\p{Quotation_Mark}</tt>, <tt>\p{QMark}</tt>
- <tt>\p{Radical}</tt>
- <tt>\p{Regional_Indicator}</tt>, <tt>\p{RI}</tt>
- <tt>\p{Sentence_Terminal}</tt>, <tt>\p{STerm}</tt>
- <tt>\p{Soft_Dotted}</tt>, <tt>\p{SD}</tt>
- <tt>\p{Terminal_Punctuation}</tt>, <tt>\p{Term}</tt>
- <tt>\p{Unified_Ideograph}</tt>, <tt>\p{UIdeo}</tt>
- <tt>\p{Variation_Selector}</tt>, <tt>\p{VS}</tt>
- <tt>\p{White_Space}</tt>, <tt>\p{WSpace}</tt>

=== Derived Core Properties

- <tt>\p{Alphabetic}</tt>, <tt>\p{Alpha}</tt>
- <tt>\p{Case_Ignorable}</tt>, <tt>\p{CI}</tt>
- <tt>\p{Cased}</tt>
- <tt>\p{Changes_When_Casefolded}</tt>, <tt>\p{CWCF}</tt>
- <tt>\p{Changes_When_Casemapped}</tt>, <tt>\p{CWCM}</tt>
- <tt>\p{Changes_When_Lowercased}</tt>, <tt>\p{CWL}</tt>
- <tt>\p{Changes_When_Titlecased}</tt>, <tt>\p{CWT}</tt>
- <tt>\p{Changes_When_Uppercased}</tt>, <tt>\p{CWU}</tt>
- <tt>\p{Default_Ignorable_Code_Point}</tt>, <tt>\p{DI}</tt>
- <tt>\p{Grapheme_Base}</tt>, <tt>\p{Gr_Base}</tt>
- <tt>\p{Grapheme_Extend}</tt>, <tt>\p{Gr_Ext}</tt>
- <tt>\p{Grapheme_Link}</tt>, <tt>\p{Gr_Link}</tt>
- <tt>\p{ID_Continue}</tt>, <tt>\p{IDC}</tt>
- <tt>\p{ID_Start}</tt>, <tt>\p{IDS}</tt>
- <tt>\p{Lowercase}</tt>, <tt>\p{Lower}</tt>
- <tt>\p{Math}</tt>
- <tt>\p{Uppercase}</tt>, <tt>\p{Upper}</tt>
- <tt>\p{XID_Continue}</tt>, <tt>\p{XIDC}</tt>
- <tt>\p{XID_Start}</tt>, <tt>\p{XIDS}</tt>

=== Scripts

- <tt>\p{Adlam}</tt>, <tt>\p{Adlm}</tt>
- <tt>\p{Ahom}</tt>
- <tt>\p{Anatolian_Hieroglyphs}</tt>, <tt>\p{Hluw}</tt>
- <tt>\p{Arabic}</tt>, <tt>\p{Arab}</tt>
- <tt>\p{Armenian}</tt>, <tt>\p{Armn}</tt>
- <tt>\p{Avestan}</tt>, <tt>\p{Avst}</tt>
- <tt>\p{Balinese}</tt>, <tt>\p{Bali}</tt>
- <tt>\p{Bamum}</tt>, <tt>\p{Bamu}</tt>
- <tt>\p{Bassa_Vah}</tt>, <tt>\p{Bass}</tt>
- <tt>\p{Batak}</tt>, <tt>\p{Batk}</tt>
- <tt>\p{Bengali}</tt>, <tt>\p{Beng}</tt>
- <tt>\p{Bhaiksuki}</tt>, <tt>\p{Bhks}</tt>
- <tt>\p{Bopomofo}</tt>, <tt>\p{Bopo}</tt>
- <tt>\p{Brahmi}</tt>, <tt>\p{Brah}</tt>
- <tt>\p{Braille}</tt>, <tt>\p{Brai}</tt>
- <tt>\p{Buginese}</tt>, <tt>\p{Bugi}</tt>
- <tt>\p{Buhid}</tt>, <tt>\p{Buhd}</tt>
- <tt>\p{Canadian_Aboriginal}</tt>, <tt>\p{Cans}</tt>
- <tt>\p{Carian}</tt>, <tt>\p{Cari}</tt>
- <tt>\p{Caucasian_Albanian}</tt>, <tt>\p{Aghb}</tt>
- <tt>\p{Chakma}</tt>, <tt>\p{Cakm}</tt>
- <tt>\p{Cham}</tt>
- <tt>\p{Cherokee}</tt>, <tt>\p{Cher}</tt>
- <tt>\p{Chorasmian}</tt>, <tt>\p{Chrs}</tt>
- <tt>\p{Common}</tt>, <tt>\p{Zyyy}</tt>
- <tt>\p{Coptic}</tt>, <tt>\p{Copt}</tt>
- <tt>\p{Cuneiform}</tt>, <tt>\p{Xsux}</tt>
- <tt>\p{Cypriot}</tt>, <tt>\p{Cprt}</tt>
- <tt>\p{Cypro_Minoan}</tt>, <tt>\p{Cpmn}</tt>
- <tt>\p{Cyrillic}</tt>, <tt>\p{Cyrl}</tt>
- <tt>\p{Deseret}</tt>, <tt>\p{Dsrt}</tt>
- <tt>\p{Devanagari}</tt>, <tt>\p{Deva}</tt>
- <tt>\p{Dives_Akuru}</tt>, <tt>\p{Diak}</tt>
- <tt>\p{Dogra}</tt>, <tt>\p{Dogr}</tt>
- <tt>\p{Duployan}</tt>, <tt>\p{Dupl}</tt>
- <tt>\p{Egyptian_Hieroglyphs}</tt>, <tt>\p{Egyp}</tt>
- <tt>\p{Elbasan}</tt>, <tt>\p{Elba}</tt>
- <tt>\p{Elymaic}</tt>, <tt>\p{Elym}</tt>
- <tt>\p{Ethiopic}</tt>, <tt>\p{Ethi}</tt>
- <tt>\p{Georgian}</tt>, <tt>\p{Geor}</tt>
- <tt>\p{Glagolitic}</tt>, <tt>\p{Glag}</tt>
- <tt>\p{Gothic}</tt>, <tt>\p{Goth}</tt>
- <tt>\p{Grantha}</tt>, <tt>\p{Gran}</tt>
- <tt>\p{Greek}</tt>, <tt>\p{Grek}</tt>
- <tt>\p{Gujarati}</tt>, <tt>\p{Gujr}</tt>
- <tt>\p{Gunjala_Gondi}</tt>, <tt>\p{Gong}</tt>
- <tt>\p{Gurmukhi}</tt>, <tt>\p{Guru}</tt>
- <tt>\p{Han}</tt>, <tt>\p{Hani}</tt>
- <tt>\p{Hangul}</tt>, <tt>\p{Hang}</tt>
- <tt>\p{Hanifi_Rohingya}</tt>, <tt>\p{Rohg}</tt>
- <tt>\p{Hanunoo}</tt>, <tt>\p{Hano}</tt>
- <tt>\p{Hatran}</tt>, <tt>\p{Hatr}</tt>
- <tt>\p{Hebrew}</tt>, <tt>\p{Hebr}</tt>
- <tt>\p{Hiragana}</tt>, <tt>\p{Hira}</tt>
- <tt>\p{Imperial_Aramaic}</tt>, <tt>\p{Armi}</tt>
- <tt>\p{Inherited}</tt>, <tt>\p{Zinh}</tt>
- <tt>\p{Inscriptional_Pahlavi}</tt>, <tt>\p{Phli}</tt>
- <tt>\p{Inscriptional_Parthian}</tt>, <tt>\p{Prti}</tt>
- <tt>\p{Javanese}</tt>, <tt>\p{Java}</tt>
- <tt>\p{Kaithi}</tt>, <tt>\p{Kthi}</tt>
- <tt>\p{Kannada}</tt>, <tt>\p{Knda}</tt>
- <tt>\p{Katakana}</tt>, <tt>\p{Kana}</tt>
- <tt>\p{Kawi}</tt>
- <tt>\p{Kayah_Li}</tt>, <tt>\p{Kali}</tt>
- <tt>\p{Kharoshthi}</tt>, <tt>\p{Khar}</tt>
- <tt>\p{Khitan_Small_Script}</tt>, <tt>\p{Kits}</tt>
- <tt>\p{Khmer}</tt>, <tt>\p{Khmr}</tt>
- <tt>\p{Khojki}</tt>, <tt>\p{Khoj}</tt>
- <tt>\p{Khudawadi}</tt>, <tt>\p{Sind}</tt>
- <tt>\p{Lao}</tt>, <tt>\p{Laoo}</tt>
- <tt>\p{Latin}</tt>, <tt>\p{Latn}</tt>
- <tt>\p{Lepcha}</tt>, <tt>\p{Lepc}</tt>
- <tt>\p{Limbu}</tt>, <tt>\p{Limb}</tt>
- <tt>\p{Linear_A}</tt>, <tt>\p{Lina}</tt>
- <tt>\p{Linear_B}</tt>, <tt>\p{Linb}</tt>
- <tt>\p{Lisu}</tt>
- <tt>\p{Lycian}</tt>, <tt>\p{Lyci}</tt>
- <tt>\p{Lydian}</tt>, <tt>\p{Lydi}</tt>
- <tt>\p{Mahajani}</tt>, <tt>\p{Mahj}</tt>
- <tt>\p{Makasar}</tt>, <tt>\p{Maka}</tt>
- <tt>\p{Malayalam}</tt>, <tt>\p{Mlym}</tt>
- <tt>\p{Mandaic}</tt>, <tt>\p{Mand}</tt>
- <tt>\p{Manichaean}</tt>, <tt>\p{Mani}</tt>
- <tt>\p{Marchen}</tt>, <tt>\p{Marc}</tt>
- <tt>\p{Masaram_Gondi}</tt>, <tt>\p{Gonm}</tt>
- <tt>\p{Medefaidrin}</tt>, <tt>\p{Medf}</tt>
- <tt>\p{Meetei_Mayek}</tt>, <tt>\p{Mtei}</tt>
- <tt>\p{Mende_Kikakui}</tt>, <tt>\p{Mend}</tt>
- <tt>\p{Meroitic_Cursive}</tt>, <tt>\p{Merc}</tt>
- <tt>\p{Meroitic_Hieroglyphs}</tt>, <tt>\p{Mero}</tt>
- <tt>\p{Miao}</tt>, <tt>\p{Plrd}</tt>
- <tt>\p{Modi}</tt>
- <tt>\p{Mongolian}</tt>, <tt>\p{Mong}</tt>
- <tt>\p{Mro}</tt>, <tt>\p{Mroo}</tt>
- <tt>\p{Multani}</tt>, <tt>\p{Mult}</tt>
- <tt>\p{Myanmar}</tt>, <tt>\p{Mymr}</tt>
- <tt>\p{Nabataean}</tt>, <tt>\p{Nbat}</tt>
- <tt>\p{Nag_Mundari}</tt>, <tt>\p{Nagm}</tt>
- <tt>\p{Nandinagari}</tt>, <tt>\p{Nand}</tt>
- <tt>\p{New_Tai_Lue}</tt>, <tt>\p{Talu}</tt>
- <tt>\p{Newa}</tt>
- <tt>\p{Nko}</tt>, <tt>\p{Nkoo}</tt>
- <tt>\p{Nushu}</tt>, <tt>\p{Nshu}</tt>
- <tt>\p{Nyiakeng_Puachue_Hmong}</tt>, <tt>\p{Hmnp}</tt>
- <tt>\p{Ogham}</tt>, <tt>\p{Ogam}</tt>
- <tt>\p{Ol_Chiki}</tt>, <tt>\p{Olck}</tt>
- <tt>\p{Old_Hungarian}</tt>, <tt>\p{Hung}</tt>
- <tt>\p{Old_Italic}</tt>, <tt>\p{Ital}</tt>
- <tt>\p{Old_North_Arabian}</tt>, <tt>\p{Narb}</tt>
- <tt>\p{Old_Permic}</tt>, <tt>\p{Perm}</tt>
- <tt>\p{Old_Persian}</tt>, <tt>\p{Xpeo}</tt>
- <tt>\p{Old_Sogdian}</tt>, <tt>\p{Sogo}</tt>
- <tt>\p{Old_South_Arabian}</tt>, <tt>\p{Sarb}</tt>
- <tt>\p{Old_Turkic}</tt>, <tt>\p{Orkh}</tt>
- <tt>\p{Old_Uyghur}</tt>, <tt>\p{Ougr}</tt>
- <tt>\p{Oriya}</tt>, <tt>\p{Orya}</tt>
- <tt>\p{Osage}</tt>, <tt>\p{Osge}</tt>
- <tt>\p{Osmanya}</tt>, <tt>\p{Osma}</tt>
- <tt>\p{Pahawh_Hmong}</tt>, <tt>\p{Hmng}</tt>
- <tt>\p{Palmyrene}</tt>, <tt>\p{Palm}</tt>
- <tt>\p{Pau_Cin_Hau}</tt>, <tt>\p{Pauc}</tt>
- <tt>\p{Phags_Pa}</tt>, <tt>\p{Phag}</tt>
- <tt>\p{Phoenician}</tt>, <tt>\p{Phnx}</tt>
- <tt>\p{Psalter_Pahlavi}</tt>, <tt>\p{Phlp}</tt>
- <tt>\p{Rejang}</tt>, <tt>\p{Rjng}</tt>
- <tt>\p{Runic}</tt>, <tt>\p{Runr}</tt>
- <tt>\p{Samaritan}</tt>, <tt>\p{Samr}</tt>
- <tt>\p{Saurashtra}</tt>, <tt>\p{Saur}</tt>
- <tt>\p{Sharada}</tt>, <tt>\p{Shrd}</tt>
- <tt>\p{Shavian}</tt>, <tt>\p{Shaw}</tt>
- <tt>\p{Siddham}</tt>, <tt>\p{Sidd}</tt>
- <tt>\p{SignWriting}</tt>, <tt>\p{Sgnw}</tt>
- <tt>\p{Sinhala}</tt>, <tt>\p{Sinh}</tt>
- <tt>\p{Sogdian}</tt>, <tt>\p{Sogd}</tt>
- <tt>\p{Sora_Sompeng}</tt>, <tt>\p{Sora}</tt>
- <tt>\p{Soyombo}</tt>, <tt>\p{Soyo}</tt>
- <tt>\p{Sundanese}</tt>, <tt>\p{Sund}</tt>
- <tt>\p{Syloti_Nagri}</tt>, <tt>\p{Sylo}</tt>
- <tt>\p{Syriac}</tt>, <tt>\p{Syrc}</tt>
- <tt>\p{Tagalog}</tt>, <tt>\p{Tglg}</tt>
- <tt>\p{Tagbanwa}</tt>, <tt>\p{Tagb}</tt>
- <tt>\p{Tai_Le}</tt>, <tt>\p{Tale}</tt>
- <tt>\p{Tai_Tham}</tt>, <tt>\p{Lana}</tt>
- <tt>\p{Tai_Viet}</tt>, <tt>\p{Tavt}</tt>
- <tt>\p{Takri}</tt>, <tt>\p{Takr}</tt>
- <tt>\p{Tamil}</tt>, <tt>\p{Taml}</tt>
- <tt>\p{Tangsa}</tt>, <tt>\p{Tnsa}</tt>
- <tt>\p{Tangut}</tt>, <tt>\p{Tang}</tt>
- <tt>\p{Telugu}</tt>, <tt>\p{Telu}</tt>
- <tt>\p{Thaana}</tt>, <tt>\p{Thaa}</tt>
- <tt>\p{Thai}</tt>
- <tt>\p{Tibetan}</tt>, <tt>\p{Tibt}</tt>
- <tt>\p{Tifinagh}</tt>, <tt>\p{Tfng}</tt>
- <tt>\p{Tirhuta}</tt>, <tt>\p{Tirh}</tt>
- <tt>\p{Toto}</tt>
- <tt>\p{Ugaritic}</tt>, <tt>\p{Ugar}</tt>
- <tt>\p{Unknown}</tt>, <tt>\p{Zzzz}</tt>
- <tt>\p{Vai}</tt>, <tt>\p{Vaii}</tt>
- <tt>\p{Vithkuqi}</tt>, <tt>\p{Vith}</tt>
- <tt>\p{Wancho}</tt>, <tt>\p{Wcho}</tt>
- <tt>\p{Warang_Citi}</tt>, <tt>\p{Wara}</tt>
- <tt>\p{Yezidi}</tt>, <tt>\p{Yezi}</tt>
- <tt>\p{Yi}</tt>, <tt>\p{Yiii}</tt>
- <tt>\p{Zanabazar_Square}</tt>, <tt>\p{Zanb}</tt>

=== Blocks

- <tt>\p{In_Adlam}</tt>
- <tt>\p{In_Aegean_Numbers}</tt>
- <tt>\p{In_Ahom}</tt>
- <tt>\p{In_Alchemical_Symbols}</tt>
- <tt>\p{In_Alphabetic_Presentation_Forms}</tt>
- <tt>\p{In_Anatolian_Hieroglyphs}</tt>
- <tt>\p{In_Ancient_Greek_Musical_Notation}</tt>
- <tt>\p{In_Ancient_Greek_Numbers}</tt>
- <tt>\p{In_Ancient_Symbols}</tt>
- <tt>\p{In_Arabic}</tt>
- <tt>\p{In_Arabic_Extended_A}</tt>
- <tt>\p{In_Arabic_Extended_B}</tt>
- <tt>\p{In_Arabic_Extended_C}</tt>
- <tt>\p{In_Arabic_Mathematical_Alphabetic_Symbols}</tt>
- <tt>\p{In_Arabic_Presentation_Forms_A}</tt>
- <tt>\p{In_Arabic_Presentation_Forms_B}</tt>
- <tt>\p{In_Arabic_Supplement}</tt>
- <tt>\p{In_Armenian}</tt>
- <tt>\p{In_Arrows}</tt>
- <tt>\p{In_Avestan}</tt>
- <tt>\p{In_Balinese}</tt>
- <tt>\p{In_Bamum}</tt>
- <tt>\p{In_Bamum_Supplement}</tt>
- <tt>\p{In_Basic_Latin}</tt>
- <tt>\p{In_Bassa_Vah}</tt>
- <tt>\p{In_Batak}</tt>
- <tt>\p{In_Bengali}</tt>
- <tt>\p{In_Bhaiksuki}</tt>
- <tt>\p{In_Block_Elements}</tt>
- <tt>\p{In_Bopomofo}</tt>
- <tt>\p{In_Bopomofo_Extended}</tt>
- <tt>\p{In_Box_Drawing}</tt>
- <tt>\p{In_Brahmi}</tt>
- <tt>\p{In_Braille_Patterns}</tt>
- <tt>\p{In_Buginese}</tt>
- <tt>\p{In_Buhid}</tt>
- <tt>\p{In_Byzantine_Musical_Symbols}</tt>
- <tt>\p{In_CJK_Compatibility}</tt>
- <tt>\p{In_CJK_Compatibility_Forms}</tt>
- <tt>\p{In_CJK_Compatibility_Ideographs}</tt>
- <tt>\p{In_CJK_Compatibility_Ideographs_Supplement}</tt>
- <tt>\p{In_CJK_Radicals_Supplement}</tt>
- <tt>\p{In_CJK_Strokes}</tt>
- <tt>\p{In_CJK_Symbols_and_Punctuation}</tt>
- <tt>\p{In_CJK_Unified_Ideographs}</tt>
- <tt>\p{In_CJK_Unified_Ideographs_Extension_A}</tt>
- <tt>\p{In_CJK_Unified_Ideographs_Extension_B}</tt>
- <tt>\p{In_CJK_Unified_Ideographs_Extension_C}</tt>
- <tt>\p{In_CJK_Unified_Ideographs_Extension_D}</tt>
- <tt>\p{In_CJK_Unified_Ideographs_Extension_E}</tt>
- <tt>\p{In_CJK_Unified_Ideographs_Extension_F}</tt>
- <tt>\p{In_CJK_Unified_Ideographs_Extension_G}</tt>
- <tt>\p{In_CJK_Unified_Ideographs_Extension_H}</tt>
- <tt>\p{In_Carian}</tt>
- <tt>\p{In_Caucasian_Albanian}</tt>
- <tt>\p{In_Chakma}</tt>
- <tt>\p{In_Cham}</tt>
- <tt>\p{In_Cherokee}</tt>
- <tt>\p{In_Cherokee_Supplement}</tt>
- <tt>\p{In_Chess_Symbols}</tt>
- <tt>\p{In_Chorasmian}</tt>
- <tt>\p{In_Combining_Diacritical_Marks}</tt>
- <tt>\p{In_Combining_Diacritical_Marks_Extended}</tt>
- <tt>\p{In_Combining_Diacritical_Marks_Supplement}</tt>
- <tt>\p{In_Combining_Diacritical_Marks_for_Symbols}</tt>
- <tt>\p{In_Combining_Half_Marks}</tt>
- <tt>\p{In_Common_Indic_Number_Forms}</tt>
- <tt>\p{In_Control_Pictures}</tt>
- <tt>\p{In_Coptic}</tt>
- <tt>\p{In_Coptic_Epact_Numbers}</tt>
- <tt>\p{In_Counting_Rod_Numerals}</tt>
- <tt>\p{In_Cuneiform}</tt>
- <tt>\p{In_Cuneiform_Numbers_and_Punctuation}</tt>
- <tt>\p{In_Currency_Symbols}</tt>
- <tt>\p{In_Cypriot_Syllabary}</tt>
- <tt>\p{In_Cypro_Minoan}</tt>
- <tt>\p{In_Cyrillic}</tt>
- <tt>\p{In_Cyrillic_Extended_A}</tt>
- <tt>\p{In_Cyrillic_Extended_B}</tt>
- <tt>\p{In_Cyrillic_Extended_C}</tt>
- <tt>\p{In_Cyrillic_Extended_D}</tt>
- <tt>\p{In_Cyrillic_Supplement}</tt>
- <tt>\p{In_Deseret}</tt>
- <tt>\p{In_Devanagari}</tt>
- <tt>\p{In_Devanagari_Extended}</tt>
- <tt>\p{In_Devanagari_Extended_A}</tt>
- <tt>\p{In_Dingbats}</tt>
- <tt>\p{In_Dives_Akuru}</tt>
- <tt>\p{In_Dogra}</tt>
- <tt>\p{In_Domino_Tiles}</tt>
- <tt>\p{In_Duployan}</tt>
- <tt>\p{In_Early_Dynastic_Cuneiform}</tt>
- <tt>\p{In_Egyptian_Hieroglyph_Format_Controls}</tt>
- <tt>\p{In_Egyptian_Hieroglyphs}</tt>
- <tt>\p{In_Elbasan}</tt>
- <tt>\p{In_Elymaic}</tt>
- <tt>\p{In_Emoticons}</tt>
- <tt>\p{In_Enclosed_Alphanumeric_Supplement}</tt>
- <tt>\p{In_Enclosed_Alphanumerics}</tt>
- <tt>\p{In_Enclosed_CJK_Letters_and_Months}</tt>
- <tt>\p{In_Enclosed_Ideographic_Supplement}</tt>
- <tt>\p{In_Ethiopic}</tt>
- <tt>\p{In_Ethiopic_Extended}</tt>
- <tt>\p{In_Ethiopic_Extended_A}</tt>
- <tt>\p{In_Ethiopic_Extended_B}</tt>
- <tt>\p{In_Ethiopic_Supplement}</tt>
- <tt>\p{In_General_Punctuation}</tt>
- <tt>\p{In_Geometric_Shapes}</tt>
- <tt>\p{In_Geometric_Shapes_Extended}</tt>
- <tt>\p{In_Georgian}</tt>
- <tt>\p{In_Georgian_Extended}</tt>
- <tt>\p{In_Georgian_Supplement}</tt>
- <tt>\p{In_Glagolitic}</tt>
- <tt>\p{In_Glagolitic_Supplement}</tt>
- <tt>\p{In_Gothic}</tt>
- <tt>\p{In_Grantha}</tt>
- <tt>\p{In_Greek_Extended}</tt>
- <tt>\p{In_Greek_and_Coptic}</tt>
- <tt>\p{In_Gujarati}</tt>
- <tt>\p{In_Gunjala_Gondi}</tt>
- <tt>\p{In_Gurmukhi}</tt>
- <tt>\p{In_Halfwidth_and_Fullwidth_Forms}</tt>
- <tt>\p{In_Hangul_Compatibility_Jamo}</tt>
- <tt>\p{In_Hangul_Jamo}</tt>
- <tt>\p{In_Hangul_Jamo_Extended_A}</tt>
- <tt>\p{In_Hangul_Jamo_Extended_B}</tt>
- <tt>\p{In_Hangul_Syllables}</tt>
- <tt>\p{In_Hanifi_Rohingya}</tt>
- <tt>\p{In_Hanunoo}</tt>
- <tt>\p{In_Hatran}</tt>
- <tt>\p{In_Hebrew}</tt>
- <tt>\p{In_High_Private_Use_Surrogates}</tt>
- <tt>\p{In_High_Surrogates}</tt>
- <tt>\p{In_Hiragana}</tt>
- <tt>\p{In_IPA_Extensions}</tt>
- <tt>\p{In_Ideographic_Description_Characters}</tt>
- <tt>\p{In_Ideographic_Symbols_and_Punctuation}</tt>
- <tt>\p{In_Imperial_Aramaic}</tt>
- <tt>\p{In_Indic_Siyaq_Numbers}</tt>
- <tt>\p{In_Inscriptional_Pahlavi}</tt>
- <tt>\p{In_Inscriptional_Parthian}</tt>
- <tt>\p{In_Javanese}</tt>
- <tt>\p{In_Kaithi}</tt>
- <tt>\p{In_Kaktovik_Numerals}</tt>
- <tt>\p{In_Kana_Extended_A}</tt>
- <tt>\p{In_Kana_Extended_B}</tt>
- <tt>\p{In_Kana_Supplement}</tt>
- <tt>\p{In_Kanbun}</tt>
- <tt>\p{In_Kangxi_Radicals}</tt>
- <tt>\p{In_Kannada}</tt>
- <tt>\p{In_Katakana}</tt>
- <tt>\p{In_Katakana_Phonetic_Extensions}</tt>
- <tt>\p{In_Kawi}</tt>
- <tt>\p{In_Kayah_Li}</tt>
- <tt>\p{In_Kharoshthi}</tt>
- <tt>\p{In_Khitan_Small_Script}</tt>
- <tt>\p{In_Khmer}</tt>
- <tt>\p{In_Khmer_Symbols}</tt>
- <tt>\p{In_Khojki}</tt>
- <tt>\p{In_Khudawadi}</tt>
- <tt>\p{In_Lao}</tt>
- <tt>\p{In_Latin_1_Supplement}</tt>
- <tt>\p{In_Latin_Extended_A}</tt>
- <tt>\p{In_Latin_Extended_Additional}</tt>
- <tt>\p{In_Latin_Extended_B}</tt>
- <tt>\p{In_Latin_Extended_C}</tt>
- <tt>\p{In_Latin_Extended_D}</tt>
- <tt>\p{In_Latin_Extended_E}</tt>
- <tt>\p{In_Latin_Extended_F}</tt>
- <tt>\p{In_Latin_Extended_G}</tt>
- <tt>\p{In_Lepcha}</tt>
- <tt>\p{In_Letterlike_Symbols}</tt>
- <tt>\p{In_Limbu}</tt>
- <tt>\p{In_Linear_A}</tt>
- <tt>\p{In_Linear_B_Ideograms}</tt>
- <tt>\p{In_Linear_B_Syllabary}</tt>
- <tt>\p{In_Lisu}</tt>
- <tt>\p{In_Lisu_Supplement}</tt>
- <tt>\p{In_Low_Surrogates}</tt>
- <tt>\p{In_Lycian}</tt>
- <tt>\p{In_Lydian}</tt>
- <tt>\p{In_Mahajani}</tt>
- <tt>\p{In_Mahjong_Tiles}</tt>
- <tt>\p{In_Makasar}</tt>
- <tt>\p{In_Malayalam}</tt>
- <tt>\p{In_Mandaic}</tt>
- <tt>\p{In_Manichaean}</tt>
- <tt>\p{In_Marchen}</tt>
- <tt>\p{In_Masaram_Gondi}</tt>
- <tt>\p{In_Mathematical_Alphanumeric_Symbols}</tt>
- <tt>\p{In_Mathematical_Operators}</tt>
- <tt>\p{In_Mayan_Numerals}</tt>
- <tt>\p{In_Medefaidrin}</tt>
- <tt>\p{In_Meetei_Mayek}</tt>
- <tt>\p{In_Meetei_Mayek_Extensions}</tt>
- <tt>\p{In_Mende_Kikakui}</tt>
- <tt>\p{In_Meroitic_Cursive}</tt>
- <tt>\p{In_Meroitic_Hieroglyphs}</tt>
- <tt>\p{In_Miao}</tt>
- <tt>\p{In_Miscellaneous_Mathematical_Symbols_A}</tt>
- <tt>\p{In_Miscellaneous_Mathematical_Symbols_B}</tt>
- <tt>\p{In_Miscellaneous_Symbols}</tt>
- <tt>\p{In_Miscellaneous_Symbols_and_Arrows}</tt>
- <tt>\p{In_Miscellaneous_Symbols_and_Pictographs}</tt>
- <tt>\p{In_Miscellaneous_Technical}</tt>
- <tt>\p{In_Modi}</tt>
- <tt>\p{In_Modifier_Tone_Letters}</tt>
- <tt>\p{In_Mongolian}</tt>
- <tt>\p{In_Mongolian_Supplement}</tt>
- <tt>\p{In_Mro}</tt>
- <tt>\p{In_Multani}</tt>
- <tt>\p{In_Musical_Symbols}</tt>
- <tt>\p{In_Myanmar}</tt>
- <tt>\p{In_Myanmar_Extended_A}</tt>
- <tt>\p{In_Myanmar_Extended_B}</tt>
- <tt>\p{In_NKo}</tt>
- <tt>\p{In_Nabataean}</tt>
- <tt>\p{In_Nag_Mundari}</tt>
- <tt>\p{In_Nandinagari}</tt>
- <tt>\p{In_New_Tai_Lue}</tt>
- <tt>\p{In_Newa}</tt>
- <tt>\p{In_No_Block}</tt>
- <tt>\p{In_Number_Forms}</tt>
- <tt>\p{In_Nushu}</tt>
- <tt>\p{In_Nyiakeng_Puachue_Hmong}</tt>
- <tt>\p{In_Ogham}</tt>
- <tt>\p{In_Ol_Chiki}</tt>
- <tt>\p{In_Old_Hungarian}</tt>
- <tt>\p{In_Old_Italic}</tt>
- <tt>\p{In_Old_North_Arabian}</tt>
- <tt>\p{In_Old_Permic}</tt>
- <tt>\p{In_Old_Persian}</tt>
- <tt>\p{In_Old_Sogdian}</tt>
- <tt>\p{In_Old_South_Arabian}</tt>
- <tt>\p{In_Old_Turkic}</tt>
- <tt>\p{In_Old_Uyghur}</tt>
- <tt>\p{In_Optical_Character_Recognition}</tt>
- <tt>\p{In_Oriya}</tt>
- <tt>\p{In_Ornamental_Dingbats}</tt>
- <tt>\p{In_Osage}</tt>
- <tt>\p{In_Osmanya}</tt>
- <tt>\p{In_Ottoman_Siyaq_Numbers}</tt>
- <tt>\p{In_Pahawh_Hmong}</tt>
- <tt>\p{In_Palmyrene}</tt>
- <tt>\p{In_Pau_Cin_Hau}</tt>
- <tt>\p{In_Phags_pa}</tt>
- <tt>\p{In_Phaistos_Disc}</tt>
- <tt>\p{In_Phoenician}</tt>
- <tt>\p{In_Phonetic_Extensions}</tt>
- <tt>\p{In_Phonetic_Extensions_Supplement}</tt>
- <tt>\p{In_Playing_Cards}</tt>
- <tt>\p{In_Private_Use_Area}</tt>
- <tt>\p{In_Psalter_Pahlavi}</tt>
- <tt>\p{In_Rejang}</tt>
- <tt>\p{In_Rumi_Numeral_Symbols}</tt>
- <tt>\p{In_Runic}</tt>
- <tt>\p{In_Samaritan}</tt>
- <tt>\p{In_Saurashtra}</tt>
- <tt>\p{In_Sharada}</tt>
- <tt>\p{In_Shavian}</tt>
- <tt>\p{In_Shorthand_Format_Controls}</tt>
- <tt>\p{In_Siddham}</tt>
- <tt>\p{In_Sinhala}</tt>
- <tt>\p{In_Sinhala_Archaic_Numbers}</tt>
- <tt>\p{In_Small_Form_Variants}</tt>
- <tt>\p{In_Small_Kana_Extension}</tt>
- <tt>\p{In_Sogdian}</tt>
- <tt>\p{In_Sora_Sompeng}</tt>
- <tt>\p{In_Soyombo}</tt>
- <tt>\p{In_Spacing_Modifier_Letters}</tt>
- <tt>\p{In_Specials}</tt>
- <tt>\p{In_Sundanese}</tt>
- <tt>\p{In_Sundanese_Supplement}</tt>
- <tt>\p{In_Superscripts_and_Subscripts}</tt>
- <tt>\p{In_Supplemental_Arrows_A}</tt>
- <tt>\p{In_Supplemental_Arrows_B}</tt>
- <tt>\p{In_Supplemental_Arrows_C}</tt>
- <tt>\p{In_Supplemental_Mathematical_Operators}</tt>
- <tt>\p{In_Supplemental_Punctuation}</tt>
- <tt>\p{In_Supplemental_Symbols_and_Pictographs}</tt>
- <tt>\p{In_Supplementary_Private_Use_Area_A}</tt>
- <tt>\p{In_Supplementary_Private_Use_Area_B}</tt>
- <tt>\p{In_Sutton_SignWriting}</tt>
- <tt>\p{In_Syloti_Nagri}</tt>
- <tt>\p{In_Symbols_and_Pictographs_Extended_A}</tt>
- <tt>\p{In_Symbols_for_Legacy_Computing}</tt>
- <tt>\p{In_Syriac}</tt>
- <tt>\p{In_Syriac_Supplement}</tt>
- <tt>\p{In_Tagalog}</tt>
- <tt>\p{In_Tagbanwa}</tt>
- <tt>\p{In_Tags}</tt>
- <tt>\p{In_Tai_Le}</tt>
- <tt>\p{In_Tai_Tham}</tt>
- <tt>\p{In_Tai_Viet}</tt>
- <tt>\p{In_Tai_Xuan_Jing_Symbols}</tt>
- <tt>\p{In_Takri}</tt>
- <tt>\p{In_Tamil}</tt>
- <tt>\p{In_Tamil_Supplement}</tt>
- <tt>\p{In_Tangsa}</tt>
- <tt>\p{In_Tangut}</tt>
- <tt>\p{In_Tangut_Components}</tt>
- <tt>\p{In_Tangut_Supplement}</tt>
- <tt>\p{In_Telugu}</tt>
- <tt>\p{In_Thaana}</tt>
- <tt>\p{In_Thai}</tt>
- <tt>\p{In_Tibetan}</tt>
- <tt>\p{In_Tifinagh}</tt>
- <tt>\p{In_Tirhuta}</tt>
- <tt>\p{In_Toto}</tt>
- <tt>\p{In_Transport_and_Map_Symbols}</tt>
- <tt>\p{In_Ugaritic}</tt>
- <tt>\p{In_Unified_Canadian_Aboriginal_Syllabics}</tt>
- <tt>\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}</tt>
- <tt>\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended_A}</tt>
- <tt>\p{In_Vai}</tt>
- <tt>\p{In_Variation_Selectors}</tt>
- <tt>\p{In_Variation_Selectors_Supplement}</tt>
- <tt>\p{In_Vedic_Extensions}</tt>
- <tt>\p{In_Vertical_Forms}</tt>
- <tt>\p{In_Vithkuqi}</tt>
- <tt>\p{In_Wancho}</tt>
- <tt>\p{In_Warang_Citi}</tt>
- <tt>\p{In_Yezidi}</tt>
- <tt>\p{In_Yi_Radicals}</tt>
- <tt>\p{In_Yi_Syllables}</tt>
- <tt>\p{In_Yijing_Hexagram_Symbols}</tt>
- <tt>\p{In_Zanabazar_Square}</tt>
- <tt>\p{In_Znamenny_Musical_Notation}</tt>

=== Emoji

- <tt>\p{Emoji}</tt>
- <tt>\p{Emoji_Component}</tt>, <tt>\p{EComp}</tt>
- <tt>\p{Emoji_Modifier}</tt>, <tt>\p{EMod}</tt>
- <tt>\p{Emoji_Modifier_Base}</tt>, <tt>\p{EBase}</tt>
- <tt>\p{Emoji_Presentation}</tt>, <tt>\p{EPres}</tt>
- <tt>\p{Extended_Pictographic}</tt>, <tt>\p{ExtPict}</tt>

=== Graphemes

- <tt>\p{Grapheme_Cluster_Break_CR}</tt>
- <tt>\p{Grapheme_Cluster_Break_Control}</tt>
- <tt>\p{Grapheme_Cluster_Break_Extend}</tt>
- <tt>\p{Grapheme_Cluster_Break_L}</tt>
- <tt>\p{Grapheme_Cluster_Break_LF}</tt>
- <tt>\p{Grapheme_Cluster_Break_LV}</tt>
- <tt>\p{Grapheme_Cluster_Break_LVT}</tt>
- <tt>\p{Grapheme_Cluster_Break_Prepend}</tt>
- <tt>\p{Grapheme_Cluster_Break_Regional_Indicator}</tt>
- <tt>\p{Grapheme_Cluster_Break_SpacingMark}</tt>
- <tt>\p{Grapheme_Cluster_Break_T}</tt>
- <tt>\p{Grapheme_Cluster_Break_V}</tt>
- <tt>\p{Grapheme_Cluster_Break_ZWJ}</tt>

=== Derived Ages

- <tt>\p{Age_10_0}</tt>
- <tt>\p{Age_11_0}</tt>
- <tt>\p{Age_12_0}</tt>
- <tt>\p{Age_12_1}</tt>
- <tt>\p{Age_13_0}</tt>
- <tt>\p{Age_14_0}</tt>
- <tt>\p{Age_15_0}</tt>
- <tt>\p{Age_1_1}</tt>
- <tt>\p{Age_2_0}</tt>
- <tt>\p{Age_2_1}</tt>
- <tt>\p{Age_3_0}</tt>
- <tt>\p{Age_3_1}</tt>
- <tt>\p{Age_3_2}</tt>
- <tt>\p{Age_4_0}</tt>
- <tt>\p{Age_4_1}</tt>
- <tt>\p{Age_5_0}</tt>
- <tt>\p{Age_5_1}</tt>
- <tt>\p{Age_5_2}</tt>
- <tt>\p{Age_6_0}</tt>
- <tt>\p{Age_6_1}</tt>
- <tt>\p{Age_6_2}</tt>
- <tt>\p{Age_6_3}</tt>
- <tt>\p{Age_7_0}</tt>
- <tt>\p{Age_8_0}</tt>
- <tt>\p{Age_9_0}</tt>