92
|
1 /*******************************************************************************
|
|
2
|
|
3 @file UChar.d
|
|
4
|
|
5 Copyright (c) 2004 Kris Bell
|
|
6
|
|
7 This software is provided 'as-is', without any express or implied
|
|
8 warranty. In no event will the authors be held liable for damages
|
|
9 of any kind arising from the use of this software.
|
|
10
|
|
11 Permission is hereby granted to anyone to use this software for any
|
|
12 purpose, including commercial applications, and to alter it and/or
|
|
13 redistribute it freely, subject to the following restrictions:
|
|
14
|
|
15 1. The origin of this software must not be misrepresented; you must
|
|
16 not claim that you wrote the original software. If you use this
|
|
17 software in a product, an acknowledgment within documentation of
|
|
18 said product would be appreciated but is not required.
|
|
19
|
|
20 2. Altered source versions must be plainly marked as such, and must
|
|
21 not be misrepresented as being the original software.
|
|
22
|
|
23 3. This notice may not be removed or altered from any distribution
|
|
24 of the source.
|
|
25
|
|
26 4. Derivative works are permitted, but they must carry this notice
|
|
27 in full and credit the original source.
|
|
28
|
|
29
|
|
30 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
31
|
|
32
|
|
33 @version Initial version, October 2004
|
|
34 @author Kris
|
|
35
|
|
36
|
|
37 Note that this package and documentation are built around the ICU
|
|
38 project (http://oss.software.ibm.com/icu/). Below is the license
|
|
39 statement as specified by that software:
|
|
40
|
|
41
|
|
42 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
43
|
|
44
|
|
45 ICU License - ICU 1.8.1 and later
|
|
46
|
|
47 COPYRIGHT AND PERMISSION NOTICE
|
|
48
|
|
49 Copyright (c) 1995-2003 International Business Machines Corporation and
|
|
50 others.
|
|
51
|
|
52 All rights reserved.
|
|
53
|
|
54 Permission is hereby granted, free of charge, to any person obtaining a
|
|
55 copy of this software and associated documentation files (the
|
|
56 "Software"), to deal in the Software without restriction, including
|
|
57 without limitation the rights to use, copy, modify, merge, publish,
|
|
58 distribute, and/or sell copies of the Software, and to permit persons
|
|
59 to whom the Software is furnished to do so, provided that the above
|
|
60 copyright notice(s) and this permission notice appear in all copies of
|
|
61 the Software and that both the above copyright notice(s) and this
|
|
62 permission notice appear in supporting documentation.
|
|
63
|
|
64 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
65 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
66 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
|
67 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
|
68 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
|
|
69 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
|
|
70 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
71 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
|
72 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
73
|
|
74 Except as contained in this notice, the name of a copyright holder
|
|
75 shall not be used in advertising or otherwise to promote the sale, use
|
|
76 or other dealings in this Software without prior written authorization
|
|
77 of the copyright holder.
|
|
78
|
|
79 ----------------------------------------------------------------------
|
|
80
|
|
81 All trademarks and registered trademarks mentioned herein are the
|
|
82 property of their respective owners.
|
|
83
|
|
84 *******************************************************************************/
|
|
85
|
|
86 module dwtx.dwtxhelper.mangoicu.UChar;
|
|
87
|
|
88 private import dwtx.dwtxhelper.mangoicu.ICU;
|
|
89
|
|
90 /*******************************************************************************
|
|
91
|
|
92 This API provides low-level access to the Unicode Character
|
|
93 Database. In addition to raw property values, some convenience
|
|
94 functions calculate derived properties, for example for Java-style
|
|
95 programming.
|
|
96
|
|
97 Unicode assigns each code point (not just assigned character)
|
|
98 values for many properties. Most of them are simple boolean
|
|
99 flags, or constants from a small enumerated list. For some
|
|
100 properties, values are strings or other relatively more complex
|
|
101 types.
|
|
102
|
|
103 For more information see "About the Unicode Character Database"
|
|
104 (http://www.unicode.org/ucd/) and the ICU User Guide chapter on
|
|
105 Properties (http://oss.software.ibm.com/icu/userguide/properties.html).
|
|
106
|
|
107 Many functions are designed to match java.lang.Character functions.
|
|
108 See the individual function documentation, and see the JDK 1.4.1
|
|
109 java.lang.Character documentation at
|
|
110 http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html
|
|
111
|
|
112 There are also functions that provide easy migration from C/POSIX
|
|
113 functions like isblank(). Their use is generally discouraged because
|
|
114 the C/POSIX standards do not define their semantics beyond the ASCII
|
|
115 range, which means that different implementations exhibit very different
|
|
116 behavior. Instead, Unicode properties should be used directly.
|
|
117
|
|
118 There are also only a few, broad C/POSIX character classes, and they
|
|
119 tend to be used for conflicting purposes. For example, the "isalpha()"
|
|
120 class is sometimes used to determine word boundaries, while a more
|
|
121 sophisticated approach would at least distinguish initial letters from
|
|
122 continuation characters (the latter including combining marks). (In
|
|
123 ICU, BreakIterator is the most sophisticated API for word boundaries.)
|
|
124 Another example: There is no "istitle()" class for titlecase characters.
|
|
125
|
|
126 A summary of the behavior of some C/POSIX character classification
|
|
127 implementations for Unicode is available at
|
|
128 http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html
|
|
129
|
|
130 See <A HREF="http://oss.software.ibm.com/icu/apiref/uchar_8h.html">
|
|
131 this page</A> for full details.
|
|
132
|
|
133 *******************************************************************************/
|
|
134
|
|
135 class UChar : ICU
|
|
136 {
|
|
/***********************************************************************

        Selector for a Unicode character property, as accepted by
        getProperty(), getPropertyMinimum(), getPropertyMaximum(),
        getPropertyName() and getPropertyValueName().

        Members fall into numeric ranges. Each range is bracketed
        by a *Start alias (equal to its first member) and a *Limit
        member (one past its last member):

                binary   starts at 0
                integer  starts at 0x1000
                mask     starts at 0x2000
                double   starts at 0x3000
                string   starts at 0x4000

        NOTE(review): the numeric values presumably have to match
        the property constants of the ICU C library -- confirm
        before inserting or reordering members.

***********************************************************************/

public enum Property
{
    // ---- binary properties ------------------------------------------
    Alphabetic = 0,
    BinaryStart = Alphabetic,
    AsciiHexDigit,
    BidiControl,
    BidiMirrored,
    Dash,
    DefaultIgnorableCodePoint,
    Deprecated,
    Diacritic,
    Extender,
    FullCompositionExclusion,
    GraphemeBase,
    GraphemeExtend,
    GraphemeLink,
    HexDigit,
    Hyphen,
    IdContinue,
    IdStart,
    Ideographic,
    IdsBinaryOperator,
    IdsTrinaryOperator,
    JoinControl,
    LogicalOrderException,
    Lowercase,
    Math,
    NoncharacterCodePoint,
    QuotationMark,
    Radical,
    SoftDotted,
    TerminalPunctuation,
    UnifiedIdeograph,
    Uppercase,
    WhiteSpace,
    XidContinue,
    XidStart,
    CaseSensitive,
    STerm,
    VariationSelector,
    NfdInert,
    NfkdInert,
    NfcInert,
    NfkcInert,
    SegmentStarter,
    BinaryLimit,

    // ---- enumerated / integer properties ----------------------------
    BidiClass = 0x1000,
    IntStart = BidiClass,
    Block,
    CanonicalCombiningClass,
    DecompositionType,
    EastAsianWidth,
    GeneralCategory,
    JoiningGroup,
    JoiningType,
    LineBreak,
    NumericType,
    Script,
    HangulSyllableType,
    NfdQuickCheck,
    NfkdQuickCheck,
    NfcQuickCheck,
    NfkcQuickCheck,
    LeadCanonicalCombiningClass,
    TrailCanonicalCombiningClass,
    IntLimit,

    // ---- bit-mask properties ----------------------------------------
    GeneralCategoryMask = 0x2000,
    MaskStart = GeneralCategoryMask,
    MaskLimit,

    // ---- double-valued properties -----------------------------------
    NumericValue = 0x3000,
    DoubleStart = NumericValue,
    DoubleLimit,

    // ---- string-valued properties -----------------------------------
    Age = 0x4000,
    StringStart = Age,
    BidiMirroringGlyph,
    CaseFolding,
    IsoComment,
    LowercaseMapping,
    Name,
    SimpleCaseFolding,
    SimpleLowercaseMapping,
    SimpleTitlecaseMapping,
    SimpleUppercaseMapping,
    TitlecaseMapping,
    Unicode1Name,
    UppercaseMapping,
    StringLimit,

    // ---- sentinel ---------------------------------------------------
    InvalidCode = -1
}
|
|
225
|
|
/***********************************************************************

        Unicode general category of a code point. Count is one
        past the highest category value.

        NOTE(review): presumably the value set reported by
        charType() and by Property.GeneralCategory -- confirm
        against the ICU headers.

***********************************************************************/

public enum Category
{
    Unassigned           = 0,
    GeneralOtherTypes    = 0,       // same value as Unassigned
    UppercaseLetter      = 1,
    LowercaseLetter      = 2,
    TitlecaseLetter      = 3,
    ModifierLetter       = 4,
    OtherLetter          = 5,
    NonSpacingMark       = 6,
    EnclosingMark        = 7,
    CombiningSpacingMark = 8,
    DecimalDigitNumber   = 9,
    LetterNumber         = 10,
    OtherNumber          = 11,
    SpaceSeparator       = 12,
    LineSeparator        = 13,
    ParagraphSeparator   = 14,
    ControlChar          = 15,
    FormatChar           = 16,
    PrivateUseChar       = 17,
    Surrogate            = 18,
    DashPunctuation      = 19,
    StartPunctuation     = 20,
    EndPunctuation       = 21,
    ConnectorPunctuation = 22,
    OtherPunctuation     = 23,
    MathSymbol           = 24,
    CurrencySymbol       = 25,
    ModifierSymbol       = 26,
    OtherSymbol          = 27,
    InitialPunctuation   = 28,
    FinalPunctuation     = 29,
    Count                           // 30
}
|
|
261
|
|
/***********************************************************************

        Bidirectional category of a code point, as returned by
        charDirection(). Count is one past the highest value.

***********************************************************************/

public enum Direction
{
    LeftToRight              = 0,
    RightToLeft              = 1,
    EuropeanNumber           = 2,
    EuropeanNumberSeparator  = 3,
    EuropeanNumberTerminator = 4,
    ArabicNumber             = 5,
    CommonNumberSeparator    = 6,
    BlockSeparator           = 7,
    SegmentSeparator         = 8,
    WhiteSpaceNeutral        = 9,
    OtherNeutral             = 10,
    LeftToRightEmbedding     = 11,
    LeftToRightOverride      = 12,
    RightToLeftArabic        = 13,
    RightToLeftEmbedding     = 14,
    RightToLeftOverride      = 15,
    PopDirectionalFormat     = 16,
    DirNonSpacingMark        = 17,
    BoundaryNeutral          = 18,
    Count                           // 19
}
|
|
285
|
|
/***********************************************************************

        Identifier of a Unicode allocation block, as returned by
        getBlockCode(). Count is one past the highest block value;
        InvalidCode (-1) is a sentinel outside the valid range.

        PrivateUseArea and CyrillicSupplement are aliases of
        PrivateUse and CyrillicSupplementary respectively.

***********************************************************************/

public enum BlockCode
{
    NoBlock                              = 0,
    BasicLatin                           = 1,
    Latin1Supplement                     = 2,
    LatinExtendedA                       = 3,
    LatinExtendedB                       = 4,
    IpaExtensions                        = 5,
    SpacingModifierLetters               = 6,
    CombiningDiacriticalMarks            = 7,
    Greek                                = 8,
    Cyrillic                             = 9,
    Armenian                             = 10,
    Hebrew                               = 11,
    Arabic                               = 12,
    Syriac                               = 13,
    Thaana                               = 14,
    Devanagari                           = 15,
    Bengali                              = 16,
    Gurmukhi                             = 17,
    Gujarati                             = 18,
    Oriya                                = 19,
    Tamil                                = 20,
    Telugu                               = 21,
    Kannada                              = 22,
    Malayalam                            = 23,
    Sinhala                              = 24,
    Thai                                 = 25,
    Lao                                  = 26,
    Tibetan                              = 27,
    Myanmar                              = 28,
    Georgian                             = 29,
    HangulJamo                           = 30,
    Ethiopic                             = 31,
    Cherokee                             = 32,
    UnifiedCanadianAboriginalSyllabics   = 33,
    Ogham                                = 34,
    Runic                                = 35,
    Khmer                                = 36,
    Mongolian                            = 37,
    LatinExtendedAdditional              = 38,
    GreekExtended                        = 39,
    GeneralPunctuation                   = 40,
    SuperscriptsAndSubscripts            = 41,
    CurrencySymbols                      = 42,
    CombiningMarksForSymbols             = 43,
    LetterlikeSymbols                    = 44,
    NumberForms                          = 45,
    Arrows                               = 46,
    MathematicalOperators                = 47,
    MiscellaneousTechnical               = 48,
    ControlPictures                      = 49,
    OpticalCharacterRecognition          = 50,
    EnclosedAlphanumerics                = 51,
    BoxDrawing                           = 52,
    BlockElements                        = 53,
    GeometricShapes                      = 54,
    MiscellaneousSymbols                 = 55,
    Dingbats                             = 56,
    BraillePatterns                      = 57,
    CjkRadicalsSupplement                = 58,
    KangxiRadicals                       = 59,
    IdeographicDescriptionCharacters     = 60,
    CjkSymbolsAndPunctuation             = 61,
    Hiragana                             = 62,
    Katakana                             = 63,
    Bopomofo                             = 64,
    HangulCompatibilityJamo              = 65,
    Kanbun                               = 66,
    BopomofoExtended                     = 67,
    EnclosedCjkLettersAndMonths          = 68,
    CjkCompatibility                     = 69,
    CjkUnifiedIdeographsExtensionA       = 70,
    CjkUnifiedIdeographs                 = 71,
    YiSyllables                          = 72,
    YiRadicals                           = 73,
    HangulSyllables                      = 74,
    HighSurrogates                       = 75,
    HighPrivateUseSurrogates             = 76,
    LowSurrogates                        = 77,
    PrivateUse                           = 78,
    PrivateUseArea                       = PrivateUse,
    CjkCompatibilityIdeographs           = 79,
    AlphabeticPresentationForms          = 80,
    ArabicPresentationFormsA             = 81,
    CombiningHalfMarks                   = 82,
    CjkCompatibilityForms                = 83,
    SmallFormVariants                    = 84,
    ArabicPresentationFormsB             = 85,
    Specials                             = 86,
    HalfwidthAndFullwidthForms           = 87,
    OldItalic                            = 88,
    Gothic                               = 89,
    Deseret                              = 90,
    ByzantineMusicalSymbols              = 91,
    MusicalSymbols                       = 92,
    MathematicalAlphanumericSymbols      = 93,
    CjkUnifiedIdeographsExtensionB       = 94,
    CjkCompatibilityIdeographsSupplement = 95,
    Tags                                 = 96,
    CyrillicSupplementary                = 97,
    CyrillicSupplement                   = CyrillicSupplementary,
    Tagalog                              = 98,
    Hanunoo                              = 99,
    Buhid                                = 100,
    Tagbanwa                             = 101,
    MiscellaneousMathematicalSymbolsA    = 102,
    SupplementalArrowsA                  = 103,
    SupplementalArrowsB                  = 104,
    MiscellaneousMathematicalSymbolsB    = 105,
    SupplementalMathematicalOperators    = 106,
    KatakanaPhoneticExtensions           = 107,
    VariationSelectors                   = 108,
    SupplementaryPrivateUseAreaA         = 109,
    SupplementaryPrivateUseAreaB         = 110,
    Limbu                                = 111,
    TaiLe                                = 112,
    KhmerSymbols                         = 113,
    PhoneticExtensions                   = 114,
    MiscellaneousSymbolsAndArrows        = 115,
    YijingHexagramSymbols                = 116,
    LinearBSyllabary                     = 117,
    LinearBIdeograms                     = 118,
    AegeanNumbers                        = 119,
    Ugaritic                             = 120,
    Shavian                              = 121,
    Osmanya                              = 122,
    CypriotSyllabary                     = 123,
    TaiXuanJingSymbols                   = 124,
    VariationSelectorsSupplement         = 125,
    Count,                                      // 126
    InvalidCode                          = -1
}
|
|
419
|
|
/***********************************************************************

        East Asian width classes. Count is one past the highest
        value.

        NOTE(review): presumably the value set of
        Property.EastAsianWidth -- confirm against the ICU headers.

***********************************************************************/

public enum EastAsianWidth
{
    Neutral   = 0,
    Ambiguous = 1,
    Halfwidth = 2,
    Fullwidth = 3,
    Narrow    = 4,
    Wide      = 5,
    Count               // 6
}
|
|
430
|
|
/***********************************************************************

        Selects which kind of character name getCharName() and
        charFromName() work with. Count is one past the highest
        value.

***********************************************************************/

public enum CharNameChoice
{
    Unicode   = 0,
    Unicode10 = 1,
    Extended  = 2,
    Count               // 3
}
|
|
438
|
|
/***********************************************************************

        Selects the short or the long form of a name in
        getPropertyName() and getPropertyValueName(). Count is one
        past the highest value.

***********************************************************************/

public enum NameChoice
{
    Short = 0,
    Long  = 1,
    Count           // 2
}
|
|
445
|
|
/***********************************************************************

        Decomposition type classes. Count is one past the highest
        value.

        NOTE(review): presumably the value set of
        Property.DecompositionType -- confirm against the ICU
        headers.

***********************************************************************/

public enum DecompositionType
{
    None      = 0,
    Canonical = 1,
    Compat    = 2,
    Circle    = 3,
    Final     = 4,
    Font      = 5,
    Fraction  = 6,
    Initial   = 7,
    Isolated  = 8,
    Medial    = 9,
    Narrow    = 10,
    Nobreak   = 11,
    Small     = 12,
    Square    = 13,
    Sub       = 14,
    Super     = 15,
    Vertical  = 16,
    Wide      = 17,
    Count               // 18
}
|
|
468
|
|
/***********************************************************************

        Joining type classes. Count is one past the highest value.

        NOTE(review): presumably the value set of
        Property.JoiningType -- confirm against the ICU headers.

***********************************************************************/

public enum JoiningType
{
    NonJoining   = 0,
    JoinCausing  = 1,
    DualJoining  = 2,
    LeftJoining  = 3,
    RightJoining = 4,
    Transparent  = 5,
    Count               // 6
}
|
|
479
|
|
/***********************************************************************

        Joining group classes. Values are assigned implicitly in
        declaration order starting at 0, so the order of members
        must not change; the trailing comment on each row gives
        the values of that row. Count is one past the highest
        value.

        NOTE(review): presumably the value set of
        Property.JoiningGroup -- confirm against the ICU headers.

***********************************************************************/

public enum JoiningGroup
{
    NoJoiningGroup,                                         //  0
    Ain, Alaph, Alef, Beh, Beth,                            //  1 ..  5
    Dal, DalathRish, E, Feh, FinalSemkath,                  //  6 .. 10
    Gaf, Gamal, Hah, HamzaOnHehGoal, He,                    // 11 .. 15
    Heh, HehGoal, Heth, Kaf, Kaph,                          // 16 .. 20
    KnottedHeh, Lam, Lamadh, Meem, Mim,                     // 21 .. 25
    Noon, Nun, Pe, Qaf, Qaph,                               // 26 .. 30
    Reh, Reversed_Pe, Sad, Sadhe, Seen,                     // 31 .. 35
    Semkath, Shin, Swash_Kaf, Syriac_Waw, Tah,              // 36 .. 40
    Taw, Teh_Marbuta, Teth, Waw, Yeh,                       // 41 .. 45
    Yeh_Barree, Yeh_With_Tail, Yudh, Yudh_He, Zain,         // 46 .. 50
    Fe, Khaph, Zhain,                                       // 51 .. 53
    Count                                                   // 54
}
|
|
538
|
|
/***********************************************************************

        Line break classes. Values are assigned implicitly in
        declaration order starting at 0. Inseparable is an alias
        carrying the same value as the (misspelled) Inseperable
        member; both spellings are kept. Count is one past the
        highest value.

        NOTE(review): presumably the value set of
        Property.LineBreak -- confirm against the ICU headers.

***********************************************************************/

public enum LineBreak
{
    Unknown,                        //  0
    Ambiguous,                      //  1
    Alphabetic,                     //  2
    BreakBoth,                      //  3
    BreakAfter,                     //  4
    BreakBefore,                    //  5
    MandatoryBreak,                 //  6
    ContingentBreak,                //  7
    ClosePunctuation,               //  8
    CombiningMark,                  //  9
    CarriageReturn,                 // 10
    Exclamation,                    // 11
    Glue,                           // 12
    Hyphen,                         // 13
    Ideographic,                    // 14
    Inseperable,                    // 15
    Inseparable = Inseperable,      // 15 (alias)
    InfixNumeric,                   // 16
    LineFeed,                       // 17
    Nonstarter,                     // 18
    Numeric,                        // 19
    OpenPunctuation,                // 20
    PostfixNumeric,                 // 21
    PrefixNumeric,                  // 22
    Quotation,                      // 23
    ComplexContext,                 // 24
    Surrogate,                      // 25
    Space,                          // 26
    BreakSymbols,                   // 27
    Zwspace,                        // 28
    NextLine,                       // 29
    WordJoiner,                     // 30
    Count                           // 31
}
|
|
575
|
|
/***********************************************************************

        Numeric type classes. Count is one past the highest value.

        NOTE(review): presumably the value set of
        Property.NumericType -- confirm against the ICU headers.

***********************************************************************/

public enum NumericType
{
    None    = 0,
    Decimal = 1,
    Digit   = 2,
    Numeric = 3,
    Count           // 4
}
|
|
584
|
|
/***********************************************************************

        Hangul syllable type classes. Count is one past the
        highest value.

        NOTE(review): presumably the value set of
        Property.HangulSyllableType -- confirm against the ICU
        headers.

***********************************************************************/

public enum HangulSyllableType
{
    NotApplicable = 0,
    LeadingJamo   = 1,
    VowelJamo     = 2,
    TrailingJamo  = 3,
    LvSyllable    = 4,
    LvtSyllable   = 5,
    Count               // 6
}
|
|
595
|
|
/***********************************************************************

        Look up the value of an enumerated or integer Unicode
        property for a single code point. Binary and mask
        properties can be queried through the same call.

        Unicode (notably from version 3.2 on) defines many more
        properties than the original set in UnicodeData.txt. The
        property APIs follow the definitions in the Unicode
        Character Database (UCD) and the Unicode Technical Reports
        (UTR); see http://www.unicode.org/ for details and the
        file PropertyAliases.txt for the property names.

        Params:
                c = the code point to query
                p = which property to read

        Returns:
                whatever u_getIntPropertyValue reports for (c, p)

***********************************************************************/

uint getProperty (dchar c, Property p)
{
    // both arguments are handed to the library as plain unsigned ints
    uint codePoint = cast(uint) c;
    uint selector  = cast(uint) p;

    return u_getIntPropertyValue (codePoint, selector);
}
|
|
617
|
|
/***********************************************************************

        Smallest value that an enumerated, integer or binary
        Unicode property can take.

        Params:
                p = the property being asked about

        Returns:
                the result of u_getIntPropertyMinValue for p

***********************************************************************/

uint getPropertyMinimum (Property p)
{
    auto lowest = u_getIntPropertyMinValue (p);
    return lowest;
}
|
|
629
|
|
/***********************************************************************

        Largest value that an enumerated, integer or binary
        Unicode property can take.

        Params:
                p = the property being asked about

        Returns:
                the result of u_getIntPropertyMaxValue for p

***********************************************************************/

uint getPropertyMaximum (Property p)
{
    auto highest = u_getIntPropertyMaxValue (p);
    return highest;
}
|
|
641
|
|
/***********************************************************************

        Bidirectional category of a code point, as used by the
        Unicode bidirectional algorithm
        (UAX #9, http://www.unicode.org/reports/tr9/).

        Params:
                c = the code point to classify

        Returns:
                the value from u_charDirection, cast to Direction

***********************************************************************/

Direction charDirection (dchar c)
{
    auto raw = u_charDirection (c);
    return cast(Direction) raw;
}
|
|
654
|
|
/***********************************************************************

        Unicode allocation block that contains the given
        character.

        Params:
                c = the code point to look up

        Returns:
                the value from ublock_getCode, cast to BlockCode

***********************************************************************/

BlockCode getBlockCode (dchar c)
{
    auto raw = ublock_getCode (c);
    return cast(BlockCode) raw;
}
|
|
666
|
|
/***********************************************************************

        Retrieve the name of a Unicode character into a
        caller-supplied buffer.

        Params:
                c      = the code point whose name is wanted
                choice = which kind of name to fetch
                dst    = buffer that u_charName writes into; its
                         pointer and length are passed through
                         unchanged

        Returns:
                the leading slice of dst whose length is the value
                returned by u_charName

        The status code filled in by u_charName is handed to
        testError together with a message hinting at a too-small
        buffer. NOTE(review): testError is defined outside this
        view; presumably it raises on failure -- confirm.

***********************************************************************/

char[] getCharName (dchar c, CharNameChoice choice, inout char[] dst)
{
    UErrorCode status;

    uint written = u_charName (c, choice, dst.ptr, dst.length, status);
    testError (status, "failed to extract char name (buffer too small?)");

    return dst [0 .. written];
}
|
|
681
|
|
/***********************************************************************

        Retrieve the ISO 10646 comment for a character into a
        caller-supplied buffer.

        Params:
                c   = the code point whose comment is wanted
                dst = buffer that u_getISOComment writes into; its
                      pointer and length are passed through
                      unchanged

        Returns:
                the leading slice of dst whose length is the value
                returned by u_getISOComment

        The status code filled in by u_getISOComment is handed to
        testError together with a message hinting at a too-small
        buffer. NOTE(review): testError is defined outside this
        view; presumably it raises on failure -- confirm.

***********************************************************************/

char[] getComment (dchar c, inout char[] dst)
{
    UErrorCode status;

    uint written = u_getISOComment (c, dst.ptr, dst.length, status);
    testError (status, "failed to extract comment (buffer too small?)");

    return dst [0 .. written];
}
|
|
696
|
|
/***********************************************************************

        Find a Unicode character by name and return its code
        point value.

        Params:
                choice = which kind of name 'name' is
                name   = the character name to search for; it is
                         run through toString() before being
                         passed to u_charFromName

        Returns:
                the code point reported by u_charFromName

        The status code filled in by u_charFromName is handed to
        testError before the result is returned.
        NOTE(review): testError is defined outside this view;
        presumably it raises when the name is not found -- confirm.

***********************************************************************/

dchar charFromName (CharNameChoice choice, char[] name)
{
    UErrorCode status;

    auto  cName = toString (name);
    dchar found = u_charFromName (choice, cName, status);
    testError (status, "failed to locate char name");

    return found;
}
|
|
712
|
|
/***********************************************************************

        Unicode name of a property, as listed in the Unicode
        database file PropertyAliases.txt.

        Params:
                p      = the property to name
                choice = short or long form of the name

        Returns:
                the result of u_getPropertyName, converted with
                toArray()

***********************************************************************/

char[] getPropertyName (Property p, NameChoice choice)
{
    auto cName = u_getPropertyName (p, choice);
    return toArray (cName);
}
|
|
724
|
|
/***********************************************************************

        Unicode name of a property value, as listed in the Unicode
        database file PropertyValueAliases.txt.

        Params:
                p      = the property the value belongs to
                choice = short or long form of the name
                value  = the property value to name

        Returns:
                the result of u_getPropertyValueName, converted
                with toArray()

        Note that the library call takes (property, value, choice)
        while this wrapper takes (property, choice, value).

***********************************************************************/

char[] getPropertyValueName (Property p, NameChoice choice, uint value)
{
    auto cName = u_getPropertyValueName (p, value, choice);
    return toArray (cName);
}
|
|
736
|
|
/***********************************************************************

        Fetch the Unicode version information.

        Params:
                v = passed by reference straight through to
                    u_getUnicodeVersion, which is expected to
                    fill it in

***********************************************************************/

void getUnicodeVersion (inout Version v)
{
    u_getUnicodeVersion (v);
}
|
|
747
|
|
/***********************************************************************

        Fetch the "age" of a code point.

        Params:
                c = the code point to query
                v = passed by reference straight through to
                    u_charAge, which is expected to fill it in

***********************************************************************/

void getCharAge (dchar c, inout Version v)
{
    u_charAge (c, v);
}
|
|
758
|
|
759
|
|
760 /***********************************************************************
|
|
761
|
|
762 These are externalised directly to the client (sans wrapper),
|
|
763 but this may have to change for linux, depending upon the
|
|
764 ICU function-naming conventions within the Posix libraries.
|
|
765
|
|
766 ***********************************************************************/
|
|
767
|
|
768 final static extern (C)
|
|
769 {
|
|
770 /***************************************************************
|
|
771
|
|
772 Check if a code point has the Alphabetic Unicode
|
|
773 property.
|
|
774
|
|
775 ***************************************************************/
|
|
776
|
|
777 bool function (dchar c) isUAlphabetic;
|
|
778
|
|
779 /***************************************************************
|
|
780
|
|
781 Check if a code point has the Lowercase Unicode
|
|
782 property.
|
|
783
|
|
784 ***************************************************************/
|
|
785
|
|
786 bool function (dchar c) isULowercase;
|
|
787
|
|
788 /***************************************************************
|
|
789
|
|
790 Check if a code point has the Uppercase Unicode
|
|
791 property.
|
|
792
|
|
793 ***************************************************************/
|
|
794
|
|
795 bool function (dchar c) isUUppercase;
|
|
796
|
|
797 /***************************************************************
|
|
798
|
|
799 Check if a code point has the White_Space Unicode
|
|
800 property.
|
|
801
|
|
802 ***************************************************************/
|
|
803
|
|
804 bool function (dchar c) isUWhiteSpace;
|
|
805
|
|
806 /***************************************************************
|
|
807
|
|
808 Determines whether the specified code point has the
|
|
809 general category "Ll" (lowercase letter).
|
|
810
|
|
811 ***************************************************************/
|
|
812
|
|
813 bool function (dchar c) isLower;
|
|
814
|
|
815 /***************************************************************
|
|
816
|
|
817 Determines whether the specified code point has the
|
|
818 general category "Lu" (uppercase letter).
|
|
819
|
|
820 ***************************************************************/
|
|
821
|
|
822 bool function (dchar c) isUpper;
|
|
823
|
|
824 /***************************************************************
|
|
825
|
|
826 Determines whether the specified code point is a
|
|
827 titlecase letter.
|
|
828
|
|
829 ***************************************************************/
|
|
830
|
|
831 bool function (dchar c) isTitle;
|
|
832
|
|
833 /***************************************************************
|
|
834
|
|
835 Determines whether the specified code point is a
|
|
836 digit character according to Java.
|
|
837
|
|
838 ***************************************************************/
|
|
839
|
|
840 bool function (dchar c) isDigit;
|
|
841
|
|
842 /***************************************************************
|
|
843
|
|
844 Determines whether the specified code point is a
|
|
845 letter character.
|
|
846
|
|
847 ***************************************************************/
|
|
848
|
|
849 bool function (dchar c) isAlpha;
|
|
850
|
|
851 /***************************************************************
|
|
852
|
|
853 Determines whether the specified code point is an
|
|
854 alphanumeric character (letter or digit) according
|
|
855 to Java.
|
|
856
|
|
857 ***************************************************************/
|
|
858
|
|
859 bool function (dchar c) isAlphaNumeric;
|
|
860
|
|
861 /***************************************************************
|
|
862
|
|
863 Determines whether the specified code point is a
|
|
864 hexadecimal digit.
|
|
865
|
|
866 ***************************************************************/
|
|
867
|
|
868 bool function (dchar c) isHexDigit;
|
|
869
|
|
870 /***************************************************************
|
|
871
|
|
872 Determines whether the specified code point is a
|
|
873 punctuation character.
|
|
874
|
|
875 ***************************************************************/
|
|
876
|
|
877 bool function (dchar c) isPunct;
|
|
878
|
|
879 /***************************************************************
|
|
880
|
|
881 Determines whether the specified code point is a
|
|
882 "graphic" character (printable, excluding spaces).
|
|
883
|
|
884 ***************************************************************/
|
|
885
|
|
886 bool function (dchar c) isGraph;
|
|
887
|
|
888 /***************************************************************
|
|
889
|
|
890 Determines whether the specified code point is a
|
|
891 "blank" or "horizontal space", a character that
|
|
892 visibly separates words on a line.
|
|
893
|
|
894 ***************************************************************/
|
|
895
|
|
896 bool function (dchar c) isBlank;
|
|
897
|
|
898 /***************************************************************
|
|
899
|
|
900 Determines whether the specified code point is
|
|
901 "defined", which usually means that it is assigned
|
|
902 a character.
|
|
903
|
|
904 ***************************************************************/
|
|
905
|
|
906 bool function (dchar c) isDefined;
|
|
907
|
|
908 /***************************************************************
|
|
909
|
|
910 Determines if the specified character is a space
|
|
911 character or not.
|
|
912
|
|
913 ***************************************************************/
|
|
914
|
|
915 bool function (dchar c) isSpace;
|
|
916
|
|
917 /***************************************************************
|
|
918
|
|
919 Determine if the specified code point is a space
|
|
920 character according to Java.
|
|
921
|
|
922 ***************************************************************/
|
|
923
|
|
924 bool function (dchar c) isJavaSpaceChar;
|
|
925
|
|
926 /***************************************************************
|
|
927
|
|
928 Determines if the specified code point is a whitespace
|
|
929 character according to Java/ICU.
|
|
930
|
|
931 ***************************************************************/
|
|
932
|
|
933 bool function (dchar c) isWhiteSpace;
|
|
934
|
|
935 /***************************************************************
|
|
936
|
|
937 Determines whether the specified code point is a
|
|
938 control character (as defined by this function).
|
|
939
|
|
940 ***************************************************************/
|
|
941
|
|
942 bool function (dchar c) isCtrl;
|
|
943
|
|
944 /***************************************************************
|
|
945
|
|
946 Determines whether the specified code point is an ISO
|
|
947 control code.
|
|
948
|
|
949 ***************************************************************/
|
|
950
|
|
951 bool function (dchar c) isISOControl;
|
|
952
|
|
953 /***************************************************************
|
|
954
|
|
955 Determines whether the specified code point is a
|
|
956 printable character.
|
|
957
|
|
958 ***************************************************************/
|
|
959
|
|
960 bool function (dchar c) isPrint;
|
|
961
|
|
962 /***************************************************************
|
|
963
|
|
964 Determines whether the specified code point is a
|
|
965 base character.
|
|
966
|
|
967 ***************************************************************/
|
|
968
|
|
969 bool function (dchar c) isBase;
|
|
970
|
|
971 /***************************************************************
|
|
972
|
|
973 Determines if the specified character is permissible
|
|
974 as the first character in an identifier according to
|
|
975 Unicode (The Unicode Standard, Version 3.0, chapter
|
|
976 5.16 Identifiers).
|
|
977
|
|
978 ***************************************************************/
|
|
979
|
|
980 bool function (dchar c) isIDStart;
|
|
981
|
|
982 /***************************************************************
|
|
983
|
|
984 Determines if the specified character is permissible
|
|
985 in an identifier according to Java.
|
|
986
|
|
987 ***************************************************************/
|
|
988
|
|
989 bool function (dchar c) isIDPart;
|
|
990
|
|
991 /***************************************************************
|
|
992
|
|
993 Determines if the specified character should be
|
|
994 regarded as an ignorable character in an identifier,
|
|
995 according to Java.
|
|
996
|
|
997 ***************************************************************/
|
|
998
|
|
999 bool function (dchar c) isIDIgnorable;
|
|
1000
|
|
1001 /***************************************************************
|
|
1002
|
|
1003 Determines if the specified character is permissible
|
|
1004 as the first character in a Java identifier.
|
|
1005
|
|
1006 ***************************************************************/
|
|
1007
|
|
1008 bool function (dchar c) isJavaIDStart;
|
|
1009
|
|
/***************************************************************

        Reports whether the given character is permissible
        inside a Java identifier.

        Paired with the ICU symbol "u_isJavaIDPart" in the
        targets table.

***************************************************************/

bool function (dchar c) isJavaIDPart;
|
|
1018
|
|
/***************************************************************

        Reports whether the code point carries the
        Bidi_Mirrored property.

        Paired with the ICU symbol "u_isMirrored" in the
        targets table.

***************************************************************/

bool function (dchar c) isMirrored;
|
|
1027
|
|
/***************************************************************

        Yields the decimal digit value of a decimal digit
        character.

        Paired with the ICU symbol "u_charDigitValue" in the
        targets table.

        NOTE(review): the ICU C prototype returns int32_t and
        uses -1 for characters that are not decimal digits.
        With the ubyte return type declared here that sentinel
        would presumably be observed as 255 -- confirm this is
        intended and that callers account for it.

***************************************************************/

ubyte function (dchar c) charDigitValue;
|
|
1036
|
|
/***************************************************************

        Maps the given character to its "mirror-image"
        character.

        Paired with the ICU symbol "u_charMirror" in the
        targets table.

***************************************************************/

dchar function (dchar c) charMirror;
|
|
1045
|
|
/***************************************************************

        Yields the general category value of the code point.

        Paired with the ICU symbol "u_charType" in the targets
        table.

***************************************************************/

ubyte function (dchar c) charType;
|
|
1053
|
|
/***************************************************************

        Yields the combining class of the code point, as
        specified in UnicodeData.txt.

        Paired with the ICU symbol "u_getCombiningClass" in the
        targets table.

***************************************************************/

ubyte function (dchar c) getCombiningClass;
|
|
1062
|
|
/***************************************************************

        Maps the given character to its lowercase equivalent
        according to UnicodeData.txt. A character without a
        lowercase equivalent is returned unchanged.

        Paired with the ICU symbol "u_tolower" in the targets
        table.

***************************************************************/

dchar function (dchar c) toLower;
|
|
1073
|
|
/***************************************************************

        Maps the given character to its uppercase equivalent
        according to UnicodeData.txt. A character without an
        uppercase equivalent is returned unchanged.

        Paired with the ICU symbol "u_toupper" in the targets
        table.

***************************************************************/

dchar function (dchar c) toUpper;
|
|
1083
|
|
/***************************************************************

        Maps the given character to its titlecase equivalent
        according to UnicodeData.txt. When no titlecase
        equivalent is defined the character is returned
        unchanged.

        Paired with the ICU symbol "u_totitle" in the targets
        table.

***************************************************************/

dchar function (dchar c) toTitle;
|
|
1093
|
|
/***************************************************************

        Maps the given character to its case folding
        equivalent according to UnicodeData.txt and
        CaseFolding.txt. A character without a case folding
        equivalent is returned unchanged.

        The meaning of `options` is not documented in this
        part of the file; see the ICU u_foldCase reference.

        Paired with the ICU symbol "u_foldCase" in the targets
        table.

***************************************************************/

dchar function (dchar c, uint options) foldCase;
|
|
1104
|
|
/***************************************************************

        Yields the decimal digit value of the code point in
        the specified radix.

        Paired with the ICU symbol "u_digit" in the targets
        table.

        NOTE(review): the ICU C prototype returns a signed
        int32_t and reports "not a digit in this radix" as -1;
        through the uint return type declared here that value
        would presumably appear as uint.max -- confirm callers
        check for it.

***************************************************************/

uint function (dchar ch, ubyte radix) digit;
|
|
1113
|
|
/***************************************************************

        Yields the character that represents a specific digit
        in the specified radix.

        Paired with the ICU symbol "u_forDigit" in the targets
        table.

***************************************************************/

dchar function (uint digit, ubyte radix) forDigit;
|
|
1122
|
|
/***************************************************************

        Yields the numeric value of a Unicode code point as
        defined in the Unicode Character Database.

        Paired with the ICU symbol "u_getNumericValue" in the
        targets table.

***************************************************************/

double function (dchar c) getNumericValue;
|
|
1131 }
|
|
1132
|
|
1133
|
|
/***********************************************************************

        Handle for the shared library that the ICU functions are
        bound from. It is assigned from FunctionLoader.bind in the
        static constructor and handed to FunctionLoader.unbind in
        the static destructor.

        Binding through a loader is complicated by the issues
        regarding D and DLLs on the Windows platform.

***********************************************************************/

private static void* library;
|
|
1143
|
|
/***********************************************************************

        Private function pointers with C linkage. Each one is
        paired with the ICU symbol of the same name in the targets
        table. Parameters are unnamed in these declarations; see
        the ICU C API reference for their meaning.

***********************************************************************/

private static extern (C)
{
        // integer-valued property queries
        uint  function (uint, uint)                             u_getIntPropertyValue;
        uint  function (uint)                                   u_getIntPropertyMinValue;
        uint  function (uint)                                   u_getIntPropertyMaxValue;

        // per-code-point lookups
        uint  function (dchar)                                  u_charDirection;
        uint  function (dchar)                                  ublock_getCode;

        // name and comment lookups; status is reported through
        // the trailing inout UErrorCode argument
        uint  function (dchar, uint, char*, uint, inout UErrorCode) u_charName;
        uint  function (dchar, char*, uint, inout UErrorCode)   u_getISOComment;
        uint  function (uint, char*, inout UErrorCode)          u_charFromName;

        // property and property-value names
        char* function (uint, uint)                             u_getPropertyName;
        char* function (uint, uint, uint)                       u_getPropertyValueName;

        // version queries; the result is written to the inout Version
        void  function (inout Version)                          u_getUnicodeVersion;
        void  function (dchar, inout Version)                   u_charAge;
}
|
|
1163
|
|
/***********************************************************************

        Binding table: every entry pairs the address of one of the
        function pointers with the name of the ICU symbol it is to
        be bound to. The static constructor passes this table to
        FunctionLoader.bind.

***********************************************************************/

static FunctionLoader.Bind[] targets =
[
        // digit conversion and case mapping
        {cast(void**) &forDigit,                 "u_forDigit"},
        {cast(void**) &digit,                    "u_digit"},
        {cast(void**) &foldCase,                 "u_foldCase"},
        {cast(void**) &toTitle,                  "u_totitle"},
        {cast(void**) &toUpper,                  "u_toupper"},
        {cast(void**) &toLower,                  "u_tolower"},

        // per-character values
        {cast(void**) &charType,                 "u_charType"},
        {cast(void**) &charMirror,               "u_charMirror"},
        {cast(void**) &charDigitValue,           "u_charDigitValue"},

        // identifier predicates
        {cast(void**) &isJavaIDPart,             "u_isJavaIDPart"},
        {cast(void**) &isJavaIDStart,            "u_isJavaIDStart"},
        {cast(void**) &isIDIgnorable,            "u_isIDIgnorable"},
        {cast(void**) &isIDPart,                 "u_isIDPart"},
        {cast(void**) &isIDStart,                "u_isIDStart"},

        // character-class predicates
        {cast(void**) &isMirrored,               "u_isMirrored"},
        {cast(void**) &isBase,                   "u_isbase"},
        {cast(void**) &isPrint,                  "u_isprint"},
        {cast(void**) &isISOControl,             "u_isISOControl"},
        {cast(void**) &isCtrl,                   "u_iscntrl"},
        {cast(void**) &isWhiteSpace,             "u_isWhitespace"},
        {cast(void**) &isJavaSpaceChar,          "u_isJavaSpaceChar"},
        {cast(void**) &isSpace,                  "u_isspace"},
        {cast(void**) &isDefined,                "u_isdefined"},
        {cast(void**) &isBlank,                  "u_isblank"},
        {cast(void**) &isGraph,                  "u_isgraph"},
        {cast(void**) &isPunct,                  "u_ispunct"},
        {cast(void**) &isHexDigit,               "u_isxdigit"},
        {cast(void**) &isAlpha,                  "u_isalpha"},
        {cast(void**) &isAlphaNumeric,           "u_isalnum"},
        {cast(void**) &isDigit,                  "u_isdigit"},
        {cast(void**) &isTitle,                  "u_istitle"},
        {cast(void**) &isUpper,                  "u_isupper"},
        {cast(void**) &isLower,                  "u_islower"},
        {cast(void**) &isUAlphabetic,            "u_isUAlphabetic"},
        {cast(void**) &isUWhiteSpace,            "u_isUWhiteSpace"},
        {cast(void**) &isUUppercase,             "u_isUUppercase"},
        {cast(void**) &isULowercase,             "u_isULowercase"},

        // numeric value and combining class
        {cast(void**) &getNumericValue,          "u_getNumericValue"},
        {cast(void**) &getCombiningClass,        "u_getCombiningClass"},

        // private pointers declared in the private extern (C) group
        {cast(void**) &u_getIntPropertyValue,    "u_getIntPropertyValue"},
        {cast(void**) &u_getIntPropertyMinValue, "u_getIntPropertyMinValue"},
        {cast(void**) &u_getIntPropertyMaxValue, "u_getIntPropertyMaxValue"},
        {cast(void**) &u_charDirection,          "u_charDirection"},
        {cast(void**) &ublock_getCode,           "ublock_getCode"},
        {cast(void**) &u_charName,               "u_charName"},
        {cast(void**) &u_getISOComment,          "u_getISOComment"},
        {cast(void**) &u_charFromName,           "u_charFromName"},
        {cast(void**) &u_getPropertyName,        "u_getPropertyName"},
        {cast(void**) &u_getPropertyValueName,   "u_getPropertyValueName"},
        {cast(void**) &u_getUnicodeVersion,      "u_getUnicodeVersion"},
        {cast(void**) &u_charAge,                "u_charAge"},
];
|
|
1222
|
|
/***********************************************************************

        Static constructor: passes icuuc and the targets table to
        FunctionLoader.bind and keeps the returned handle in
        `library` so the static destructor can release it.

        FunctionLoader and icuuc are defined elsewhere; presumably
        bind loads the library and fills in each listed pointer --
        confirm against FunctionLoader.

***********************************************************************/

static this ()
{
        library = FunctionLoader.bind (icuuc, targets);
}
|
|
1231
|
|
/***********************************************************************

        Static destructor: hands the handle stored in `library` by
        the static constructor back to FunctionLoader.unbind.

***********************************************************************/

static ~this ()
{
        FunctionLoader.unbind (library);
}
|
|
1240 }
|