92
|
1 /*******************************************************************************
|
|
2
|
|
3 @file UString.d
|
|
4
|
|
5 Copyright (c) 2004 Kris Bell
|
|
6
|
|
7 This software is provided 'as-is', without any express or implied
|
|
8 warranty. In no event will the authors be held liable for damages
|
|
9 of any kind arising from the use of this software.
|
|
10
|
|
11 Permission is hereby granted to anyone to use this software for any
|
|
12 purpose, including commercial applications, and to alter it and/or
|
|
13 redistribute it freely, subject to the following restrictions:
|
|
14
|
|
15 1. The origin of this software must not be misrepresented; you must
|
|
16 not claim that you wrote the original software. If you use this
|
|
17 software in a product, an acknowledgment within documentation of
|
|
18 said product would be appreciated but is not required.
|
|
19
|
|
20 2. Altered source versions must be plainly marked as such, and must
|
|
21 not be misrepresented as being the original software.
|
|
22
|
|
23 3. This notice may not be removed or altered from any distribution
|
|
24 of the source.
|
|
25
|
|
26 4. Derivative works are permitted, but they must carry this notice
|
|
27 in full and credit the original source.
|
|
28
|
|
29
|
|
30 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
31
|
|
32
|
|
33 @version Initial version, October 2004
|
|
34 @author Kris
|
|
35
|
|
36 Note that this package and documentation is built around the ICU
|
|
37 project (http://oss.software.ibm.com/icu/). Below is the license
|
|
38 statement as specified by that software:
|
|
39
|
|
40
|
|
41 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
42
|
|
43
|
|
44 ICU License - ICU 1.8.1 and later
|
|
45
|
|
46 COPYRIGHT AND PERMISSION NOTICE
|
|
47
|
|
48 Copyright (c) 1995-2003 International Business Machines Corporation and
|
|
49 others.
|
|
50
|
|
51 All rights reserved.
|
|
52
|
|
53 Permission is hereby granted, free of charge, to any person obtaining a
|
|
54 copy of this software and associated documentation files (the
|
|
55 "Software"), to deal in the Software without restriction, including
|
|
56 without limitation the rights to use, copy, modify, merge, publish,
|
|
57 distribute, and/or sell copies of the Software, and to permit persons
|
|
58 to whom the Software is furnished to do so, provided that the above
|
|
59 copyright notice(s) and this permission notice appear in all copies of
|
|
60 the Software and that both the above copyright notice(s) and this
|
|
61 permission notice appear in supporting documentation.
|
|
62
|
|
63 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
64 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
65 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
|
66 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
|
67 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
|
|
68 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
|
|
69 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
70 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
|
71 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
72
|
|
73 Except as contained in this notice, the name of a copyright holder
|
|
74 shall not be used in advertising or otherwise to promote the sale, use
|
|
75 or other dealings in this Software without prior written authorization
|
|
76 of the copyright holder.
|
|
77
|
|
78 ----------------------------------------------------------------------
|
|
79
|
|
80 All trademarks and registered trademarks mentioned herein are the
|
|
81 property of their respective owners.
|
|
82
|
|
83 *******************************************************************************/
|
|
84
|
|
85 module dwtx.dwtxhelper.mangoicu.UString;
|
|
86
|
|
87 private import dwtx.dwtxhelper.mangoicu.ICU,
|
|
88 dwtx.dwtxhelper.mangoicu.UChar,
|
|
89 dwtx.dwtxhelper.mangoicu.ULocale;
|
|
90
|
|
91 /*******************************************************************************
|
|
92
|
|
93 *******************************************************************************/
|
|
94
|
|
// C runtime memmove, used for overlapping moves within the content buffer.
// The byte count is declared as size_t so the binding matches the C prototype
// on targets where size_t is wider than uint; uint arguments still convert.
private extern (C) void memmove (void* dst, void* src, size_t bytes);
|
|
96
|
|
97 /*******************************************************************************
|
|
98
|
|
99 Bind to the IReadable and IWritable interfaces if we're building
|
|
100 along with the mango.io package
|
|
101
|
|
102 *******************************************************************************/
|
|
103
|
|
// 'Isolated' is set unconditionally here, so the first branch below is the
// one that gets compiled: both marker interfaces are empty and nothing from
// the mango.io package is imported.
version = Isolated;

version (Isolated)
{
        // standalone build: empty placeholders
        private interface ITextOther   {}
        private interface IStringOther {}
}
else
{
        // mango.io build: bind the placeholders to IWritable and IReadable
        private import dwtx.dwtxhelper.mangoicu.UMango;

        private import mango.io.model.IReader,
                       mango.io.model.IWriter;

        private interface ITextOther   : IWritable {}
        private interface IStringOther : IReadable {}
}
|
|
120
|
|
121
|
|
122 /*******************************************************************************
|
|
123
|
|
124 UString is a string class that stores Unicode characters directly
|
|
125 and provides similar functionality as the Java String class.
|
|
126
|
|
127 In ICU, a Unicode string consists of 16-bit Unicode code units.
|
|
128 A Unicode character may be stored with either one code unit —
|
|
129 which is the most common case — or with a matched pair of
|
|
130 special code units ("surrogates"). The data type for code units
|
|
131 is UChar.
|
|
132
|
|
133 For single-character handling, a Unicode character code point is
|
|
134 a value in the range 0..0x10ffff. ICU uses the UChar32 type for
|
|
135 code points.
|
|
136
|
|
137 Indexes and offsets into and lengths of strings always count code
|
|
138 units, not code points. This is the same as with multi-byte char*
|
|
139 strings in traditional string handling. Operations on partial
|
|
140 strings typically do not test for code point boundaries. If necessary,
|
|
141 the user needs to take care of such boundaries by testing for the code
|
|
142 unit values or by using functions like getChar32Start()
|
|
143 and getChar32Limit()
|
|
144
|
|
145 UString methods are more lenient with regard to input parameter values
|
|
146 than other ICU APIs. In particular:
|
|
147
|
|
148 - If indexes are out of bounds for a UString object (< 0 or > length)
|
|
149 then they are "pinned" to the nearest boundary.
|
|
150
|
|
151 - If primitive string pointer values (e.g., const wchar* or char*) for
|
|
152 input strings are null, then those input string parameters are treated
|
|
153 as if they pointed to an empty string. However, this is not the case
|
|
154 for char* parameters for charset names or other IDs.
|
|
155
|
|
156 *******************************************************************************/
|
|
157
|
|
158 class UString : UStringView, IStringOther
|
|
159 {
|
|
160 alias opCat append;
|
|
161 alias opIndexAssign setCharAt;
|
|
162
|
|
163 /***********************************************************************
|
|
164
|
|
165 Create an empty UString with the specified available space
|
|
166
|
|
167 ***********************************************************************/
|
|
168
|
|
this (uint space = 0)
{
        // this instance owns its buffer from the start
        mutable = true;

        // reserve the requested number of code units
        content.length = space;
}
|
|
174
|
|
175 /***********************************************************************
|
|
176
|
|
177 Create a UString upon the provided content. If said content
|
|
178 is immutable (read-only) then you might consider setting the
|
|
179 'mutable' parameter to false. Doing so will avoid allocating
|
|
180 heap-space for the content until it is modified.
|
|
181
|
|
182 ***********************************************************************/
|
|
183
|
|
this (wchar[] content, bool mutable = true)
{
        // setTo() decides between duplicating and aliasing the array
        this.setTo (content, mutable);
}
|
|
188
|
|
189 /***********************************************************************
|
|
190
|
|
191 Create a UString via the content of a UStringView. Note that the
|
|
192 default is to assume the content is immutable (read-only).
|
|
193
|
|
194 ***********************************************************************/
|
|
195
|
|
this (UStringView other, bool mutable = false)
{
        // hand the valid portion of the view to the array constructor
        this (other.get (), mutable);
}
|
|
200
|
|
201 /***********************************************************************
|
|
202
|
|
203 Create a UString via the content of a UString. If said content
|
|
204 is immutable (read-only) then you might consider setting the
|
|
205 'mutable' parameter to false. Doing so will avoid allocating
|
|
206 heap-space for the content until it is modified via UString
|
|
207 methods.
|
|
208
|
|
209 ***********************************************************************/
|
|
210
|
|
this (UString other, bool mutable = true)
{
        // hand the valid portion of the other string to the array constructor
        this (other.get (), mutable);
}
|
|
215
|
|
216 /***********************************************************************
|
|
217
|
|
218 Support for reading content via the IO system
|
|
219
|
|
220 ***********************************************************************/
|
|
221
|
|
222 version (Isolated){}
|
|
223 else
|
|
224 {
|
|
225 /***************************************************************
|
|
226
|
|
227 Internal adapter to handle loading and conversion
|
|
228 of UString content. Once constructed, this may be
|
|
229 used as the target for an IReader. Alternatively,
|
|
230 invoke the load() method with an IBuffer of choice.
|
|
231
|
|
232 ***************************************************************/
|
|
233
|
|
class UStringDecoder : StringDecoder16
{
        // the string that receives the decoded content
        private UString s;

        // bind a decoder for 'bytes' of input to the given UString
        this (UConverter c, uint bytes, UString s)
        {
                super (c, bytes);
                this.s = s;
        }

        // IReadable adapter: decode straight from the reader's buffer
        protected void read (IReader r)
        {
                load (r.buffer);
        }

        // Decode from the provided buffer into the target string, starting
        // at offset zero, and keep going for as long as toGo is non-zero.
        // NOTE(review): expand() measures free space from s.len, which is
        // only updated after the loop -- confirm this is intended.
        package void load (IBuffer b)
        {
                uint total = super.read (b, s.content);

                while (toGo)
                      {
                      s.expand (toGo);
                      total += super.read (b, s.content[total..$]);
                      }

                s.len = total;
        }
}
|
|
265
|
|
266 /***************************************************************
|
|
267
|
|
268 Another constructor for loading known content length
|
|
269 into a UString.
|
|
270
|
|
271 ***************************************************************/
|
|
272
|
|
this (IBuffer buffer, uint contentLength, UConverter cvt)
{
        // start with contentLength code units of space
        this (contentLength);

        // decode contentLength input bytes from the buffer into this string
        (new UStringDecoder (cvt, contentLength, this)).load (buffer);
}
|
|
279
|
|
280 /***************************************************************
|
|
281
|
|
282 Read as many bytes from the input as is necessary
|
|
283 to produce the expected number of wchar elements.
|
|
284 This uses the default wchar handler, which can be
|
|
285 altered by binding a StringDecoder to the IReader
|
|
286 in use (see UMango for details).
|
|
287
|
|
288 We're mutable, so ensure we don't mess with the
|
|
289 IO buffers. Interestingly, changing the length
|
|
290 of a D array will account for slice assignments
|
|
291 (it checks the pointer to see if it's a starting
|
|
292 point in the pool). Unfortunately, that doesn't
|
|
293 catch the case where a slice starts at offset 0,
|
|
294 which is where IBuffer slices may come from.
|
|
295
|
|
296 To be safe, we ask the allocator in use whether
|
|
297 the content it provided can be mutated or not.
|
|
298 Note that this is not necessary for UStringView, since
|
|
299 that is a read-only construct.
|
|
300
|
|
301 ***************************************************************/
|
|
302
|
|
void read (IReader r)
{
        // let the reader populate (or replace) the content array
        r.get (this.content);

        // everything the reader supplied is valid text
        this.len = this.content.length;

        // ask the reader's allocator whether we may write into that array
        this.mutable = r.getAllocator.isMutable (this.content);
}
|
|
309
|
|
310 /***************************************************************
|
|
311
|
|
312 Return a streaming decoder that can be used to
|
|
313 populate this UString with a specified number of
|
|
314 input bytes.
|
|
315
|
|
316 This differs from the above read() method in the
|
|
317 way content is read: in the above case, exactly
|
|
318 the specified number of wchar elements will be
|
|
319 converted from the input, whereas in this case
|
|
320 a variable number of wchar elements are converted
|
|
321 until 'bytes' have been read from the input. This
|
|
322 is useful in those cases where the original number
|
|
323 of elements has been lost, and only the resultant
|
|
324 converted byte-count remains (a la HTTP).
|
|
325
|
|
326 The returned StringDecoder is one-shot only. You may
|
|
327 reuse it (both the converter and the byte count) via
|
|
328 its reset() method.
|
|
329
|
|
330 One applies the resultant converter directly with an
|
|
331 IReader like so:
|
|
332
|
|
333 @code
|
|
334 UString s = ...;
|
|
335 IReader r = ...;
|
|
336
|
|
337 // r >> s.createDecoder(cvt, bytes);
|
|
338 r.get (s.createDecoder(cvt, bytes));
|
|
339 @endcode
|
|
340
|
|
341 which will read the specified number of bytes from
|
|
342 the input and convert them to an appropriate number
|
|
343 of wchars within the UString.
|
|
344
|
|
345 ***************************************************************/
|
|
346
|
|
StringDecoder createDecoder (UConverter c, uint bytes)
{
        // a fresh decoder bound to this string
        StringDecoder decoder = new UStringDecoder (c, bytes, this);
        return decoder;
}
|
|
351 }
|
|
352
|
|
353 /***********************************************************************
|
|
354
|
|
355 Append text to this UString
|
|
356
|
|
357 ***********************************************************************/
|
|
358
|
|
UString opCat (UStringView other)
{
        // append only the valid portion of the other string
        wchar[] text = other.get;
        return opCat (text);
}
|
|
363
|
|
364 /***********************************************************************
|
|
365
|
|
366 Append partial text to this UString
|
|
367
|
|
368 ***********************************************************************/
|
|
369
|
|
UString opCat (UStringView other, uint start, uint len=uint.max)
{
        // let the source string adjust the requested range first
        other.pinIndices (start, len);

        uint stop = start + len;
        return opCat (other.content [start..stop]);
}
|
|
375
|
|
376 /***********************************************************************
|
|
377
|
|
378 Append a single character to this UString
|
|
379
|
|
380 ***********************************************************************/
|
|
381
|
|
UString opCat (wchar chr)
{
        // treat the parameter as a one-element source array
        wchar* single = &chr;
        return opCat (single, 1);
}
|
|
386
|
|
387 /***********************************************************************
|
|
388
|
|
389 Append text to this UString
|
|
390
|
|
391 ***********************************************************************/
|
|
392
|
|
UString opCat (wchar[] chars)
{
        // split the array into pointer and count for the internal appender
        wchar* src   = chars.ptr;
        uint   count = chars.length;
        return opCat (src, count);
}
|
|
397
|
|
398 /***********************************************************************
|
|
399
|
|
400 Converts a sequence of UTF-8 bytes to UChars (UTF-16)
|
|
401
|
|
402 ***********************************************************************/
|
|
403
|
|
UString opCat (char[] chars)
{
        // callback for format(): convert the UTF-8 input into the space
        // offered and return the count reported by u_strFromUTF8
        uint transcode (wchar* dst, uint len, inout UErrorCode e)
        {
                uint needed;

                u_strFromUTF8 (dst, len, &needed, chars.ptr, chars.length, e);
                return needed;
        }

        // reserve one code unit per input byte before converting
        expand (chars.length);
        return format (&transcode, "failed to append UTF char[]");
}
|
|
417
|
|
418 /***********************************************************************
|
|
419
|
|
420 Set a section of this UString to the specified character
|
|
421
|
|
422 ***********************************************************************/
|
|
423
|
|
UString setTo (wchar chr, uint start=0, uint len=uint.max)
{
        pinIndices (start, len);

        // take a private copy before writing into aliased content
        if (mutable == false)
            realloc ();

        uint stop = start + len;
        content [start..stop] = chr;
        return this;
}
|
|
432
|
|
433 /***********************************************************************
|
|
434
|
|
435 Set the content to the provided array. Parameter 'mutable'
|
|
436 specifies whether the given array is likely to change. If
|
|
437 not, the array is aliased until such time this UString is
|
|
438 altered.
|
|
439
|
|
440 ***********************************************************************/
|
|
441
|
|
UString setTo (wchar[] chars, bool mutable = true)
{
        this.mutable = mutable;
        len = chars.length;

        // mutable: take a private duplicate; otherwise alias the caller's array
        content = mutable ? chars.dup : chars;
        return this;
}
|
|
451
|
|
452 /***********************************************************************
|
|
453
|
|
454 Replace the content of this UString. If the new content
|
|
455 is immutable (read-only) then you might consider setting the
|
|
456 'mutable' parameter to false. Doing so will avoid allocating
|
|
457 heap-space for the content until it is modified via one of
|
|
458 these methods.
|
|
459
|
|
460 ***********************************************************************/
|
|
461
|
|
UString setTo (UStringView other, bool mutable = true)
{
        // only the valid portion of the other string is adopted
        wchar[] text = other.get;
        return setTo (text, mutable);
}
|
|
466
|
|
467 /***********************************************************************
|
|
468
|
|
469 Replace the content of this UString. If the new content
|
|
470 is immutable (read-only) then you might consider setting the
|
|
471 'mutable' parameter to false. Doing so will avoid allocating
|
|
472 heap-space for the content until it is modified via one of
|
|
473 these methods.
|
|
474
|
|
475 ***********************************************************************/
|
|
476
|
|
UString setTo (UStringView other, uint start, uint len, bool mutable = true)
{
        // let the source string adjust the requested range first
        other.pinIndices (start, len);

        uint stop = start + len;
        return setTo (other.content [start..stop], mutable);
}
|
|
482
|
|
483 /***********************************************************************
|
|
484
|
|
485 Replace the character at the specified location.
|
|
486
|
|
487 ***********************************************************************/
|
|
488
|
|
final UString opIndexAssign (wchar chr, uint index)
in {
        // the index must address valid text, not spare capacity
        if (index >= len)
            exception ("index out of bounds");
   }
body
{
        // take a private copy before writing into aliased content
        if (! mutable)
              realloc ();

        content [index] = chr;
        return this;
}
|
|
501
|
|
502 /***********************************************************************
|
|
503
|
|
504 Remove a piece of this UString.
|
|
505
|
|
506 ***********************************************************************/
|
|
507
|
|
UString remove (uint start, uint length=uint.max)
{
        pinIndices (start, length);

        if (length)
           {
           if (start >= len)
               truncate (start);
           else
              {
              // take a private copy before shuffling aliased content
              if (! mutable)
                    realloc ();

              // first code unit after the removed range
              uint i = start + length;

              // Close the gap by sliding the tail down. Pointer arithmetic
              // is used instead of &content[i]: when the removed range
              // reaches the end of the buffer, i equals content.length and
              // the indexing form fails the array bounds check even though
              // zero bytes are moved.
              memmove (content.ptr + start, content.ptr + i, (len-i) * wchar.sizeof);
              len -= length;
              }
           }
        return this;
}
|
|
525
|
|
526 /***********************************************************************
|
|
527
|
|
528 Truncate the length of this UString.
|
|
529
|
|
530 ***********************************************************************/
|
|
531
|
|
UString truncate (uint length=0)
{
        // a request beyond the current length leaves the string untouched
        if (length > len)
            return this;

        len = length;
        return this;
}
|
|
538
|
|
539 /***********************************************************************
|
|
540
|
|
541 Insert leading spaces in this UString
|
|
542
|
|
543 ***********************************************************************/
|
|
544
|
|
UString padLeading (uint count, wchar padChar = 0x0020)
{
        // make room for the padding
        expand (count);

        // Shift the existing text up by 'count' code units. Pointer
        // arithmetic replaces &content[count], which fails the array
        // bounds check when count equals content.length (an empty string
        // whose capacity exactly matches the padding), although nothing
        // is moved in that case.
        memmove (content.ptr + count, content.ptr, len * wchar.sizeof);
        len += count;

        // fill the vacated prefix with the pad character
        return setTo (padChar, 0, count);
}
|
|
552
|
|
553 /***********************************************************************
|
|
554
|
|
555 Append some trailing spaces to this UString.
|
|
556
|
|
557 ***********************************************************************/
|
|
558
|
|
UString padTrailing (uint length, wchar padChar = 0x0020)
{
        expand (length);

        // the padding begins where the current text ends
        uint tail = len;
        len = tail + length;

        return setTo (padChar, tail, length);
}
|
|
565
|
|
566 /***********************************************************************
|
|
567
|
|
568 Check for available space within the buffer, and expand
|
|
569 as necessary.
|
|
570
|
|
571 ***********************************************************************/
|
|
572
|
|
package final void expand (uint count)
{
        // Grow when there is not enough room for 'count' more code units.
        // Also take a private copy whenever the content is still aliased:
        // callers in this class write into the buffer right after calling
        // expand(), and aliased content may have spare room (for example
        // after truncate), which previously let those writes land in the
        // caller's read-only array.
        if (! mutable || (len + count) > content.length)
              realloc (count);
}

/***********************************************************************

        Allocate memory due to a change in the content. We handle
        the distinction between mutable and immutable here.

        Mutable content is resized in place; immutable (aliased)
        content is copied into a fresh array, after which this
        string is flagged mutable. The new size is the current
        buffer length plus 'count', rounded up to a multiple of 64.

***********************************************************************/

private final void realloc (uint count = 0)
{
        uint size = (content.length + count + 63) & ~63;

        if (mutable)
            content.length = size;
        else
           {
           mutable = true;
           wchar[] x = content;
           content = new wchar [size];

           // Copy only the valid text. The old array can be longer than
           // 'len' (e.g. after truncate), and an array copy requires both
           // sides to have the same length.
           if (len)
               content[0..len] = x[0..len];
           }
}
|
|
601
|
|
602 /***********************************************************************
|
|
603
|
|
604 Internal method to support UString appending
|
|
605
|
|
606 ***********************************************************************/
|
|
607
|
|
private final UString opCat (wchar* chars, uint count)
{
        expand (count);

        // copy the new code units in directly after the current text
        uint tail = len + count;
        content[len..tail] = chars[0..count];
        len = tail;
        return this;
}
|
|
615
|
|
616 /***********************************************************************
|
|
617
|
|
618 Internal method to support formatting into this UString.
|
|
619 This is used by many of the ICU wrappers to append content
|
|
620 into a UString.
|
|
621
|
|
622 ***********************************************************************/
|
|
623
|
|
// Callback signature used by format(): write up to 'len' code units at
// 'dst', report status through 'e', and return a code-unit count.
typedef uint delegate (wchar* dst, uint len, inout UErrorCode e) Formatter;

package final UString format (Formatter format, char[] msg)
{
        UErrorCode e;
        uint       length;

        while (true)
              {
              e = e.OK;

              // Offer the free space after the current text. Pointer
              // arithmetic replaces &content[len], which fails the array
              // bounds check when the buffer is exactly full or empty --
              // the very case where the callback must run once just to
              // report how much room it needs.
              length = format (content.ptr + len, content.length - len, e);

              // on overflow, make room for the reported length and retry
              if (e == e.BufferOverflow)
                  expand (length);
              else
                 break;
              }

        if (isError (e))
            exception (msg);

        len += length;
        return this;
}
|
|
647 }
|
|
648
|
|
649
|
|
650 /*******************************************************************************
|
|
651
|
|
652 Immutable (read-only) text -- use UString for mutable strings.
|
|
653
|
|
654 *******************************************************************************/
|
|
655
|
|
656 class UStringView : ICU, ITextOther
|
|
657 {
|
|
658 alias opIndex charAt;
|
|
659
|
|
660 // the core of the UStringView and UString attributes. The name 'len'
|
|
661 // is used rather than the more obvious 'length' since there is
|
|
662 // a collision with the silly array[length] syntactic sugar ...
|
|
663 package uint len;
|
|
664 package wchar[] content;
|
|
665
|
|
666 // this should probably be in UString only, but there seems to
|
|
667 // be a compiler bug where it doesn't get initialised correctly,
|
|
668 // and it's perhaps useful to have here for when a UString is
|
|
669 // passed as a UStringView argument.
|
|
670 private bool mutable;
|
|
671
|
|
672 // toFolded() argument
|
|
public enum CaseOption
{
        // NOTE(review): values presumably mirror ICU's U_FOLD_CASE_DEFAULT
        // and U_FOLD_CASE_EXCLUDE_SPECIAL_I -- confirm against the bindings
        Default  = 0,
        SpecialI = 1
}
|
|
678
|
|
679 /***********************************************************************
|
|
680
|
|
681 Hidden constructor
|
|
682
|
|
683 ***********************************************************************/
|
|
684
|
|
private this ()
{
        // intentionally empty
}
|
|
688
|
|
689 /***********************************************************************
|
|
690
|
|
691 Construct read-only wrapper around the given content
|
|
692
|
|
693 ***********************************************************************/
|
|
694
|
|
this (wchar[] content)
{
        // the whole array is valid text; it is referenced, not copied
        this.len     = content.length;
        this.content = content;
}
|
|
700
|
|
701 /***********************************************************************
|
|
702
|
|
703 Support for writing via the Mango IO subsystem
|
|
704
|
|
705 ***********************************************************************/
|
|
706
|
|
version (Isolated)
{
        // no IO binding in standalone builds
}
else
{
        // emit the valid portion of the content through the writer
        void write (IWriter w)
        {
                w.put (content [0..len]);
        }
}
|
|
715
|
|
716 /***********************************************************************
|
|
717
|
|
718 Return the valid content from this UStringView
|
|
719
|
|
720 ***********************************************************************/
|
|
721
|
|
final package wchar[] get ()
{
        // slice off any capacity beyond the valid length
        wchar[] valid = content [0..len];
        return valid;
}
|
|
726
|
|
727 /***********************************************************************
|
|
728
|
|
729 Is this UStringView equal to another?
|
|
730
|
|
731 ***********************************************************************/
|
|
732
|
|
final override int opEquals (Object o)
{
        UStringView other = cast(UStringView) o;

        // not a UStringView (or null): never equal
        if (other is null)
            return 0;

        // identical object: equal without comparing content
        if (other is this)
            return 1;

        return compare (other) == 0;
}
|
|
741
|
|
742 /***********************************************************************
|
|
743
|
|
744 Compare this UStringView to another.
|
|
745
|
|
746 ***********************************************************************/
|
|
747
|
|
final override int opCmp (Object o)
{
        UStringView other = cast(UStringView) o;

        // identical object
        if (other is this)
            return 0;

        // not a UStringView (or null): this string orders after it
        if (other is null)
            return 1;

        return compare (other);
}
|
|
759
|
|
760 /***********************************************************************
|
|
761
|
|
762 Hash this UStringView
|
|
763
|
|
764 ***********************************************************************/
|
|
765
|
|
final override uint toHash ()
{
        // hash only the valid portion, using the runtime's wchar[] hasher
        wchar[] valid = content [0..len];
        return typeid(wchar[]).getHash (&valid);
}
|
|
770
|
|
771 /***********************************************************************
|
|
772
|
|
773 Clone this UStringView into a UString
|
|
774
|
|
775 ***********************************************************************/
|
|
776
|
|
final UString copy ()
{
        // Clone only the valid text. 'content' can be longer than 'len'
        // (a UString keeps spare capacity there), and passing the whole
        // buffer made the clone's length include that unused space.
        return new UString (get);
}
|
|
781
|
|
782 /***********************************************************************
|
|
783
|
|
784 Clone a section of this UStringView into a UString
|
|
785
|
|
786 ***********************************************************************/
|
|
787
|
|
final UString extract (uint start, uint len=uint.max)
{
        pinIndices (start, len);

        // the UString constructor duplicates the slice by default
        uint stop = start + len;
        return new UString (content [start..stop]);
}
|
|
793
|
|
794 /***********************************************************************
|
|
795
|
|
796 Count unicode code points in the length UChar code units of
|
|
797 the string. A code point may occupy either one or two UChar
|
|
798 code units. Counting code points involves reading all code
|
|
799 units.
|
|
800
|
|
801 ***********************************************************************/
|
|
802
|
|
final uint codePoints (uint start=0, uint length=uint.max)
{
        pinIndices (start, length);

        // Pointer arithmetic replaces &content[start], which fails the
        // array bounds check for an empty string (or whenever start equals
        // content.length) even though zero code units are counted.
        return u_countChar32 (content.ptr + start, length);
}
|
|
808
|
|
809 /***********************************************************************
|
|
810
|
|
811 Return an indication whether or not there are surrogate pairs
|
|
812 within the string.
|
|
813
|
|
814 ***********************************************************************/
|
|
815
|
|
final bool hasSurrogates (uint start=0, uint length=uint.max)
{
        pinIndices (start, length);

        // a code point count that differs from the code unit count is
        // taken to mean the range contains surrogate pairs
        uint points = codePoints (start, length);
        return points != length;
}
|
|
821
|
|
822 /***********************************************************************
|
|
823
|
|
824 Return the character at the specified position.
|
|
825
|
|
826 ***********************************************************************/
|
|
827
|
|
final wchar opIndex (uint index)
in {
        // the index must address valid text, not spare capacity
        if (index >= len)
            exception ("index out of bounds");
   }
body
{
        return content [index];
}
|
|
837
|
|
838 /***********************************************************************
|
|
839
|
|
840 Return the length of the valid content
|
|
841
|
|
842 ***********************************************************************/
|
|
843
|
|
final uint length ()
{
        // number of valid code units, not the buffer capacity
        return this.len;
}
|
|
848
|
|
849 /***********************************************************************
|
|
850
|
|
851 The comparison can be done in code unit order or in code
|
|
852 point order. They differ only in UTF-16 when comparing
|
|
853 supplementary code points (U+10000..U+10ffff) to BMP code
|
|
854 points near the end of the BMP (i.e., U+e000..U+ffff).
|
|
855
|
|
856 In code unit order, high BMP code points sort after
|
|
857 supplementary code points because they are stored as
|
|
858 pairs of surrogates which are at U+d800..U+dfff.
|
|
859
|
|
860 ***********************************************************************/
|
|
861
|
|
final int compare (UStringView other, bool codePointOrder=false)
{
        // compare against the valid portion of the other string
        wchar[] rhs = other.get;
        return compare (rhs, codePointOrder);
}
|
|
866
|
|
867 /***********************************************************************
|
|
868
|
|
869 The comparison can be done in code unit order or in code
|
|
870 point order. They differ only in UTF-16 when comparing
|
|
871 supplementary code points (U+10000..U+10ffff) to BMP code
|
|
872 points near the end of the BMP (i.e., U+e000..U+ffff).
|
|
873
|
|
874 In code unit order, high BMP code points sort after
|
|
875 supplementary code points because they are stored as
|
|
876 pairs of surrogates which are at U+d800..U+dfff.
|
|
877
|
|
878 ***********************************************************************/
|
|
879
|
|
final int compare (wchar[] other, bool codePointOrder=false)
{
        // both sides are passed with explicit lengths
        return u_strCompare (content.ptr, len,
                             other.ptr,   other.length,
                             codePointOrder);
}
|
|
884
|
|
885 /***********************************************************************
|
|
886
|
|
887 The comparison can be done in UTF-16 code unit order or
|
|
888 in code point order. They differ only when comparing
|
|
889 supplementary code points (U+10000..U+10ffff) to BMP code
|
|
890 points near the end of the BMP (i.e., U+e000..U+ffff).
|
|
891
|
|
892 In code unit order, high BMP code points sort after
|
|
893 supplementary code points because they are stored as
|
|
894 pairs of surrogates which are at U+d800..U+dfff.
|
|
895
|
|
896 ***********************************************************************/
|
|
897
|
|
final int compareFolded (UStringView other, CaseOption option = CaseOption.Default)
{
        // Compare against the valid text only. 'other.content' can be
        // longer than other's length (a UString keeps spare capacity
        // there), which made the unused tail part of the comparison.
        return compareFolded (other.get, option);
}
|
|
902
|
|
903 /***********************************************************************
|
|
904
|
|
905 The comparison can be done in UTF-16 code unit order or
|
|
906 in code point order. They differ only when comparing
|
|
907 supplementary code points (U+10000..U+10ffff) to BMP code
|
|
908 points near the end of the BMP (i.e., U+e000..U+ffff).
|
|
909
|
|
910 In code unit order, high BMP code points sort after
|
|
911 supplementary code points because they are stored as
|
|
912 pairs of surrogates which are at U+d800..U+dfff.
|
|
913
|
|
914 ***********************************************************************/
|
|
915
|
|
final int compareFolded (wchar[] other, CaseOption option = CaseOption.Default)
{
        // Case-folded comparison of this view's valid units against
        // the given units, via the private two-array helper.
        wchar[] mine = get;

        return compareFolded (mine, other, option);
}
|
|
920
|
|
921 /***********************************************************************
|
|
922
|
|
923 Does this UStringView start with specified string?
|
|
924
|
|
925 ***********************************************************************/
|
|
926
|
|
final bool startsWith (UStringView other)
{
        // Delegate to the wchar[] overload with the other view's
        // valid code units.
        wchar[] prefix = other.get;

        return startsWith (prefix);
}
|
|
931
|
|
932 /***********************************************************************
|
|
933
|
|
934 Does this UStringView start with specified string?
|
|
935
|
|
936 ***********************************************************************/
|
|
937
|
|
final bool startsWith (wchar[] chars)
{
        // a prefix longer than this string can never match
        if (chars.length > len)
            return false;

        // Note: the leading units are compared with compareFolded
        // (default CaseOption), i.e. via u_strCaseCompare, so the
        // match is made on case-folded text.
        wchar[] head = content[0..chars.length];
        return compareFolded (head, chars) == 0;
}
|
|
944
|
|
945 /***********************************************************************
|
|
946
|
|
947 Does this UStringView end with specified string?
|
|
948
|
|
949 ***********************************************************************/
|
|
950
|
|
final bool endsWith (UStringView other)
{
        // Delegate to the wchar[] overload with the other view's
        // valid code units.
        wchar[] suffix = other.get;

        return endsWith (suffix);
}
|
|
955
|
|
956 /***********************************************************************
|
|
957
|
|
958 Does this UStringView end with specified string?
|
|
959
|
|
960 ***********************************************************************/
|
|
961
|
|
final bool endsWith (wchar[] chars)
{
        // a suffix longer than this string can never match
        if (chars.length > len)
            return false;

        // Note: the trailing units are compared with compareFolded
        // (default CaseOption), i.e. via u_strCaseCompare, so the
        // match is made on case-folded text.
        wchar[] tail = content[len-chars.length..len];
        return compareFolded (tail, chars) == 0;
}
|
|
968
|
|
969 /***********************************************************************
|
|
970
|
|
971 Find the first occurrence of a BMP code point in a string.
|
|
972 A surrogate code point is found only if its match in the
|
|
973 text is not part of a surrogate pair.
|
|
974
|
|
975 ***********************************************************************/
|
|
976
|
|
final uint indexOf (wchar c, uint start=0)
{
        // Search for code unit c at or after start (pinned to len).
        // Returns the index of the match relative to the beginning
        // of the string, or uint.max when u_memchr finds nothing.
        pinIndex (start);

        // Use pointer arithmetic rather than &content[start]: after
        // pinning, start may equal len, and indexing content at its
        // own length (e.g. an empty string) trips the array bounds
        // check even though no unit is ever read (count is zero).
        wchar* s = u_memchr (content.ptr + start, c, len-start);
        if (s)
            return s - content.ptr;
        return uint.max;
}
|
|
985
|
|
986 /***********************************************************************
|
|
987
|
|
988 Find the first occurrence of a substring in a string.
|
|
989
|
|
990 The substring is found at code point boundaries. That means
|
|
991 that if the substring begins with a trail surrogate or ends
|
|
992 with a lead surrogate, then it is found only if these
|
|
993 surrogates stand alone in the text. Otherwise, the substring
|
|
994 edge units would be matched against halves of surrogate pairs.
|
|
995
|
|
996 ***********************************************************************/
|
|
997
|
|
final uint indexOf (UStringView other, uint start=0)
{
        // Delegate to the wchar[] overload with the other view's
        // valid code units.
        wchar[] needle = other.get;

        return indexOf (needle, start);
}
|
|
1002
|
|
1003 /***********************************************************************
|
|
1004
|
|
1005 Find the first occurrence of a substring in a string.
|
|
1006
|
|
1007 The substring is found at code point boundaries. That means
|
|
1008 that if the substring begins with a trail surrogate or ends
|
|
1009 with a lead surrogate, then it is found only if these
|
|
1010 surrogates stand alone in the text. Otherwise, the substring
|
|
1011 edge units would be matched against halves of surrogate pairs.
|
|
1012
|
|
1013 ***********************************************************************/
|
|
1014
|
|
final uint indexOf (wchar[] chars, uint start=0)
{
        // Search for the substring chars at or after start (pinned
        // to len). Returns the index of the match relative to the
        // beginning of the string, or uint.max when u_strFindFirst
        // finds nothing.
        pinIndex (start);

        // Use pointer arithmetic rather than &content[start]: after
        // pinning, start may equal len, and indexing content at its
        // own length (e.g. an empty string) trips the array bounds
        // check even though the remaining length passed is zero.
        wchar* s = u_strFindFirst (content.ptr + start, len-start, chars.ptr, chars.length);
        if (s)
            return s - content.ptr;
        return uint.max;
}
|
|
1023
|
|
1024 /***********************************************************************
|
|
1025
|
|
1026 Find the last occurrence of a BMP code point in a string.
|
|
1027 A surrogate code point is found only if its match in the
|
|
1028 text is not part of a surrogate pair.
|
|
1029
|
|
1030 ***********************************************************************/
|
|
1031
|
|
final uint lastIndexOf (wchar c, uint start=uint.max)
{
        // Only the first start units (pinned to len) are searched;
        // the default of uint.max therefore covers the whole string.
        pinIndex (start);

        wchar* hit = u_memrchr (content.ptr, c, start);

        // uint.max signals "not found"
        if (! hit)
            return uint.max;
        return hit - content.ptr;
}
|
|
1040
|
|
1041 /***********************************************************************
|
|
1042
|
|
1043 Find the last occurrence of a substring in a string.
|
|
1044 The substring is matched at code point boundaries, so edge
|
|
1045 surrogates match only where they stand alone in the text.
|
|
1046
|
|
1047 ***********************************************************************/
|
|
1048
|
|
final uint lastIndexOf (UStringView other, uint start=uint.max)
{
        // Delegate to the wchar[] overload with the other view's
        // valid code units.
        wchar[] needle = other.get;

        return lastIndexOf (needle, start);
}
|
|
1053
|
|
1054 /***********************************************************************
|
|
1055
|
|
1056 Find the last occurrence of a substring in a string.
|
|
1057
|
|
1058 The substring is found at code point boundaries. That means
|
|
1059 that if the substring begins with a trail surrogate or ends
|
|
1060 with a lead surrogate, then it is found only if these
|
|
1061 surrogates stand alone in the text. Otherwise, the substring
|
|
1062 edge units would be matched against halves of surrogate pairs.
|
|
1063
|
|
1064 ***********************************************************************/
|
|
1065
|
|
final uint lastIndexOf (wchar[] chars, uint start=uint.max)
{
        // Only the first start units (pinned to len) are searched;
        // the default of uint.max therefore covers the whole string.
        pinIndex (start);

        wchar* hit = u_strFindLast (content.ptr, start, chars.ptr, chars.length);

        // uint.max signals "not found"
        if (! hit)
            return uint.max;
        return hit - content.ptr;
}
|
|
1074
|
|
1075 /***********************************************************************
|
|
1076
|
|
1077 Lowercase the characters into a separate UString.
|
|
1078
|
|
1079 Casing is locale-dependent and context-sensitive. The
|
|
1080 result may be longer or shorter than the original.
|
|
1081
|
|
1082 Note that the return value refers to the provided destination
|
|
1083 UString.
|
|
1084
|
|
1085 ***********************************************************************/
|
|
1086
|
|
final UString toLower (UString dst)
{
        // Convenience form: lowercase using ULocale.Default.
        UString lowered = toLower (dst, ULocale.Default);

        return lowered;
}
|
|
1091
|
|
1092 /***********************************************************************
|
|
1093
|
|
1094 Lowercase the characters into a separate UString.
|
|
1095
|
|
1096 Casing is locale-dependent and context-sensitive. The
|
|
1097 result may be longer or shorter than the original.
|
|
1098
|
|
1099 Note that the return value refers to the provided destination
|
|
1100 UString.
|
|
1101
|
|
1102 ***********************************************************************/
|
|
1103
|
|
final UString toLower (UString dst, inout ULocale locale)
{
        // Callback given to dst.format: lowercases this string's
        // len units into the supplied buffer via u_strToLower and
        // returns ICU's result count.
        uint lower (wchar* target, uint capacity, inout UErrorCode status)
        {
                return u_strToLower (target, capacity, content.ptr, len, ICU.toString(locale.name), status);
        }

        // ask for some headroom up front, since the result may be
        // longer than the original
        dst.expand (len + 32);

        // the message is handed to format alongside the callback
        // (presumably for error reporting -- confirm in UString.format)
        return dst.format (&lower, "toLower() failed");
}
|
|
1114
|
|
1115 /***********************************************************************
|
|
1116
|
|
1117 Uppercase the characters into a separate UString.
|
|
1118
|
|
1119 Casing is locale-dependent and context-sensitive. The
|
|
1120 result may be longer or shorter than the original.
|
|
1121
|
|
1122 Note that the return value refers to the provided destination
|
|
1123 UString.
|
|
1124
|
|
1125 ***********************************************************************/
|
|
1126
|
|
final UString toUpper (UString dst)
{
        // Convenience form: uppercase using ULocale.Default.
        UString raised = toUpper (dst, ULocale.Default);

        return raised;
}
|
|
1131
|
|
1132 /***********************************************************************
|
|
1133
|
|
1134 Uppercase the characters into a separate UString.
|
|
1135
|
|
1136 Casing is locale-dependent and context-sensitive. The
|
|
1137 result may be longer or shorter than the original.
|
|
1138
|
|
1139 Note that the return value refers to the provided destination
|
|
1140 UString.
|
|
1141
|
|
1142 ***********************************************************************/
|
|
1143
|
|
final UString toUpper (UString dst, inout ULocale locale)
{
        // Callback given to dst.format: uppercases this string's
        // len units into the supplied buffer via u_strToUpper and
        // returns ICU's result count.
        uint upper (wchar* target, uint capacity, inout UErrorCode status)
        {
                return u_strToUpper (target, capacity, content.ptr, len, ICU.toString(locale.name), status);
        }

        // ask for some headroom up front, since the result may be
        // longer than the original
        dst.expand (len + 32);

        // the message is handed to format alongside the callback
        // (presumably for error reporting -- confirm in UString.format)
        return dst.format (&upper, "toUpper() failed");
}
|
|
1154
|
|
1155 /***********************************************************************
|
|
1156
|
|
1157 Case-fold the characters into a separate UString.
|
|
1158
|
|
1159 Case-folding is locale-independent and not context-sensitive,
|
|
1160 but there is an option for whether to include or exclude
|
|
1161 mappings for dotted I and dotless i that are marked with 'I'
|
|
1162 in CaseFolding.txt. The result may be longer or shorter than
|
|
1163 the original.
|
|
1164
|
|
1165 Note that the return value refers to the provided destination
|
|
1166 UString.
|
|
1167
|
|
1168 ***********************************************************************/
|
|
1169
|
|
final UString toFolded (UString dst, CaseOption option = CaseOption.Default)
{
        // Callback given to dst.format: case-folds this string's
        // len units into the supplied buffer via u_strFoldCase and
        // returns ICU's result count.
        uint fold (wchar* target, uint capacity, inout UErrorCode status)
        {
                return u_strFoldCase (target, capacity, content.ptr, len, option, status);
        }

        // ask for some headroom up front, since the result may be
        // longer than the original
        dst.expand (len + 32);

        // the message is handed to format alongside the callback
        // (presumably for error reporting -- confirm in UString.format)
        return dst.format (&fold, "toFolded() failed");
}
|
|
1180
|
|
1181 /***********************************************************************
|
|
1182
|
|
1183 Converts a sequence of wchar (UTF-16) to UTF-8 bytes. If
|
|
1184 the output array is not provided, an array of appropriate
|
|
1185 size will be allocated and returned. Where the output is
|
|
1186 provided, it must be large enough to hold potentially four
|
|
1187 bytes per character for surrogate-pairs or three bytes per
|
|
1188 character for BMP only. Consider using UConverter where
|
|
1189 streaming conversions are required.
|
|
1190
|
|
1191 Returns an array slice representing the valid UTF8 content.
|
|
1192
|
|
1193 ***********************************************************************/
|
|
1194
|
|
final char[] toUtf8 (char[] dst = null)
{
        UErrorCode status;
        uint       produced;

        // no caller-supplied buffer: allocate four bytes per
        // UTF-16 code unit
        if (! cast(char*) dst)
            dst = new char[len * 4];

        // ICU stores the resulting byte count in 'produced'
        u_strToUTF8 (dst.ptr, dst.length, &produced, content.ptr, len, status);
        testError (status, "failed to convert to UTF8");

        // hand back only the portion that was written
        return dst [0..produced];
}
|
|
1207
|
|
1208 /***********************************************************************
|
|
1209
|
|
1210 Remove leading and trailing whitespace from this UStringView.
|
|
1211 Note that we slice the content to remove leading space.
|
|
1212
|
|
1213 ***********************************************************************/
|
|
1214
|
|
UStringView trim ()
{
        // Strips whitespace (0x20 or anything UChar.isWhiteSpace
        // accepts) from both ends of this view IN PLACE, by
        // adjusting len and re-slicing content, and returns this.
        wchar   c;
        uint    i = len;

        // cut off trailing white space: only len shrinks, the
        // content array keeps its tail
        while (i && ((c = charAt(i-1)) == 0x20 || UChar.isWhiteSpace (c)))
               --i;
        len = i;

        // now remove leading whitespace
        for (i=0; i < len && ((c = charAt(i)) == 0x20 || UChar.isWhiteSpace (c)); ++i) {}
        if (i)
           {
           len -= i;

           // Drop exactly the i leading units. The former slice
           // content[i..$-i] also removed i units from the tail,
           // which could leave content shorter than len (e.g.
           // "  abc" became a 1-unit array with len == 3) or form
           // an invalid slice when i exceeded half the array.
           content = content[i..$];
           }

        return this;
}
|
|
1235
|
|
1236 /***********************************************************************
|
|
1237
|
|
1238 Unescape a string of characters and write the resulting
|
|
1239 Unicode characters to the destination buffer. The following
|
|
1240 escape sequences are recognized:
|
|
1241
|
|
1242 \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
|
|
1243 \\Uhhhhhhhh 8 hex digits
|
|
1244 \\xhh 1-2 hex digits
|
|
1245 \\x{h...} 1-8 hex digits
|
|
1246 \\ooo 1-3 octal digits; o in [0-7]
|
|
1247 \\cX control-X; X is masked with 0x1F
|
|
1248
|
|
1249 as well as the standard ANSI C escapes:
|
|
1250
|
|
1251 \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
|
|
1252 \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
|
|
1253 \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
|
|
1254
|
|
1255 Anything else following a backslash is generically escaped.
|
|
1256 For example, "[a\\-z]" returns "[a-z]".
|
|
1257
|
|
1258 If an escape sequence is ill-formed, this method returns an
|
|
1259 empty string. An example of an ill-formed sequence is "\\u"
|
|
1260 followed by fewer than 4 hex digits.
|
|
1261
|
|
1262 ***********************************************************************/
|
|
1263
|
|
final UString unEscape ()
{
        // Builds a new UString from this string's units, replacing
        // each backslash escape with the value u_unescapeAt decodes.
        UString result = new UString (len);
        uint    pos    = 0;

        while (pos < len)
              {
              dchar ch = charAt (pos++);

              // 0x005C is the backslash introducing an escape
              if (ch == 0x005C)
                 {
                 // u_unescapeAt reads further units through the
                 // _charAt callback and advances pos past the escape
                 ch = u_unescapeAt (&_charAt, &pos, len, cast(void*) this);

                 // 0xFFFFFFFF is treated as the error marker:
                 // give up and return an empty string
                 if (ch == 0xFFFFFFFF)
                    {
                    result.truncate ();
                    break;
                    }
                 }

              result.append (ch);
              }

        return result;
}
|
|
1286
|
|
1287 /***********************************************************************
|
|
1288
|
|
1289 Is this code point a surrogate (U+d800..U+dfff)?
|
|
1290
|
|
1291 ***********************************************************************/
|
|
1292
|
|
final static bool isSurrogate (wchar c)
{
        // surrogates occupy the contiguous range U+d800..U+dfff
        return c >= 0xd800 && c <= 0xdfff;
}
|
|
1297
|
|
1298 /***********************************************************************
|
|
1299
|
|
1300 Is this code unit a lead surrogate (U+d800..U+dbff)?
|
|
1301
|
|
1302 ***********************************************************************/
|
|
1303
|
|
final static bool isLeading (wchar c)
{
        // lead surrogates occupy the contiguous range U+d800..U+dbff
        return c >= 0xd800 && c <= 0xdbff;
}
|
|
1308
|
|
1309 /***********************************************************************
|
|
1310
|
|
1311 Is this code unit a trail surrogate (U+dc00..U+dfff)?
|
|
1312
|
|
1313 ***********************************************************************/
|
|
1314
|
|
final static bool isTrailing (wchar c)
{
        // trail surrogates occupy the contiguous range U+dc00..U+dfff
        return c >= 0xdc00 && c <= 0xdfff;
}
|
|
1319
|
|
1320 /***********************************************************************
|
|
1321
|
|
1322 Adjust a random-access offset to a code point boundary
|
|
1323 at the start of a code point. If the offset points to
|
|
1324 the trail surrogate of a surrogate pair, then the offset
|
|
1325 is decremented. Otherwise, it is not modified.
|
|
1326
|
|
1327 ***********************************************************************/
|
|
1328
|
|
final uint getCharStart (uint i)
in {
   // i must address an existing code unit
   if (i >= len)
       exception ("index out of bounds");
   }
body
{
        // Step back by one only when i sits on the trail half of a
        // well-formed pair; the 'i' test keeps content[i-1] from
        // being read at offset zero.
        if (isTrailing (content[i]) && i && isLeading (content[i-1]))
            --i;
        return i;
}
|
|
1340
|
|
1341 /***********************************************************************
|
|
1342
|
|
1343 Adjust a random-access offset to a code point boundary
|
|
1344 after a code point. If the offset is behind the lead
|
|
1345 surrogate of a surrogate pair, then the offset is
|
|
1346 incremented. Otherwise, it is not modified.
|
|
1347
|
|
1348 ***********************************************************************/
|
|
1349
|
|
final uint getCharLimit (uint i)
in {
   // A limit is an exclusive end offset, so i == len (the end of
   // the string) is a legitimate value; only offsets beyond the
   // end are rejected.
   if (i > len)
       exception ("index out of bounds");
   }
body
{
        // Step forward by one only when i falls between the two
        // halves of a well-formed pair. 'i' guards the content[i-1]
        // read at offset zero, 'i < len' guards the content[i] read
        // at the end of the string.
        if (i && i < len && isLeading (content[i-1]) && isTrailing (content[i]))
            ++i;
        return i;
}
|
|
1361
|
|
1362 /***********************************************************************
|
|
1363
|
|
1364 Callback for C unescapeAt() function
|
|
1365
|
|
1366 ***********************************************************************/
|
|
1367
|
|
extern (C)
{
        // signature of the character-fetch callback that
        // u_unescapeAt expects
        typedef wchar function (uint offset, void* context) CharAt;

        // Fetches the code unit at 'offset' from the string passed
        // as 'context'. unEscape() supplies 'this' -- a UStringView
        // -- as the context, so cast back to that type rather than
        // to the UString subclass (a cast from void* is unchecked).
        private static wchar _charAt (uint offset, void* context)
        {
                return (cast(UStringView) context).charAt (offset);
        }
}
|
|
1377
|
|
1378 /***********************************************************************
|
|
1379
|
|
1380 Pin the given index to a valid position.
|
|
1381
|
|
1382 ***********************************************************************/
|
|
1383
|
|
final private void pinIndex (inout uint x)
{
        // clamp x (in place) so that it never exceeds len;
        // x == len is allowed
        if (len < x)
            x = len;
}
|
|
1389
|
|
1390 /***********************************************************************
|
|
1391
|
|
1392 Pin the given index and length to a valid position.
|
|
1393
|
|
1394 ***********************************************************************/
|
|
1395
|
|
final private void pinIndices (inout uint start, inout uint length)
{
        // clamp start (in place) into 0..len
        if (len < start)
            start = len;

        // clamp length (in place) to what remains after start
        uint remaining = len - start;
        if (remaining < length)
            length = remaining;
}
|
|
1404
|
|
1405 /***********************************************************************
|
|
1406
|
|
1407 Helper for comparison methods
|
|
1408
|
|
1409 ***********************************************************************/
|
|
1410
|
|
final private int compareFolded (wchar[] s1, wchar[] s2, CaseOption option = CaseOption.Default)
{
        // Runs u_strCaseCompare over the two arrays, passes the ICU
        // status to testError, then returns the comparison result.
        UErrorCode status;
        int        outcome;

        outcome = u_strCaseCompare (s1.ptr, s1.length, s2.ptr, s2.length, option, status);
        testError (status, "compareFolded failed");

        return outcome;
}
|
|
1419
|
|
1420
|
|
1421 /***********************************************************************
|
|
1422
|
|
1423 Bind the ICU functions from a shared library. This is
|
|
1424 complicated by the issues regarding D and DLLs on the
|
|
1425 Windows platform
|
|
1426
|
|
1427 ***********************************************************************/
|
|
1428
|
|
// Value returned by FunctionLoader.bind() in the static constructor; it is
// handed back to FunctionLoader.unbind() in the static destructor.
1429 private static void* library;
|
|
1430
|
|
1431 /***********************************************************************
|
|
1432
|
|
1433 ***********************************************************************/
|
|
1434
|
|
private static extern (C)
{
        // Function pointers with C linkage, one per ICU entry point
        // named in the 'targets' table. Entries sharing a signature
        // are declared together.

        // searching
        wchar* function (wchar*, uint, wchar*, uint)    u_strFindFirst,
                                                        u_strFindLast;
        wchar* function (wchar*, wchar, uint)           u_memchr,
                                                        u_memrchr;

        // comparison
        int    function (wchar*, uint, wchar*, uint, bool)                      u_strCompare;
        int    function (wchar*, uint, wchar*, uint, uint, inout UErrorCode)    u_strCaseCompare;

        // escapes and counting
        dchar  function (CharAt, uint*, uint, void*)    u_unescapeAt;
        uint   function (wchar*, uint)                  u_countChar32;

        // case mapping
        uint   function (wchar*, uint, wchar*, uint, char*, inout UErrorCode)   u_strToUpper,
                                                                                u_strToLower;
        uint   function (wchar*, uint, wchar*, uint, uint, inout UErrorCode)    u_strFoldCase;

        // UTF-8 conversion
        wchar* function (wchar*, uint, uint*, char*, uint, inout UErrorCode)    u_strFromUTF8;
        char*  function (char*, uint, uint*, wchar*, uint, inout UErrorCode)    u_strToUTF8;
}
|
|
1451
|
|
1452 /***********************************************************************
|
|
1453
|
|
1454 ***********************************************************************/
|
|
1455
|
|
static FunctionLoader.Bind[] targets =
[
        // each entry pairs the address of a function pointer
        // declared above with the symbol name handed to
        // FunctionLoader.bind()
        {cast(void**) &u_strFindFirst,          "u_strFindFirst"},
        {cast(void**) &u_strFindLast,           "u_strFindLast"},
        {cast(void**) &u_memchr,                "u_memchr"},
        {cast(void**) &u_memrchr,               "u_memrchr"},
        {cast(void**) &u_strCompare,            "u_strCompare"},
        {cast(void**) &u_strCaseCompare,        "u_strCaseCompare"},
        {cast(void**) &u_unescapeAt,            "u_unescapeAt"},
        {cast(void**) &u_countChar32,           "u_countChar32"},
        {cast(void**) &u_strToUpper,            "u_strToUpper"},
        {cast(void**) &u_strToLower,            "u_strToLower"},
        {cast(void**) &u_strFoldCase,           "u_strFoldCase"},
        {cast(void**) &u_strFromUTF8,           "u_strFromUTF8"},
        {cast(void**) &u_strToUTF8,             "u_strToUTF8"},
];
|
|
1472
|
|
1473 /***********************************************************************
|
|
1474
|
|
1475 ***********************************************************************/
|
|
1476
|
|
static this ()
{
        // bind every entry of 'targets' against the icuuc library
        // and keep the returned value for the static destructor
        library = FunctionLoader.bind (icuuc, targets);

        // the self-test below is disabled
        //test ();
}
|
|
1482
|
|
1483 /***********************************************************************
|
|
1484
|
|
1485 ***********************************************************************/
|
|
1486
|
|
static ~this ()
{
        // hand back the value obtained from FunctionLoader.bind()
        // in the static constructor
        FunctionLoader.unbind (library);
}
|
|
1491
|
|
1492 /***********************************************************************
|
|
1493
|
|
1494 ***********************************************************************/
|
|
1495
|
|
private static void test()
{
        // Ad-hoc smoke test; its only call site, in the static
        // constructor, is commented out. Nothing is asserted.
        UString s      = new UString (r"aaaqw \uabcd eaaa");
        char[]  suffix = "dssfsdff";

        // concatenation
        s ~ suffix ~ suffix;

        // index read and write
        wchar fourth = s[3];
        s[3] = 'Q';

        // search, unescape, case mapping, padding and trimming
        int where = s.indexOf ("qwe");
        s.unEscape ();
        s.toUpper (new UString);
        s.padLeading(2).padTrailing(2).trim();
}
|
|
1508 }
|