Mercurial > projects > ldc
annotate druntime/src/compiler/dmd/util/utf.d @ 1458:e0b2d67cfe7c
Added druntime (this should be removed once it works).
author | Robert Clipsham <robert@octarineparrot.com> |
---|---|
date | Tue, 02 Jun 2009 17:43:06 +0100 |
parents | |
children |
rev | line source |
---|---|
1458
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
1 /******************************************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
2 * Encode and decode UTF-8, UTF-16 and UTF-32 strings. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
3 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
4 * For Win32 systems, the C wchar_t type is UTF-16 and corresponds to the D |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
5 * wchar type. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
6 * For Posix systems, the C wchar_t type is UTF-32 and corresponds to |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
7 * the D utf.dchar type. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
8 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
9 * UTF character support is restricted to (\u0000 <= character <= \U0010FFFF). |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
10 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
11 * See_Also: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
12 * $(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia)<br> |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
13 * $(LINK http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8)<br> |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
14 * $(LINK http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
15 * Macros: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
16 * WIKI = Phobos/StdUtf |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
17 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
18 * Copyright: Copyright Digital Mars 2003 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
19 * License: <a href="http://www.boost.org/LICENSE_1_0.txt>Boost License 1.0</a>. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
20 * Authors: Walter Bright, Sean Kelly |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
21 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
22 * Copyright Digital Mars 2003 - 2009. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
23 * Distributed under the Boost Software License, Version 1.0. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
24 * (See accompanying file LICENSE_1_0.txt or copy at |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
25 * http://www.boost.org/LICENSE_1_0.txt) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
26 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
27 module rt.util.utf; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
28 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
29 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
30 extern (C) void onUnicodeError( string msg, size_t idx ); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
31 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
32 /******************************* |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
33 * Test if c is a valid UTF-32 character. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
34 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
35 * \uFFFE and \uFFFF are considered valid by this function, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
36 * as they are permitted for internal use by an application, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
37 * but they are not allowed for interchange by the Unicode standard. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
38 * |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
39 * Returns: true if it is, false if not. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
40 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
41 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
42 bool isValidDchar(dchar c) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
43 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
44 /* Note: FFFE and FFFF are specifically permitted by the |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
45 * Unicode standard for application internal use, but are not |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
46 * allowed for interchange. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
47 * (thanks to Arcane Jill) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
48 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
49 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
50 return c < 0xD800 || |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
51 (c > 0xDFFF && c <= 0x10FFFF /*&& c != 0xFFFE && c != 0xFFFF*/); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
52 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
53 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
54 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
55 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
56 debug(utf) printf("utf.isValidDchar.unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
57 assert(isValidDchar(cast(dchar)'a') == true); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
58 assert(isValidDchar(cast(dchar)0x1FFFFF) == false); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
59 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
60 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
61 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
62 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
63 immutable UTF8stride = |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
64 [ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
65 cast(ubyte) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
66 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
67 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
68 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
69 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
70 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
71 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
72 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
73 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
74 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
75 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
76 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
77 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
78 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
79 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
80 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
81 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
82 ]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
83 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
84 /** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
85 * stride() returns the length of a UTF-8 sequence starting at index i |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
86 * in string s. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
87 * Returns: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
88 * The number of bytes in the UTF-8 sequence or |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
89 * 0xFF meaning s[i] is not the start of of UTF-8 sequence. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
90 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
91 uint stride(in char[] s, size_t i) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
92 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
93 return UTF8stride[s[i]]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
94 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
95 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
96 /** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
97 * stride() returns the length of a UTF-16 sequence starting at index i |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
98 * in string s. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
99 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
100 uint stride(in wchar[] s, size_t i) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
101 { uint u = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
102 return 1 + (u >= 0xD800 && u <= 0xDBFF); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
103 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
104 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
105 /** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
106 * stride() returns the length of a UTF-32 sequence starting at index i |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
107 * in string s. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
108 * Returns: The return value will always be 1. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
109 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
110 uint stride(in dchar[] s, size_t i) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
111 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
112 return 1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
113 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
114 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
115 /******************************************* |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
116 * Given an index i into an array of characters s[], |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
117 * and assuming that index i is at the start of a UTF character, |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
118 * determine the number of UCS characters up to that index i. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
119 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
120 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
121 size_t toUCSindex(in char[] s, size_t i) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
122 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
123 size_t n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
124 size_t j; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
125 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
126 for (j = 0; j < i; ) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
127 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
128 j += stride(s, j); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
129 n++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
130 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
131 if (j > i) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
132 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
133 onUnicodeError("invalid UTF-8 sequence", j); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
134 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
135 return n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
136 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
137 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
138 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
139 size_t toUCSindex(in wchar[] s, size_t i) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
140 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
141 size_t n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
142 size_t j; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
143 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
144 for (j = 0; j < i; ) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
145 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
146 j += stride(s, j); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
147 n++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
148 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
149 if (j > i) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
150 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
151 onUnicodeError("invalid UTF-16 sequence", j); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
152 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
153 return n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
154 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
155 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
156 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
157 size_t toUCSindex(in dchar[] s, size_t i) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
158 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
159 return i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
160 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
161 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
162 /****************************************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
163 * Given a UCS index n into an array of characters s[], return the UTF index. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
164 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
165 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
166 size_t toUTFindex(in char[] s, size_t n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
167 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
168 size_t i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
169 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
170 while (n--) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
171 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
172 uint j = UTF8stride[s[i]]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
173 if (j == 0xFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
174 onUnicodeError("invalid UTF-8 sequence", i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
175 i += j; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
176 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
177 return i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
178 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
179 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
180 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
181 size_t toUTFindex(in wchar[] s, size_t n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
182 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
183 size_t i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
184 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
185 while (n--) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
186 { wchar u = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
187 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
188 i += 1 + (u >= 0xD800 && u <= 0xDBFF); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
189 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
190 return i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
191 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
192 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
193 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
194 size_t toUTFindex(in dchar[] s, size_t n) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
195 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
196 return n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
197 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
198 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
199 /* =================== Decode ======================= */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
200 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
201 /*************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
202 * Decodes and returns character starting at s[idx]. idx is advanced past the |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
203 * decoded character. If the character is not well formed, a UtfException is |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
204 * thrown and idx remains unchanged. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
205 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
206 dchar decode(in char[] s, inout size_t idx) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
207 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
208 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
209 assert(idx >= 0 && idx < s.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
210 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
211 out (result) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
212 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
213 assert(isValidDchar(result)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
214 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
215 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
216 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
217 size_t len = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
218 dchar V; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
219 size_t i = idx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
220 char u = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
221 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
222 if (u & 0x80) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
223 { uint n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
224 char u2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
225 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
226 /* The following encodings are valid, except for the 5 and 6 byte |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
227 * combinations: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
228 * 0xxxxxxx |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
229 * 110xxxxx 10xxxxxx |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
230 * 1110xxxx 10xxxxxx 10xxxxxx |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
231 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
232 * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
233 * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
234 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
235 for (n = 1; ; n++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
236 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
237 if (n > 4) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
238 goto Lerr; // only do the first 4 of 6 encodings |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
239 if (((u << n) & 0x80) == 0) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
240 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
241 if (n == 1) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
242 goto Lerr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
243 break; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
244 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
245 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
246 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
247 // Pick off (7 - n) significant bits of B from first byte of octet |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
248 V = cast(dchar)(u & ((1 << (7 - n)) - 1)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
249 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
250 if (i + (n - 1) >= len) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
251 goto Lerr; // off end of string |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
252 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
253 /* The following combinations are overlong, and illegal: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
254 * 1100000x (10xxxxxx) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
255 * 11100000 100xxxxx (10xxxxxx) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
256 * 11110000 1000xxxx (10xxxxxx 10xxxxxx) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
257 * 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
258 * 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
259 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
260 u2 = s[i + 1]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
261 if ((u & 0xFE) == 0xC0 || |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
262 (u == 0xE0 && (u2 & 0xE0) == 0x80) || |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
263 (u == 0xF0 && (u2 & 0xF0) == 0x80) || |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
264 (u == 0xF8 && (u2 & 0xF8) == 0x80) || |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
265 (u == 0xFC && (u2 & 0xFC) == 0x80)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
266 goto Lerr; // overlong combination |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
267 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
268 for (uint j = 1; j != n; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
269 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
270 u = s[i + j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
271 if ((u & 0xC0) != 0x80) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
272 goto Lerr; // trailing bytes are 10xxxxxx |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
273 V = (V << 6) | (u & 0x3F); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
274 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
275 if (!isValidDchar(V)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
276 goto Lerr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
277 i += n; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
278 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
279 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
280 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
281 V = cast(dchar) u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
282 i++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
283 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
284 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
285 idx = i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
286 return V; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
287 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
288 Lerr: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
289 onUnicodeError("invalid UTF-8 sequence", i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
290 return V; // dummy return |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
291 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
292 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
293 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
294 { size_t i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
295 dchar c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
296 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
297 debug(utf) printf("utf.decode.unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
298 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
299 static s1 = "abcd"c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
300 i = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
301 c = decode(s1, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
302 assert(c == cast(dchar)'a'); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
303 assert(i == 1); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
304 c = decode(s1, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
305 assert(c == cast(dchar)'b'); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
306 assert(i == 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
307 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
308 static s2 = "\xC2\xA9"c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
309 i = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
310 c = decode(s2, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
311 assert(c == cast(dchar)'\u00A9'); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
312 assert(i == 2); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
313 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
314 static s3 = "\xE2\x89\xA0"c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
315 i = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
316 c = decode(s3, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
317 assert(c == cast(dchar)'\u2260'); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
318 assert(i == 3); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
319 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
320 static s4 = |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
321 [ "\xE2\x89"c[], // too short |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
322 "\xC0\x8A", |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
323 "\xE0\x80\x8A", |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
324 "\xF0\x80\x80\x8A", |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
325 "\xF8\x80\x80\x80\x8A", |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
326 "\xFC\x80\x80\x80\x80\x8A", |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
327 ]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
328 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
329 for (int j = 0; j < s4.length; j++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
330 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
331 try |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
332 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
333 i = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
334 c = decode(s4[j], i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
335 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
336 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
337 catch (Object o) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
338 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
339 i = 23; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
340 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
341 assert(i == 23); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
342 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
343 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
344 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
345 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
346 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
347 dchar decode(in wchar[] s, inout size_t idx) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
348 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
349 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
350 assert(idx >= 0 && idx < s.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
351 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
352 out (result) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
353 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
354 assert(isValidDchar(result)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
355 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
356 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
357 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
358 string msg; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
359 dchar V; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
360 size_t i = idx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
361 uint u = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
362 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
363 if (u & ~0x7F) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
364 { if (u >= 0xD800 && u <= 0xDBFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
365 { uint u2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
366 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
367 if (i + 1 == s.length) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
368 { msg = "surrogate UTF-16 high value past end of string"; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
369 goto Lerr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
370 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
371 u2 = s[i + 1]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
372 if (u2 < 0xDC00 || u2 > 0xDFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
373 { msg = "surrogate UTF-16 low value out of range"; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
374 goto Lerr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
375 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
376 u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
377 i += 2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
378 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
379 else if (u >= 0xDC00 && u <= 0xDFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
380 { msg = "unpaired surrogate UTF-16 value"; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
381 goto Lerr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
382 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
383 else if (u == 0xFFFE || u == 0xFFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
384 { msg = "illegal UTF-16 value"; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
385 goto Lerr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
386 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
387 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
388 i++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
389 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
390 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
391 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
392 i++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
393 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
394 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
395 idx = i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
396 return cast(dchar)u; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
397 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
398 Lerr: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
399 onUnicodeError(msg, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
400 return cast(dchar)u; // dummy return |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
401 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
402 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
403 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
404 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
405 dchar decode(in dchar[] s, inout size_t idx) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
406 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
407 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
408 assert(idx >= 0 && idx < s.length); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
409 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
410 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
411 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
412 size_t i = idx; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
413 dchar c = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
414 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
415 if (!isValidDchar(c)) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
416 goto Lerr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
417 idx = i + 1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
418 return c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
419 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
420 Lerr: |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
421 onUnicodeError("invalid UTF-32 value", i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
422 return c; // dummy return |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
423 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
424 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
425 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
426 /* =================== Encode ======================= */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
427 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
428 /******************************* |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
429 * Encodes character c and appends it to array s[]. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
430 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
431 void encode(inout char[] s, dchar c) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
432 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
433 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
434 assert(isValidDchar(c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
435 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
436 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
437 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
438 char[] r = s; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
439 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
440 if (c <= 0x7F) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
441 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
442 r ~= cast(char) c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
443 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
444 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
445 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
446 char[4] buf; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
447 uint L; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
448 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
449 if (c <= 0x7FF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
450 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
451 buf[0] = cast(char)(0xC0 | (c >> 6)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
452 buf[1] = cast(char)(0x80 | (c & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
453 L = 2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
454 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
455 else if (c <= 0xFFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
456 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
457 buf[0] = cast(char)(0xE0 | (c >> 12)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
458 buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
459 buf[2] = cast(char)(0x80 | (c & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
460 L = 3; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
461 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
462 else if (c <= 0x10FFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
463 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
464 buf[0] = cast(char)(0xF0 | (c >> 18)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
465 buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
466 buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
467 buf[3] = cast(char)(0x80 | (c & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
468 L = 4; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
469 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
470 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
471 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
472 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
473 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
474 r ~= buf[0 .. L]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
475 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
476 s = r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
477 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
478 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
479 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
480 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
481 debug(utf) printf("utf.encode.unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
482 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
483 char[] s = "abcd".dup; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
484 encode(s, cast(dchar)'a'); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
485 assert(s.length == 5); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
486 assert(s == "abcda"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
487 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
488 encode(s, cast(dchar)'\u00A9'); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
489 assert(s.length == 7); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
490 assert(s == "abcda\xC2\xA9"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
491 //assert(s == "abcda\u00A9"); // BUG: fix compiler |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
492 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
493 encode(s, cast(dchar)'\u2260'); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
494 assert(s.length == 10); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
495 assert(s == "abcda\xC2\xA9\xE2\x89\xA0"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
496 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
497 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
498 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
499 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
500 void encode(inout wchar[] s, dchar c) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
501 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
502 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
503 assert(isValidDchar(c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
504 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
505 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
506 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
507 wchar[] r = s; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
508 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
509 if (c <= 0xFFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
510 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
511 r ~= cast(wchar) c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
512 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
513 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
514 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
515 wchar[2] buf; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
516 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
517 buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
518 buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
519 r ~= buf; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
520 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
521 s = r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
522 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
523 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
524 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
525 void encode(inout dchar[] s, dchar c) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
526 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
527 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
528 assert(isValidDchar(c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
529 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
530 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
531 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
532 s ~= c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
533 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
534 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
535 /** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
536 Returns the code length of $(D c) in the encoding using $(D C) as a |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
537 code point. The code is returned in character count, not in bytes. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
538 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
539 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
540 ubyte codeLength(C)(dchar c) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
541 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
542 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
543 static if (C.sizeof == 1) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
544 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
545 return |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
546 c <= 0x7F ? 1 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
547 : c <= 0x7FF ? 2 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
548 : c <= 0xFFFF ? 3 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
549 : c <= 0x10FFFF ? 4 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
550 : (assert(false), 6); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
551 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
552 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
553 else static if (C.sizeof == 2) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
554 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
555 return c <= 0xFFFF ? 1 : 2; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
556 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
557 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
558 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
559 static assert(C.sizeof == 4); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
560 return 1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
561 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
562 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
563 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
564 /* =================== Validation ======================= */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
565 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
566 /*********************************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
567 Checks to see if string is well formed or not. $(D S) can be an array |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
568 of $(D char), $(D wchar), or $(D dchar). Throws a $(D UtfException) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
569 if it is not. Use to check all untrusted input for correctness. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
570 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
571 void validate(S)(in S s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
572 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
573 auto len = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
574 for (size_t i = 0; i < len; ) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
575 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
576 decode(s, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
577 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
578 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
579 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
580 /* =================== Conversion to UTF8 ======================= */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
581 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
582 char[] toUTF8(char[4] buf, dchar c) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
583 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
584 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
585 assert(isValidDchar(c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
586 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
587 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
588 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
589 if (c <= 0x7F) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
590 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
591 buf[0] = cast(char) c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
592 return buf[0 .. 1]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
593 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
594 else if (c <= 0x7FF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
595 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
596 buf[0] = cast(char)(0xC0 | (c >> 6)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
597 buf[1] = cast(char)(0x80 | (c & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
598 return buf[0 .. 2]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
599 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
600 else if (c <= 0xFFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
601 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
602 buf[0] = cast(char)(0xE0 | (c >> 12)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
603 buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
604 buf[2] = cast(char)(0x80 | (c & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
605 return buf[0 .. 3]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
606 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
607 else if (c <= 0x10FFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
608 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
609 buf[0] = cast(char)(0xF0 | (c >> 18)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
610 buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
611 buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
612 buf[3] = cast(char)(0x80 | (c & 0x3F)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
613 return buf[0 .. 4]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
614 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
615 assert(0); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
616 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
617 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
618 /******************* |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
619 * Encodes string s into UTF-8 and returns the encoded string. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
620 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
621 string toUTF8(string s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
622 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
623 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
624 validate(s); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
625 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
626 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
627 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
628 return s; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
629 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
630 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
631 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
632 string toUTF8(in wchar[] s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
633 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
634 char[] r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
635 size_t i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
636 size_t slen = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
637 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
638 r.length = slen; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
639 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
640 for (i = 0; i < slen; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
641 { wchar c = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
642 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
643 if (c <= 0x7F) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
644 r[i] = cast(char)c; // fast path for ascii |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
645 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
646 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
647 r.length = i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
648 foreach (dchar c; s[i .. slen]) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
649 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
650 encode(r, c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
651 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
652 break; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
653 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
654 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
655 return cast(string)r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
656 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
657 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
658 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
659 string toUTF8(in dchar[] s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
660 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
661 char[] r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
662 size_t i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
663 size_t slen = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
664 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
665 r.length = slen; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
666 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
667 for (i = 0; i < slen; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
668 { dchar c = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
669 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
670 if (c <= 0x7F) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
671 r[i] = cast(char)c; // fast path for ascii |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
672 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
673 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
674 r.length = i; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
675 foreach (dchar d; s[i .. slen]) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
676 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
677 encode(r, d); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
678 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
679 break; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
680 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
681 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
682 return cast(string)r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
683 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
684 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
685 /* =================== Conversion to UTF16 ======================= */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
686 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
687 wchar[] toUTF16(wchar[2] buf, dchar c) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
688 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
689 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
690 assert(isValidDchar(c)); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
691 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
692 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
693 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
694 if (c <= 0xFFFF) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
695 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
696 buf[0] = cast(wchar) c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
697 return buf[0 .. 1]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
698 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
699 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
700 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
701 buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
702 buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
703 return buf[0 .. 2]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
704 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
705 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
706 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
707 /**************** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
708 * Encodes string s into UTF-16 and returns the encoded string. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
709 * toUTF16z() is suitable for calling the 'W' functions in the Win32 API that take |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
710 * an LPWSTR or LPCWSTR argument. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
711 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
712 wstring toUTF16(in char[] s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
713 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
714 wchar[] r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
715 size_t slen = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
716 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
717 r.length = slen; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
718 r.length = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
719 for (size_t i = 0; i < slen; ) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
720 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
721 dchar c = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
722 if (c <= 0x7F) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
723 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
724 i++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
725 r ~= cast(wchar)c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
726 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
727 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
728 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
729 c = decode(s, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
730 encode(r, c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
731 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
732 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
733 return cast(wstring)r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
734 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
735 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
736 alias const(wchar)* wptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
737 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
738 wptr toUTF16z(in char[] s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
739 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
740 wchar[] r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
741 size_t slen = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
742 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
743 r.length = slen + 1; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
744 r.length = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
745 for (size_t i = 0; i < slen; ) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
746 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
747 dchar c = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
748 if (c <= 0x7F) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
749 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
750 i++; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
751 r ~= cast(wchar)c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
752 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
753 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
754 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
755 c = decode(s, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
756 encode(r, c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
757 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
758 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
759 r ~= "\000"; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
760 return r.ptr; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
761 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
762 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
763 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
764 wstring toUTF16(wstring s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
765 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
766 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
767 validate(s); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
768 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
769 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
770 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
771 return s; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
772 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
773 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
774 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
775 wstring toUTF16(in dchar[] s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
776 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
777 wchar[] r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
778 size_t slen = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
779 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
780 r.length = slen; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
781 r.length = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
782 for (size_t i = 0; i < slen; i++) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
783 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
784 encode(r, s[i]); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
785 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
786 return cast(wstring)r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
787 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
788 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
789 /* =================== Conversion to UTF32 ======================= */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
790 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
791 /***** |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
792 * Encodes string s into UTF-32 and returns the encoded string. |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
793 */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
794 dstring toUTF32(in char[] s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
795 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
796 dchar[] r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
797 size_t slen = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
798 size_t j = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
799 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
800 r.length = slen; // r[] will never be longer than s[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
801 for (size_t i = 0; i < slen; ) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
802 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
803 dchar c = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
804 if (c >= 0x80) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
805 c = decode(s, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
806 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
807 i++; // c is ascii, no need for decode |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
808 r[j++] = c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
809 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
810 return cast(dstring)r[0 .. j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
811 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
812 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
813 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
814 dstring toUTF32(in wchar[] s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
815 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
816 dchar[] r; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
817 size_t slen = s.length; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
818 size_t j = 0; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
819 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
820 r.length = slen; // r[] will never be longer than s[] |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
821 for (size_t i = 0; i < slen; ) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
822 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
823 dchar c = s[i]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
824 if (c >= 0x80) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
825 c = decode(s, i); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
826 else |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
827 i++; // c is ascii, no need for decode |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
828 r[j++] = c; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
829 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
830 return cast(dstring)r[0 .. j]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
831 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
832 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
833 /** ditto */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
834 dstring toUTF32(dstring s) |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
835 in |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
836 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
837 validate(s); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
838 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
839 body |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
840 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
841 return s; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
842 } |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
843 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
844 /* ================================ tests ================================== */ |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
845 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
846 unittest |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
847 { |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
848 debug(utf) printf("utf.toUTF.unittest\n"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
849 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
850 auto c = "hello"c[]; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
851 auto w = toUTF16(c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
852 assert(w == "hello"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
853 auto d = toUTF32(c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
854 assert(d == "hello"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
855 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
856 c = toUTF8(w); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
857 assert(c == "hello"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
858 d = toUTF32(w); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
859 assert(d == "hello"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
860 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
861 c = toUTF8(d); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
862 assert(c == "hello"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
863 w = toUTF16(d); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
864 assert(w == "hello"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
865 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
866 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
867 c = "hel\u1234o"; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
868 w = toUTF16(c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
869 assert(w == "hel\u1234o"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
870 d = toUTF32(c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
871 assert(d == "hel\u1234o"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
872 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
873 c = toUTF8(w); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
874 assert(c == "hel\u1234o"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
875 d = toUTF32(w); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
876 assert(d == "hel\u1234o"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
877 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
878 c = toUTF8(d); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
879 assert(c == "hel\u1234o"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
880 w = toUTF16(d); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
881 assert(w == "hel\u1234o"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
882 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
883 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
884 c = "he\U0010AAAAllo"; |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
885 w = toUTF16(c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
886 //foreach (wchar c; w) printf("c = x%x\n", c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
887 //foreach (wchar c; cast(wstring)"he\U0010AAAAllo") printf("c = x%x\n", c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
888 assert(w == "he\U0010AAAAllo"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
889 d = toUTF32(c); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
890 assert(d == "he\U0010AAAAllo"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
891 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
892 c = toUTF8(w); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
893 assert(c == "he\U0010AAAAllo"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
894 d = toUTF32(w); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
895 assert(d == "he\U0010AAAAllo"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
896 |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
897 c = toUTF8(d); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
898 assert(c == "he\U0010AAAAllo"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
899 w = toUTF16(d); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
900 assert(w == "he\U0010AAAAllo"); |
e0b2d67cfe7c
Added druntime (this should be removed once it works).
Robert Clipsham <robert@octarineparrot.com>
parents:
diff
changeset
|
901 } |