Mercurial > projects > dwt-win
annotate dwt/dwthelper/XmlTranscode.d @ 212:ab60f3309436
reverted the char[] to String and use an alias.
author | Frank Benoit <benoit@tionex.de> |
---|---|
date | Mon, 05 May 2008 00:12:38 +0200 |
parents | be4ce760802a |
children |
rev | line source |
---|---|
198 | 1 module dwt.dwthelper.XmlTranscode; |
2 | |
3 import dwt.dwthelper.utils; | |
4 import tango.core.Exception; | |
5 | |
/++
 + Decode XML entities into UTF8 string.
 + Eg. "&amp;" -> "&", "&#38;" -> "&", "&#x26;" -> "&"
 +
 + Supported are the five predefined entities (lt, gt, amp, quot, apos),
 + decimal character references ("&#NNN;") and hexadecimal character
 + references ("&#xHHH;").
 +
 + Throws TextException on failure (unterminated, empty or unknown entity,
 + malformed number, value above U+10FFFF).
 +
 + The given string is modified: decoding happens in place and the returned
 + value is a slice of str covering the decoded text.
 +/
String xmlUnescape( String str ){

    void error(){
        throw new TextException( "xmlUnescape" );
    }
    // Entity forms handled below:
    // &lt; ...
    // &#1234;
    // &#x12AF;

    // src is the not yet consumed input, trg the not yet written output.
    // Both walk over the same buffer; a decoded entity is never longer than
    // its encoded form, so trg never overtakes src.
    String src = str;
    String trg = str;
    while( src.length ){
        if( src[0] !is '&' ){
            trg[0] = src[0];
            trg = trg[1..$];
            src = src[1..$];
        }
        else{
            src = src[1..$]; // go past '&'
            if( src.length < 2 ) error();

            // search semi
            int len = Math.min( src.length, 10 ); // limit semi search to possible longest entityname
            int semi = tango.text.Util.locate( src[0 .. len ], ';' );
            if( semi is len ) error(); // no semi found
            if( semi is 0 ) error();   // "&;" has no entity name at all

            String entityName = src[ 0 .. semi ]; // name without semi
            dchar entityValue = 0;
            switch( entityName ){
                case "lt": entityValue = '<'; break;
                case "gt": entityValue = '>'; break;
                case "amp": entityValue = '&'; break;
                case "quot": entityValue = '\"'; break;
                case "apos": entityValue = '\''; break;
                default:
                    // everything else must be a numeric character reference
                    if( entityName[0] !is '#' ) error();
                    if( semi >= 2 && entityName[1] is 'x' ){
                        // hexadecimal: "#x" followed by at least one digit
                        if( semi < 3 ) error();
                        foreach( hex; entityName[2..$] ){
                            entityValue <<= 4;
                            if( hex >= '0' && hex <= '9' ){
                                entityValue |= ( hex - '0' );
                            }
                            else if( hex >= 'a' && hex <= 'f' ){
                                entityValue |= ( hex - 'a' + 10 );
                            }
                            else if( hex >= 'A' && hex <= 'F' ){
                                entityValue |= ( hex - 'A' + 10 );
                            }
                            else{
                                error();
                            }
                        }
                    }
                    else{
                        // decimal: "#" followed by at least one digit
                        if( semi < 2 ) error();
                        foreach( dec; entityName[1..$] ){
                            if( dec >= '0' && dec <= '9' ){
                                entityValue *= 10;
                                entityValue += ( dec - '0' );
                            }
                            else{
                                error();
                            }
                        }
                    }
                    // semi is at most 9, so the accumulation above cannot
                    // overflow; still reject values outside the Unicode range.
                    if( entityValue > 0x10FFFF ) error();
            }
            // Encode the code point as UTF-8 directly into the output window.
            // NOTE(review): passing 'ate' presumably makes Tango stop instead
            // of reallocating when the destination is too small - confirm.
            dchar[1] arr;
            arr[0] = entityValue;
            uint ate = 0;
            String res = tango.text.convert.Utf.toString( arr, trg, &ate );
            trg = trg[ res.length .. $ ];
            src = src[ semi +1 .. $ ]; // go past semi
        }
    }
    // everything in front of trg has been written
    return str[ 0 .. trg.ptr-str.ptr ];
}
89 | |
90 | |
/++
 + Encode XML entities into UTF8 string.
 + First checks if processing is needed.
 + If not, the original string is returned.
 + If processing is needed, a new string is allocated and returned.
 +
 + '<', '>', '&', '"' and '\'' become their named entities. CR, LF, TAB and
 + every character above 0x7F become a hexadecimal character reference
 + ("&#xHH;") with 2, 4, 6 or 8 uppercase hex digits depending on the value.
 +/
String xmlEscape( String xml ){
    // true for every character that cannot be copied to the output verbatim
    bool needsReplacement( dchar c ){
        switch( c ){
            case '<':
            case '>':
            case '&':
            case '\"':
            case '\'':
            case '\r':
            case '\n':
            case '\u0009':
                return true;
            default:
                return c > 0x7F;
        }
    }

    // maps 0..15 to '0'..'9', 'A'..'F'
    char toHexDigit( int i ){
        if( i < 10 ) return cast(char)( '0'+i );
        return cast(char)( 'A'+i-10 );
    }

    // Check if processing is needed. Scanning raw UTF-8 code units is
    // sufficient here: every unit of a multi-byte sequence is > 0x7F.
    bool needed = false;
    foreach( char c; xml ){
        if( needsReplacement( c )){
            needed = true;
            break;
        }
    }
    if( !needed ){
        return xml;
    }

    // yes, do a new string, start with +20 chars
    // (allocate, then reset the length - presumably to reserve capacity
    // for the appends below)
    String res = new char[ xml.length + 20 ];
    res.length = 0;

    foreach( dchar c; xml ){

        if( !needsReplacement( c )){
            res ~= c;
        }
        else{
            res ~= '&';
            switch( c ){
                case '<': res ~= "lt"; break;
                case '>': res ~= "gt"; break;
                case '&': res ~= "amp"; break;
                case '\"': res ~= "quot"; break;
                case '\'': res ~= "apos"; break;
                case '\r': case '\n': case '\u0009':
                default:
                    res ~= "#x";
                    // number of hex digits: one byte -> 2, two bytes -> 4, ...
                    int digits;
                    if( c <= 0xFF ){
                        digits = 2;
                    }
                    else if( c <= 0xFFFF ){
                        digits = 4;
                    }
                    else if( c <= 0xFFFFFF ){
                        digits = 6;
                    }
                    else {
                        digits = 8;
                    }
                    // emit the nibbles, most significant first
                    for( int shift = ( digits - 1 ) * 4; shift >= 0; shift -= 4 ){
                        res ~= toHexDigit(( c >> shift ) & 0x0F );
                    }
                    break;
            }
            res ~= ';';
        }
    }
    return res;
}
181 |