Mercurial > projects > dwt2
comparison base/src/java/nonstandard/XmlTranscode.d @ 27:1bf55a6eb092
Renamed java tree to base
author | Frank Benoit <benoit@tionex.de> |
---|---|
date | Sat, 21 Mar 2009 11:33:57 +0100 |
parents | java/src/java/nonstandard/XmlTranscode.d@9b96950f2c3c |
children | 9f4c18c268b2 |
comparison
equal
deleted
inserted
replaced
26:f589fc20a5f9 | 27:1bf55a6eb092 |
---|---|
1 module java.nonstandard.XmlTranscode; | |
2 | |
3 import java.lang.util; | |
4 import java.lang.exceptions; | |
5 import java.lang.Math; | |
6 | |
/++
 + Decode XML entities into a UTF8 string.
 + Eg. "&amp;" -> "&", "&#38;" -> "&", "&#x26;" -> "&"
 +
 + Supported are the five predefined entities (lt, gt, amp, quot, apos)
 + and decimal / hexadecimal character references.
 +
 + Throws RuntimeException on failure (unterminated, unknown or malformed
 + entity, or a character reference that is not a valid code point).
 +
 + The given string is not modified. If it contains no '&' it is returned
 + as is, otherwise a new string is built.
 +/
String xmlUnescape( String str ){

    void error(){
        throw new RuntimeException( "xmlUnescape" );
    }

    // Value of one digit in the given radix; calls error() for any other char.
    uint digitValue( char c, uint radix ){
        uint v = uint.max;
        if( c >= '0' && c <= '9' ){
            v = c - '0';
        }
        else if( c >= 'a' && c <= 'f' ){
            v = c - 'a' + 10;
        }
        else if( c >= 'A' && c <= 'F' ){
            v = c - 'A' + 10;
        }
        if( v >= radix ) error();
        return v;
    }

    // Check if processing is needed
    bool hasEntity = false;
    foreach( char c; str ){
        if( c is '&' ){
            hasEntity = true;
            break;
        }
    }
    if( !hasEntity ){
        return str;
    }

    // &lt; ...
    // &#1234;
    // &#x12AF;
    String result;
    String src = str;
    while( src.length ){
        if( src[0] !is '&' ){
            // copy the whole run of plain characters up to the next '&'
            size_t run = 1;
            while( run < src.length && src[run] !is '&' ){
                run++;
            }
            result ~= src[ 0 .. run ];
            src = src[ run .. $ ];
            continue;
        }

        src = src[ 1 .. $ ]; // go past '&'
        if( src.length < 2 ) error();

        // search semi
        int len = Math.min( cast(int)src.length, 10 ); // limit semi search to possible longest entityname
        int semi = java.lang.util.indexOf( src[ 0 .. len ], ';' );
        if( semi < 1 ) error(); // no semi found, or empty entity name

        String entityName = src[ 0 .. semi ]; // name without semi
        dchar entityValue = 0;
        switch( entityName ){
        case "lt":   entityValue = '<';  break;
        case "gt":   entityValue = '>';  break;
        case "amp":  entityValue = '&';  break;
        case "quot": entityValue = '\"'; break;
        case "apos": entityValue = '\''; break;
        default:
            // everything else must be a character reference: "#ddd" or "#xhhh"
            if( entityName[0] !is '#' ) error();
            String digits = entityName[ 1 .. $ ];
            uint radix = 10;
            if( digits.length > 0 && digits[0] is 'x' ){
                radix = 16;
                digits = digits[ 1 .. $ ];
            }
            if( digits.length is 0 ) error();

            // The name is at most 9 chars long, so there are at most
            // 8 decimal or 7 hex digits: the value cannot overflow a uint.
            uint value = 0;
            foreach( char d; digits ){
                value = value * radix + digitValue( d, radix );
            }

            // reject values that are not encodable as a UTF8 character
            if( value is 0 || value > 0x10FFFF ) error();
            if( value >= 0xD800 && value <= 0xDFFF ) error();
            entityValue = cast(dchar) value;
            break;
        }
        result ~= String_valueOf( entityValue );
        src = src[ semi + 1 .. $ ]; // go past semi
    }
    return result;
}
87 | |
88 | |
/++
 + Encode XML entities into UTF8 string.
 + First checks if processing is needed.
 + If not, the original string is returned.
 + If processing is needed, a new string is allocated.
 +
 + '<', '>', '&', '"' and '\'' are written as named entities. CR, LF, TAB
 + and every character above 0x7F are written as hexadecimal character
 + references with 2, 4, 6 or 8 digits (eg. TAB -> "&#x09;").
 +/
String xmlEscape( String xml ){
    bool needsReplacement( dchar c ){
        switch( c ){
        case '<':
        case '>':
        case '&':
        case '\"':
        case '\'':
        case '\r':
        case '\n':
        case '\u0009':
            return true;
        default:
            return c > 0x7F;
        }
    }

    // Upper case hex digit for a value in 0 .. 15
    char toHexDigit( uint i ){
        if( i < 10 ) return cast(char)( '0' + i );
        return cast(char)( 'A' + i - 10 );
    }

    // Check if processing is needed.
    // Scanning by code unit is sufficient here: every unit of a multi byte
    // UTF8 sequence is > 0x7F and so is reported as needing replacement.
    bool needed = false;
    foreach( char c; xml ){
        if( needsReplacement( c )){
            needed = true;
            break;
        }
    }
    if( !needed ){
        return xml;
    }

    // yes, do a new string, start with +20 chars
    char[] res = new char[ xml.length + 20 ];
    res.length = 0;

    foreach( dchar c; xml ){

        if( !needsReplacement( c )){
            // characters that need no replacement are always <= 0x7F
            res ~= cast(char) c;
            continue;
        }

        res ~= '&';
        switch( c ){
        case '<':  res ~= "lt";   break;
        case '>':  res ~= "gt";   break;
        case '&':  res ~= "amp";  break;
        case '\"': res ~= "quot"; break;
        case '\'': res ~= "apos"; break;
        default:
            // '\r', '\n', '\t' and all non ASCII characters
            res ~= "#x";
            // always emit whole bytes: 2, 4, 6 or 8 hex digits
            int digits = 8;
            if( c <= 0xFF ){
                digits = 2;
            }
            else if( c <= 0xFFFF ){
                digits = 4;
            }
            else if( c <= 0xFFFFFF ){
                digits = 6;
            }
            for( int shift = ( digits - 1 ) * 4; shift >= 0; shift -= 4 ){
                res ~= toHexDigit(( c >> shift ) & 0x0F );
            }
            break;
        }
        res ~= ';';
    }

    // res is a freshly built buffer that is not referenced anywhere else
    return cast(String) res;
}
179 |