projects/dwt2: base/src/java/nonstandard/XmlTranscode.d comparison

comparison base/src/java/nonstandard/XmlTranscode.d @ 27:1bf55a6eb092

Renamed java tree to base

author	Frank Benoit <benoit@tionex.de>
date	Sat, 21 Mar 2009 11:33:57 +0100
parents	java/src/java/nonstandard/XmlTranscode.d@9b96950f2c3c
children	9f4c18c268b2

comparison

equal deleted inserted replaced

-:f589fc20a5f9
+:1bf55a6eb092
+module java.nonstandard.XmlTranscode;
+import java.lang.util;
+import java.lang.exceptions;
+import java.lang.Math;
+/++
++ Decode XML entities into UTF8 string.
++ Eg. "&amp;" -> "&", "&#38;" -> "&", "&#x26;" -> "&"
++ Throws TextException on failure
++ The given string is modified.
++/
+String xmlUnescape( String str ){
+void error(){
+throw new RuntimeException( "xmlUnescape" );
+}
+// &lt; ...
+// &#1234;
+// &#x12AF;
+String src = str;
+String trg = str;
+while( src.length ){
+if( src[0] !is '&' ){
+//trg[0] = src[0];
+trg = trg[1..$];
+src = src[1..$];
+}
+else{
+src = src[1..$]; //  go past '&'
+if( src.length < 2 ) error();
+// search semi
+int len = Math.min( cast(int)src.length, 10 ); // limit semi search to possible longest entityname
+int semi = java.lang.util.indexOf( src[0 .. len ], ';' );
+if( semi is -1 ) error(); // no semi found
+String entityName = src[ 0 .. semi ]; // name without semi
+dchar entityValue = 0;
+switch( entityName ){
+case "lt":   entityValue = '<'; break;
+case "gt":   entityValue = '>'; break;
+case "amp":  entityValue = '&'; break;
+case "quot": entityValue = '\"'; break;
+case "apos": entityValue = '\''; break;
+default:
+if( entityName[0] is 'x' ){
+if( semi < 2 ) error();
+if( semi > 9 ) error();
+foreach( hex; entityName[1..$] ){
+entityValue <<= 4;
+if( hex >= '0' && hex <= '9' ){
+entityValue |= ( hex - '0' );
+}
+else if( hex >= 'a' && hex <= 'f' ){
+entityValue |= ( hex - 'a' );
+}
+else if( hex >= 'A' && hex <= 'F' ){
+entityValue |= ( hex - 'A' );
+}
+else{
+error();
+}
+}
+}
+else{
+if( semi < 1 ) error();
+if( semi > 9 ) error();
+foreach( dec; entityName[1..$] ){
+if( dec >= '0' && dec <= '9' ){
+entityValue *= 10;
+entityValue += ( dec - '0' );
+}
+else{
+error();
+}
+}
+}
+}
+String res = String_valueOf( entityValue );
+trg = trg[ res.length .. $ ];
+src = src[ semi +1 .. $ ]; // go past semi
+}
+}
+return str[ 0 .. trg.ptr-str.ptr ];
+}
+/++
++ Encode XML entities into UTF8 string.
++ First checks if processing is needed.
++ If not, the original string is returned.
++ If processing is needed, a new string is allocated.
++/
+String xmlEscape( String xml ){
+bool needsReplacement( dchar c ){
+switch( c ){
+case '<':
+case '>':
+case '&':
+case '\"':
+case '\'':
+case '\r':
+case '\n':
+case '\u0009':
+return true;
+default:
+return c > 0x7F;
+}
+}
+// Check if processing is needed
+foreach( char c; xml ){
+if( needsReplacement( c )){
+goto Lprocess;
+}
+}
+return xml;
+Lprocess:
+// yes, do a new string, start with +20 chars
+char[] res = new char[ xml.length + 20 ];
+res.length = 0;
+foreach( dchar c; xml ){
+if( !needsReplacement( c )){
+res ~= c;
+}
+else{
+res ~= '&';
+switch( c ){
+case '<': res ~= "lt"; break;
+case '>': res ~= "gt"; break;
+case '&': res ~= "amp"; break;
+case '\"': res ~= "quot"; break;
+case '\'': res ~= "apos"; break;
+case '\r': case '\n': case '\u0009':
+default:
+char toHexDigit( int i ){
+if( i < 10 ) return '0'+i;
+return 'A'+i-10;
+}
+res ~= "#x";
+if( c <= 0xFF ){
+res ~= toHexDigit(( c >> 4 ) & 0x0F );
+res ~= toHexDigit(( c >> 0 ) & 0x0F );
+}
+else if( c <= 0xFFFF ){
+res ~= toHexDigit(( c >> 12 ) & 0x0F );
+res ~= toHexDigit(( c >> 8 ) & 0x0F );
+res ~= toHexDigit(( c >> 4 ) & 0x0F );
+res ~= toHexDigit(( c >> 0 ) & 0x0F );
+}
+else if( c <= 0xFFFFFF ){
+res ~= toHexDigit(( c >> 20 ) & 0x0F );
+res ~= toHexDigit(( c >> 16 ) & 0x0F );
+res ~= toHexDigit(( c >> 12 ) & 0x0F );
+res ~= toHexDigit(( c >> 8 ) & 0x0F );
+res ~= toHexDigit(( c >> 4 ) & 0x0F );
+res ~= toHexDigit(( c >> 0 ) & 0x0F );
+}
+else {
+res ~= toHexDigit(( c >> 28 ) & 0x0F );
+res ~= toHexDigit(( c >> 24 ) & 0x0F );
+res ~= toHexDigit(( c >> 20 ) & 0x0F );
+res ~= toHexDigit(( c >> 16 ) & 0x0F );
+res ~= toHexDigit(( c >> 12 ) & 0x0F );
+res ~= toHexDigit(( c >> 8 ) & 0x0F );
+res ~= toHexDigit(( c >> 4 ) & 0x0F );
+res ~= toHexDigit(( c >> 0 ) & 0x0F );
+}
+break;
+}
+res ~= ';';
+}
+}
+}

Mercurial > projects > dwt2

comparison base/src/java/nonstandard/XmlTranscode.d @ 27:1bf55a6eb092