Mercurial > projects > dwt2
diff base/src/java/nonstandard/XmlTranscode.d @ 27:1bf55a6eb092
Renamed java tree to base
author | Frank Benoit <benoit@tionex.de> |
---|---|
date | Sat, 21 Mar 2009 11:33:57 +0100 |
parents | java/src/java/nonstandard/XmlTranscode.d@9b96950f2c3c |
children | 9f4c18c268b2 |
line wrap: on
line diff
module java.nonstandard.XmlTranscode;

import java.lang.util;
import java.lang.exceptions;
import java.lang.Math;

/++
 + Decode XML entities into UTF8 string.
 + Eg. "&amp;" -> "&", "&#38;" -> "&", "&#x26;" -> "&"
 +
 + Recognised are the five predefined entities (lt, gt, amp, quot, apos) and
 + numeric character references in decimal (&#NNN;) or hexadecimal (&#xHHH;)
 + notation.
 +
 + Params:
 +   str = the text that may contain entities.
 +
 + Returns: str itself when it contains no '&'; otherwise a newly allocated
 + string with every entity replaced by the character it stands for.
 + The given string is not modified.
 +
 + Throws: RuntimeException if an entity is unterminated, unknown, malformed
 + or names a value that is not a Unicode code point.
 +/
String xmlUnescape( String str ){

    void error(){
        throw new RuntimeException( "xmlUnescape" );
    }

    // Without any '&' there is nothing to decode, hand the input back.
    bool hasEntity = false;
    foreach( char c; str ){
        if( c is '&' ){
            hasEntity = true;
            break;
        }
    }
    if( !hasEntity ){
        return str;
    }

    // Every entity is at least as long as the UTF8 bytes it decodes to,
    // so the result never needs more room than the input.
    char[] res = new char[ str.length ];
    res.length = 0;

    String src = str;
    while( src.length ){
        if( src[0] !is '&' ){
            // plain byte (also the bytes of multi byte UTF8 sequences)
            res ~= src[0];
            src = src[1..$];
            continue;
        }

        src = src[1..$]; // go past '&'
        if( src.length < 2 ) error();

        // search semi
        // The window of 10 chars also bounds the number of digits, so the
        // numeric value below cannot overflow 32 bit.
        int len = Math.min( cast(int)src.length, 10 ); // limit semi search to possible longest entityname
        int semi = java.lang.util.indexOf( src[0 .. len ], ';' );
        if( semi < 1 ) error(); // no semi found, or empty name "&;"

        String entityName = src[ 0 .. semi ]; // name without semi
        dchar entityValue = 0;
        switch( entityName ){
        case "lt":   entityValue = '<';  break;
        case "gt":   entityValue = '>';  break;
        case "amp":  entityValue = '&';  break;
        case "quot": entityValue = '\"'; break;
        case "apos": entityValue = '\''; break;
        default:
            // everything else must be a numeric character reference
            if( entityName[0] !is '#' ) error();

            if( entityName.length > 1 && entityName[1] is 'x' ){
                // hexadecimal: "#x" followed by at least one digit
                if( semi < 3 ) error();
                foreach( char hex; entityName[2..$] ){
                    entityValue <<= 4;
                    if( hex >= '0' && hex <= '9' ){
                        entityValue |= ( hex - '0' );
                    }
                    else if( hex >= 'a' && hex <= 'f' ){
                        entityValue |= ( hex - 'a' + 10 );
                    }
                    else if( hex >= 'A' && hex <= 'F' ){
                        entityValue |= ( hex - 'A' + 10 );
                    }
                    else{
                        error();
                    }
                }
            }
            else{
                // decimal: "#" followed by at least one digit
                if( semi < 2 ) error();
                foreach( char dec; entityName[1..$] ){
                    if( dec >= '0' && dec <= '9' ){
                        entityValue *= 10;
                        entityValue += ( dec - '0' );
                    }
                    else{
                        error();
                    }
                }
            }

            // Values outside the Unicode range and surrogates have no UTF8 form.
            if( entityValue > 0x10FFFF ) error();
            if( entityValue >= 0xD800 && entityValue <= 0xDFFF ) error();
            break;
        }

        res ~= String_valueOf( entityValue );
        src = src[ semi +1 .. $ ]; // go past semi
    }

    // res is referenced nowhere else; the cast only matters if String is
    // not the same type as char[].
    return cast(String) res;
}


/++
 + Encode XML entities into UTF8 string.
 + First checks if processing is needed.
 + If not, the original string is returned.
 + If processing is needed, a new string is allocated.
 +
 + '<', '>', '&', '"' and '\'' are written as their named entities.
 + Carriage return, line feed, tab and every character above 0x7F are written
 + as hexadecimal character references (&#xHH; with 2, 4, 6 or 8 upper case
 + digits, depending on the magnitude of the code point).
 +
 + Params:
 +   xml = the text to encode.
 +
 + Returns: xml itself if nothing had to be replaced, else the encoded copy.
 +/
String xmlEscape( String xml ){

    bool needsReplacement( dchar c ){
        switch( c ){
        case '<':
        case '>':
        case '&':
        case '\"':
        case '\'':
        case '\r':
        case '\n':
        case '\u0009':
            return true;
        default:
            return c > 0x7F;
        }
    }

    // Maps 0..15 to '0'..'9', 'A'..'F'.
    char toHexDigit( int i ){
        if( i < 10 ) return cast(char)( '0' + i );
        return cast(char)( 'A' + i - 10 );
    }

    // Check if processing is needed.
    // Looking at single UTF8 code units is enough here: every byte of a
    // multi byte sequence is above 0x7F and therefore triggers processing.
    bool needed = false;
    foreach( char c; xml ){
        if( needsReplacement( c )){
            needed = true;
            break;
        }
    }
    if( !needed ){
        return xml;
    }

    // yes, do a new string, start with +20 chars
    char[] res = new char[ xml.length + 20 ];
    res.length = 0;

    foreach( dchar c; xml ){

        if( !needsReplacement( c )){
            res ~= c;
            continue;
        }

        res ~= '&';
        switch( c ){
        case '<':  res ~= "lt";   break;
        case '>':  res ~= "gt";   break;
        case '&':  res ~= "amp";  break;
        case '\"': res ~= "quot"; break;
        case '\'': res ~= "apos"; break;
        default:
            // '\r', '\n', tab and all non ASCII characters
            res ~= "#x";

            // always an even number of digits: 2, 4, 6 or 8
            int digits = 8;
            if( c <= 0xFF ){
                digits = 2;
            }
            else if( c <= 0xFFFF ){
                digits = 4;
            }
            else if( c <= 0xFFFFFF ){
                digits = 6;
            }

            // most significant nibble first
            for( int shift = ( digits - 1 ) * 4; shift >= 0; shift -= 4 ){
                res ~= toHexDigit(( c >> shift ) & 0x0F );
            }
            break;
        }
        res ~= ';';
    }

    // res is referenced nowhere else; the cast only matters if String is
    // not the same type as char[].
    return cast(String) res;
}