Mercurial > projects > dwt2
view base/src/java/nonstandard/SafeUtf.d @ 125:c43718956f21 default tip
Updated the snippets status.
author | Jacob Carlborg <doob@me.com> |
---|---|
date | Thu, 11 Aug 2011 19:55:14 +0200 |
parents | 536e43f63c81 |
children |
line wrap: on
line source
/**
 * Stuff for working with narrow strings.
 * Safe because of strong type checking.
 *
 * Authors: Denis Shelomovskij <verylonglogin.reg@gmail.com>
 */
module java.nonstandard.SafeUtf;

import java.nonstandard.UtfBase;

private const bool UTFTypeCheck = true;

mixin(UtfBaseText);

// Self-test: runs the UTF index/shift helpers used below over one sample text,
// encoded as UTF-8, UTF-16 and UTF-32, and compares every result against
// hand-written tables. The sample holds two characters of each UTF-8 width
// (1, 2, 3 and 4 code units) followed by one more single-unit character.
unittest {
    auto narrow = "abаб回家\U00064321\U00064321d";
    assert(narrow.length == 2*1 + 2*2 + 2*3 + 2*4 + 1);

    auto wide = "abаб回家\U00064321\U00064321d"w;
    assert(wide.length == 6*1 + 2*2 + 1);

    auto codePoints = "abаб回家\U00064321\U00064321d"d;

    // One flag per code unit: 1 where a character starts, 0 on continuation units.
    auto narrowStarts = [1, 1,  1,0, 1,0,  1,0,0, 1,0,0,  1,0,0,0, 1,0,0,0,  1];
    auto wideStarts   = [1, 1,  1, 1,  1, 1,  1,0, 1,0,  1];
    assert(narrow.length == narrowStarts.length);
    assert(wide.length == wideStarts.length);

    // Per character: width in code units for each encoding, and the UTF-8
    // offset of the character from the beginning of the text.
    auto narrowWidths  = [1, 1, 2, 2, 3, 3, 4, 4, 1];
    auto wideWidths    = [1, 1, 1, 1, 1, 1, 2, 2, 1];
    auto narrowOffsets = [0, 1, 2, 4, 6, 9, 12, 16, 20];
    assert(narrowWidths.length == codePoints.length);
    assert(wideWidths.length == codePoints.length);
    assert(narrowOffsets.length == codePoints.length);

    UTF8index lastStart = 0;
    UCSindex charNo = 0;

    foreach(UTF8index pos, char codeUnit; narrow) {
        assert(narrow.isUTF8sequenceStart(pos) == narrowStarts[pos]);

        if(narrowStarts[pos]) {
            auto rawPos = val(pos);

            narrow.validateUTF8index(pos);
            assert(narrow.UTF8strideAt(pos) == narrowWidths[charNo]);

            // Character count <-> code unit shift, forwards from the start
            // and backwards from the current character.
            assert(narrow.toUTF8shift(0, charNo) == narrowOffsets[charNo]);
            assert(narrow.toUTF8shift(narrowOffsets[charNo], -charNo) == -narrowOffsets[charNo]);

            if(pos)
                assert(narrow.offsetBefore(pos) == lastStart);

            // Characters counted on either side of the current position.
            assert(narrow[0 .. rawPos].UCScount == charNo);
            assert(narrow[rawPos .. $].UCScount == narrowWidths.length - charNo);

            UTF8shift consumed;
            assert(narrow.dcharAt(pos, consumed) == codePoints[charNo]);
            assert(consumed == narrowWidths[charNo]);

            // Neighbour lookups are only checked once a previous character exists.
            if(pos) {
                assert(narrow.dcharBefore(pos) == narrow.dcharAt(lastStart));
                assert(narrow.dcharAfter(lastStart) == narrow.dcharAt(pos));
            }

            auto expectedSlice = narrow[rawPos .. rawPos + narrowWidths[charNo]];
            assert(narrow.dcharAsStringAt(pos, consumed) == expectedSlice);
            assert(consumed == expectedSlice.length);
            assert(dcharToString(narrow.dcharAt(pos)) == expectedSlice);

            lastStart = pos;
            ++charNo;
        }

        // Adjusting any index, including one inside a sequence, must give the
        // start of the most recently seen character.
        UTF8index adjusted = pos;
        narrow.adjustUTF8index(adjusted);
        assert(adjusted == lastStart);
    }

    charNo = 0;
    foreach(UTF16index pos, wchar codeUnit; wide) {
        if(!wideStarts[pos])
            continue;

        // The index validation call is left disabled, as in the original:
        //s.validateUTF16index(i);
        UTF16shift consumed;
        assert(wide.dcharAt(pos, consumed) == codePoints[charNo]);
        assert(consumed == wideWidths[charNo]);
        ++charNo;
    }

    // Finally run validation on the index one past the last code unit.
    narrow.validateUTF8index(narrow.length);
}