Mercurial > projects > ddmd
view dmd/StringExp.d @ 0:10317f0c89a5
Initial commit
author | korDen |
---|---|
date | Sat, 24 Oct 2009 08:42:06 +0400 |
parents | |
children | 7427ded8caf7 |
line wrap: on
line source
module dmd.StringExp;

import dmd.Expression;
import dmd.backend.elem;
import dmd.InterState;
import dmd.TypeSArray;
import dmd.CastExp;
import dmd.MATCH;
import dmd.TY;
import dmd.TypeDArray;
import dmd.Type;
import dmd.TOK;
import dmd.OutBuffer;
import dmd.Loc;
import dmd.Scope;
import dmd.IRState;
import dmd.StringExp;
import dmd.HdrGenState;
import dmd.Utf;
import dmd.backend.dt_t;
import dmd.backend.Symbol;
import dmd.backend.StringTab;
import dmd.backend.Util;
import dmd.backend.SC;
import dmd.backend.TYM;
import dmd.backend.FL;
import dmd.backend.TYPE;
import dmd.backend.OPER;

import core.stdc.string;

/**
 * Expression node for a string literal.
 *
 * The literal's code units live in `string_`; `len` counts code units (not
 * bytes) and `sz` is the size of one code unit.  `committed` records whether
 * the element type has been fixed (by a postfix or by a cast).
 */
class StringExp : Expression
{
    void* string_;      // char, wchar, or dchar data
    size_t len;         // number of chars, wchars, or dchars
    ubyte sz;           // 1: char, 2: wchar, 4: dchar
    ubyte committed;    // !=0 if type is committed
    ubyte postfix;      // 'c', 'w', 'd'

    /// Creates an uncommitted literal from `s` with no postfix.
    this(Loc loc, string s)
    {
        this(loc, s, 0);
    }

    /**
     * Creates an uncommitted literal that points at the bytes of `s`
     * (the data is not copied) and remembers the given postfix.
     */
    this(Loc loc, string s, ubyte postfix)
    {
        super(loc, TOK.TOKstring, StringExp.sizeof);

        this.string_ = cast(void*)s.ptr;
        this.len = s.length;
        this.sz = 1;
        this.committed = 0;
        this.postfix = postfix;
    }

    /// Not implemented: unconditionally asserts.
    int equals(Object o)
    {
        assert(false);
    }

    /// Not implemented: unconditionally asserts.
    string toChars()
    {
        assert(false);
    }

    /**
     * Assigns a type to the literal if it does not have one yet.
     *
     * For the 'd' and 'w' postfixes the data is decoded as UTF-8 and
     * re-encoded into a fresh buffer (4-byte units resp. UTF-16), and
     * `len`/`sz` are updated to match.  'c' only marks the literal as
     * committed; no postfix leaves it uncommitted.  Decoding stops at the
     * first error reported by utf_decodeChar.
     *
     * Returns: this
     */
    Expression semantic(Scope sc)
    {
version (LOGSEMANTIC) {
        printf("StringExp.semantic() %s\n", toChars());
}
        if (!type)
        {
            scope OutBuffer buffer = new OutBuffer();
            size_t newlen = 0;
            string p;
            size_t u;
            dchar c;

            switch (postfix)
            {
                case 'd':
                    for (u = 0; u < len;)
                    {
                        p = utf_decodeChar(cast(string)string_[0..len], &u, &c);
                        if (p !is null)
                        {
                            error("%s", p);
                            break;
                        }
                        else
                        {
                            buffer.write4(c);
                            newlen++;
                        }
                    }
                    buffer.write4(0);
                    string_ = buffer.extractData();
                    len = newlen;
                    sz = 4;
                    //type = new TypeSArray(Type.tdchar, new IntegerExp(loc, len, Type.tindex));
                    type = new TypeDArray(Type.tdchar.invariantOf());
                    committed = 1;
                    break;

                case 'w':
                    for (u = 0; u < len;)
                    {
                        p = utf_decodeChar(cast(string)string_[0..len], &u, &c);
                        if (p !is null)
                        {
                            error("%s", p);
                            break;
                        }
                        else
                        {
                            buffer.writeUTF16(c);
                            newlen++;
                            // code points >= 0x10000 are counted as two units
                            if (c >= 0x10000)
                                newlen++;
                        }
                    }
                    buffer.writeUTF16(0);
                    string_ = buffer.extractData();
                    len = newlen;
                    sz = 2;
                    //type = new TypeSArray(Type.twchar, new IntegerExp(loc, len, Type.tindex));
                    type = new TypeDArray(Type.twchar.invariantOf());
                    committed = 1;
                    break;

                case 'c':
                    committed = 1;
                    // falls through to the default (char) typing

                default:
                    //type = new TypeSArray(Type.tchar, new IntegerExp(loc, len, Type.tindex));
                    type = new TypeDArray(Type.tchar.invariantOf());
                    break;
            }
            type = type.semantic(loc, sc);
            //type = type.invariantOf();
            //printf("type = %s\n", type.toChars());
        }
        return this;
    }

    /// Not implemented: unconditionally asserts.
    Expression interpret(InterState* istate)
    {
        assert(false);
    }

    /// Not implemented: unconditionally asserts.
    size_t length()
    {
        assert(false);
    }

    /// Not implemented: unconditionally asserts.
    StringExp toUTF8(Scope sc)
    {
        assert(false);
    }

    /**
     * Delegates to Expression.implicitCastTo, then restores the original
     * `committed` flag on the result when the result is still a string
     * literal, so an uncommitted literal stays uncommitted.
     */
    Expression implicitCastTo(Scope sc, Type t)
    {
        //printf("StringExp.implicitCastTo(%s of type %s) => %s\n", toChars(), type.toChars(), t.toChars());
        ubyte committed = this.committed;
        Expression e = Expression.implicitCastTo(sc, t);
        if (e.op == TOK.TOKstring)
        {
            // Retain polysemous nature if it started out that way
            (cast(StringExp)e).committed = committed;
        }
        return e;
    }

    /**
     * Computes how well this literal implicitly converts to `t`.
     *
     * An uncommitted literal whose element type is char/wchar/dchar gets
     * special treatment for static array, dynamic array and pointer
     * targets; everything else is deferred to Expression.implicitConvTo.
     *
     * NOTE(review): the Tarray -> Tsarray branch calls length(), which is
     * currently a stub that asserts.
     */
    MATCH implicitConvTo(Type t)
    {
        MATCH m;

static if (false) {
        printf("StringExp.implicitConvTo(this=%s, committed=%d, type=%s, t=%s)\n",
            toChars(), committed, type.toChars(), t.toChars());
}
        if (!committed)
        {
            if (t.ty == TY.Tpointer && t.nextOf().ty == TY.Tvoid)
            {
                return MATCH.MATCHnomatch;
            }
            if (type.ty == TY.Tsarray || type.ty == TY.Tarray || type.ty == TY.Tpointer)
            {
                TY tyn = type.nextOf().ty;
                if (tyn == TY.Tchar || tyn == TY.Twchar || tyn == TY.Tdchar)
                {
                    Type tn;
                    MATCH mm;

                    switch (t.ty)
                    {
                        case TY.Tsarray:
                            if (type.ty == TY.Tsarray)
                            {
                                if ((cast(TypeSArray)type).dim.toInteger() !=
                                    (cast(TypeSArray)t).dim.toInteger())
                                    return MATCH.MATCHnomatch;
                                TY tynto = t.nextOf().ty;
                                if (tynto == TY.Tchar || tynto == TY.Twchar || tynto == TY.Tdchar)
                                    return MATCH.MATCHexact;
                            }
                            else if (type.ty == TY.Tarray)
                            {
                                if (length() > (cast(TypeSArray)t).dim.toInteger())
                                    return MATCH.MATCHnomatch;
                                TY tynto = t.nextOf().ty;
                                if (tynto == TY.Tchar || tynto == TY.Twchar || tynto == TY.Tdchar)
                                    return MATCH.MATCHexact;
                            }
                            // falls through to the array/pointer handling

                        case TY.Tarray:
                        case TY.Tpointer:
                            tn = t.nextOf();
                            mm = MATCH.MATCHexact;
                            if (type.nextOf().mod != tn.mod)
                            {
                                if (!tn.isConst())
                                    return MATCH.MATCHnomatch;
                                mm = MATCH.MATCHconst;
                            }
                            switch (tn.ty)
                            {
                                case TY.Tchar:
                                case TY.Twchar:
                                case TY.Tdchar:
                                    return mm;

                                default:
                                    // A D switch does not silently fall out
                                    // when no case matches, so the
                                    // "not a character type" path needs an
                                    // explicit default to reach the generic
                                    // conversion below.
                                    break;
                            }
                            break;

                        default:
                            break;    ///
                    }
                }
            }
        }
        return Expression.implicitConvTo(t);
static if (false) {
        m = cast(MATCH)type.implicitConvTo(t);
        if (m)
        {
            return m;
        }
        return MATCH.MATCHnomatch;
}
    }

    /// Packs a (from, to) pair of type kinds into one switchable value.
    static uint X(TY tf, TY tt)
    {
        return ((tf) * 256 + (tt));
    }

    /**
     * Casts the literal to type `t`.
     *
     * Returns either a StringExp (this, or a modified copy) whose data has
     * been re-encoded and/or resized for the target type, or a CastExp
     * wrapping the literal when no literal-level conversion applies.
     */
    Expression castTo(Scope sc, Type t)
    {
        /* This follows copy-on-write; any changes to 'this'
         * will result in a copy.
         * The this.string member is considered immutable.
         */
        StringExp se;
        Type tb;
        int copied = 0;

        //printf("StringExp.castTo(t = %s), '%s' committed = %d\n", t.toChars(), toChars(), committed);

        if (!committed && t.ty == TY.Tpointer && t.nextOf().ty == TY.Tvoid)
        {
            error("cannot convert string literal to void*");
        }

        se = this;
        if (!committed)
        {
            se = cast(StringExp)copy();
            se.committed = 1;
            copied = 1;
        }

        if (type == t)
        {
            return se;
        }

        tb = t.toBasetype();
        //printf("\ttype = %s\n", type.toChars());
        if (tb.ty == TY.Tdelegate && type.toBasetype().ty != TY.Tdelegate)
            return Expression.castTo(sc, t);

        Type typeb = type.toBasetype();
        if (typeb == tb)
        {
            if (!copied)
            {
                se = cast(StringExp)copy();
                copied = 1;
            }
            se.type = t;
            return se;
        }

        if (committed && tb.ty == TY.Tsarray && typeb.ty == TY.Tarray)
        {
            // Reinterpret the same bytes with the target's element size.
            se = cast(StringExp)copy();
            se.sz = cast(ubyte)tb.nextOf().size();
            se.len = (len * sz) / se.sz;
            se.committed = 1;
            se.type = t;
            return se;
        }

        if (tb.ty != TY.Tsarray && tb.ty != TY.Tarray && tb.ty != TY.Tpointer)
        {
            if (!copied)
            {
                se = cast(StringExp)copy();
                copied = 1;
            }
            goto Lcast;
        }
        if (typeb.ty != TY.Tsarray && typeb.ty != TY.Tarray && typeb.ty != TY.Tpointer)
        {
            if (!copied)
            {
                se = cast(StringExp)copy();
                copied = 1;
            }
            goto Lcast;
        }

        if (typeb.nextOf().size() == tb.nextOf().size())
        {
            if (!copied)
            {
                se = cast(StringExp)copy();
                copied = 1;
            }
            if (tb.ty == TY.Tsarray)
                goto L2;    // handle possible change in static array dimension
            se.type = t;
            return se;
        }

        if (committed)
            goto Lcast;

        {
            scope OutBuffer buffer = new OutBuffer();
            size_t newlen = 0;
            TY tfty = typeb.nextOf().toBasetype().ty;
            TY ttty = tb.nextOf().toBasetype().ty;

            switch (X(tfty, ttty))
            {
                case X(TY.Tchar, TY.Tchar):
                case X(TY.Twchar,TY.Twchar):
                case X(TY.Tdchar,TY.Tdchar):
                    break;

                case X(TY.Tchar, TY.Twchar):
                    for (size_t u = 0; u < len;)
                    {
                        dchar c;
                        string p = utf_decodeChar(cast(string)se.string_[0..len], &u, &c);
                        if (p !is null)
                            error("%s", p);
                        else
                            buffer.writeUTF16(c);
                    }
                    newlen = buffer.offset / 2;
                    buffer.writeUTF16(0);
                    goto L1;

                case X(TY.Tchar, TY.Tdchar):
                    for (size_t u = 0; u < len;)
                    {
                        dchar c;
                        string p = utf_decodeChar(cast(string)se.string_[0..len], &u, &c);
                        if (p !is null)
                            error("%s", p);
                        buffer.write4(c);
                        newlen++;
                    }
                    buffer.write4(0);
                    goto L1;

                case X(TY.Twchar,TY.Tchar):
                    for (size_t u = 0; u < len;)
                    {
                        dchar c;
                        // Cast the pointer before slicing: slicing the void*
                        // first would take `len` bytes, i.e. only len/2
                        // wchars, while `u` indexes wchars up to `len`.
                        string p = utf_decodeWchar(cast(wstring)(cast(wchar*)se.string_)[0..len], &u, &c);
                        if (p)
                            error("%s", p);
                        else
                            buffer.writeUTF8(c);
                    }
                    newlen = buffer.offset;
                    buffer.writeUTF8(0);
                    goto L1;

                case X(TY.Twchar,TY.Tdchar):
                    for (size_t u = 0; u < len;)
                    {
                        dchar c;
                        // See the Twchar -> Tchar case for why the pointer
                        // is cast before slicing.
                        string p = utf_decodeWchar(cast(wstring)(cast(wchar*)se.string_)[0..len], &u, &c);
                        if (p)
                            error("%s", p);
                        buffer.write4(c);
                        newlen++;
                    }
                    buffer.write4(0);
                    goto L1;

                case X(TY.Tdchar,TY.Tchar):
                    for (size_t u = 0; u < len; u++)
                    {
                        dchar c = (cast(dchar*)se.string_)[u];
                        if (!utf_isValidDchar(c))
                            error("invalid UCS-32 char \\U%08x", c);
                        else
                            buffer.writeUTF8(c);
                        newlen++;
                    }
                    // the per-character count above is superseded by the
                    // number of UTF-8 code units actually written
                    newlen = buffer.offset;
                    buffer.writeUTF8(0);
                    goto L1;

                case X(TY.Tdchar,TY.Twchar):
                    for (size_t u = 0; u < len; u++)
                    {
                        dchar c = (cast(dchar*)se.string_)[u];
                        if (!utf_isValidDchar(c))
                            error("invalid UCS-32 char \\U%08x", c);
                        else
                            buffer.writeUTF16(c);
                        newlen++;
                    }
                    newlen = buffer.offset / 2;
                    buffer.writeUTF16(0);
                    goto L1;

                L1:
                    if (!copied)
                    {
                        se = cast(StringExp)copy();
                        copied = 1;
                    }
                    se.string_ = buffer.extractData();
                    se.len = newlen;
                    se.sz = cast(ubyte)tb.nextOf().size();
                    break;

                default:
                    assert(typeb.nextOf().size() != tb.nextOf().size());
                    goto Lcast;
            }
        }

    L2:
        assert(copied);

        // See if need to truncate or extend the literal
        if (tb.ty == TY.Tsarray)
        {
            int dim2 = cast(int)(cast(TypeSArray)tb).dim.toInteger();

            //printf("dim from = %d, to = %d\n", se.len, dim2);

            // Changing dimensions
            if (dim2 != se.len)
            {
                // Copy when changing the string literal
                uint newsz = se.sz;
                void *s;
                int d;

                d = (dim2 < se.len) ? dim2 : se.len;
                s = cast(ubyte*)malloc((dim2 + 1) * newsz);
                memcpy(s, se.string_, d * newsz);
                // Extend with 0, add terminating 0
                memset(cast(char*)s + d * newsz, 0, (dim2 + 1 - d) * newsz);
                se.string_ = s;
                se.len = dim2;
            }
        }
        se.type = t;
        return se;

    Lcast:
        Expression e = new CastExp(loc, se, t);
        e.type = t;    // so semantic() won't be run on e
        return e;
    }

    /// Not implemented: unconditionally asserts.
    int compare(Object obj)
    {
        assert(false);
    }

    /// Not implemented: unconditionally asserts.
    bool isBool(bool result)
    {
        assert(false);
    }

    /// Not implemented: unconditionally asserts.
    uint charAt(size_t i)
    {
        assert(false);
    }

    /// Not implemented: unconditionally asserts.
    void toCBuffer(OutBuffer buf, HdrGenState* hgs)
    {
        assert(false);
    }

    /// Not implemented: unconditionally asserts.
    void toMangleBuffer(OutBuffer buf)
    {
        assert(false);
    }

    /**
     * Generates the back-end element for this literal.
     *
     * Dynamic arrays are emitted as a static data symbol and looked up in /
     * recorded into the `stringTab` ring of STSIZE entries so an identical
     * literal in the same `irs.m` reuses the symbol.  Static arrays get
     * their own data symbol.  Pointers become an OPstring element holding a
     * malloc'ed copy of the data including its terminator.
     */
    elem* toElem(IRState* irs)
    {
        elem* e;
        Type tb = type.toBasetype();

static if (false) {
        printf("StringExp.toElem() %s, type = %s\n", toChars(), type.toChars());
}
        if (tb.ty == TY.Tarray)
        {
            Symbol* si;
            dt_t* dt;
            StringTab* st;

static if (false) {
            printf("irs.m = %p\n", irs.m);
            printf(" m = %s\n", irs.m.toChars());
            printf(" len = %d\n", len);
            printf(" sz = %d\n", sz);
}
            for (size_t i = 0; i < STSIZE; i++)
            {
                st = &stringTab[(stidx + i) % STSIZE];
                //if (!st.m) continue;
                //printf(" st.m = %s\n", st.m.toChars());
                //printf(" st.len = %d\n", st.len);
                //printf(" st.sz = %d\n", st.sz);
                if (st.m is irs.m &&
                    st.si &&
                    st.len == len &&
                    st.sz == sz &&
                    memcmp(st.string_, string_, sz * len) == 0)
                {
                    //printf("use cached value\n");
                    si = st.si;    // use cached value
                    goto L1;
                }
            }

            // No hit: advance the ring and overwrite that slot below.
            stidx = (stidx + 1) % STSIZE;
            st = &stringTab[stidx];

            dt = null;
            toDt(&dt);

            si = symbol_generate(SC.SCstatic, type_fake(TYM.TYdarray));
            si.Sdt = dt;
            si.Sfl = FL.FLdata;
version (ELFOBJ) {// Burton
            si.Sseg = Segment.CDATA;
}
version (MACHOBJ) {
            si.Sseg = Segment.DATA;
}
            outdata(si);

            st.m = irs.m;
            st.si = si;
            st.string_ = string_;
            st.len = len;
            st.sz = sz;
        L1:
            e = el_var(si);
        }
        else if (tb.ty == TY.Tsarray)
        {
            Symbol *si;
            dt_t *dt = null;

            toDt(&dt);
            dtnzeros(&dt, sz);    // leave terminating 0

            si = symbol_generate(SC.SCstatic,type_allocn(TYM.TYarray, tschar));
            si.Sdt = dt;
            si.Sfl = FL.FLdata;

version (ELFOBJ_OR_MACHOBJ) { // Burton
            si.Sseg = Segment.CDATA;
}
            outdata(si);

            e = el_var(si);
        }
        else if (tb.ty == TY.Tpointer)
        {
            e = el_calloc();
            e.Eoper = OPER.OPstring;
static if (true) {
            // Match MEM_PH_FREE for OPstring in ztc\el.c
            e.EV.ss.Vstring = cast(char*)malloc((len + 1) * sz);
            memcpy(e.EV.ss.Vstring, string_, (len + 1) * sz);
} else {
            e.EV.ss.Vstring = cast(char*)string_;
}
            e.EV.ss.Vstrlen = (len + 1) * sz;
            e.Ety = TYM.TYnptr;
        }
        else
        {
            writef("type is %s\n", type.toChars());
            assert(0);
        }
        el_setLoc(e,loc);
        return e;
    }

    /**
     * Appends the static-data representation of this literal to `*pdt`.
     *
     * Dynamic arrays emit the length followed by a reference to the bytes
     * (including the terminator); static arrays emit the bytes inline,
     * zero-padded up to the array dimension; pointers emit only the
     * reference.  Any other base type is reported and asserts.
     *
     * Returns: the value handed back by the last dt* call made
     */
    dt_t** toDt(dt_t** pdt)
    {
        //printf("StringExp.toDt() '%s', type = %s\n", toChars(), type.toChars());
        Type t = type.toBasetype();

        // BUG: should implement some form of static string pooling
        switch (t.ty)
        {
            case TY.Tarray:
                dtdword(pdt, len);
                pdt = dtabytes(pdt, TYM.TYnptr, 0, (len + 1) * sz, cast(char*)string_);
                break;

            case TY.Tsarray:
            {
                // Cast the base type we switched on, not `type`: a class
                // cast in D is checked at run time and yields null when
                // `type` itself is not a TypeSArray instance.
                TypeSArray tsa = cast(TypeSArray)t;
                long dim;

                pdt = dtnbytes(pdt, len * sz, cast(const(char)*)string_);
                if (tsa.dim)
                {
                    dim = tsa.dim.toInteger();
                    if (len < dim)
                    {
                        // Pad remainder with 0
                        pdt = dtnzeros(pdt, cast(uint)((dim - len) * tsa.next.size()));
                    }
                }
                break;
            }

            case TY.Tpointer:
                pdt = dtabytes(pdt, TYM.TYnptr, 0, (len + 1) * sz, cast(char*)string_);
                break;

            default:
                writef("StringExp.toDt(type = %s)\n", type.toChars());
                assert(0);
        }
        return pdt;
    }
}