diff dmd/StringExp.d @ 0:10317f0c89a5

Initial commit
author korDen
date Sat, 24 Oct 2009 08:42:06 +0400
parents
children 7427ded8caf7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dmd/StringExp.d	Sat Oct 24 08:42:06 2009 +0400
@@ -0,0 +1,669 @@
+module dmd.StringExp;
+
+import dmd.Expression;
+import dmd.backend.elem;
+import dmd.InterState;
+import dmd.TypeSArray;
+import dmd.CastExp;
+import dmd.MATCH;
+import dmd.TY;
+import dmd.TypeDArray;
+import dmd.Type;
+import dmd.TOK;
+import dmd.OutBuffer;
+import dmd.Loc;
+import dmd.Scope;
+import dmd.IRState;
+import dmd.StringExp;
+import dmd.HdrGenState;
+import dmd.Utf;
+import dmd.backend.dt_t;
+import dmd.backend.Symbol;
+import dmd.backend.StringTab;
+import dmd.backend.Util;
+import dmd.backend.SC;
+import dmd.backend.TYM;
+import dmd.backend.FL;
+import dmd.backend.TYPE;
+import dmd.backend.OPER;
+
+import core.stdc.string;
+
+class StringExp : Expression
+{
+	// Literal payload. Initially points at the bytes of the string handed to the
+	// constructor; semantic() and castTo() replace it with a freshly encoded
+	// buffer when the literal is transcoded.
+	// NOTE(review): toElem()/toDt() read (len + 1) * sz bytes, so a terminating
+	// zero code unit is presumably expected after the data — confirm for the
+	// constructor path, which stores s.ptr unchanged.
+	void* string_;	// char, wchar, or dchar data
+    // Counted in code units of size `sz` (semantic() counts a code point
+    // >= 0x10000 as two units when encoding to wchar).
+    size_t len;		// number of chars, wchars, or dchars
+    ubyte sz;	// 1: char, 2: wchar, 4: dchar
+    // While 0, implicitConvTo() lets the literal match char, wchar and dchar
+    // targets; semantic() (with a postfix) and castTo() set it to 1.
+    ubyte committed;	// !=0 if type is committed
+    // 0 when the two-argument constructor is used (no postfix given).
+    ubyte postfix;	// 'c', 'w', 'd'
+
+	/// Creates a string literal without a postfix by forwarding to the
+	/// three-argument constructor with postfix 0.
+	this(Loc loc, string s)
+	{
+		enum ubyte noPostfix = 0;
+		this(loc, s, noPostfix);
+	}
+
+	/**
+	 * Creates a string literal expression over the bytes of s.
+	 *
+	 * Params:
+	 *	loc     = source location passed on to Expression
+	 *	s       = literal text; only its pointer and length are stored, the
+	 *	          data is not copied
+	 *	postfix = 'c', 'w', 'd', or 0 when the literal has no postfix
+	 */
+	this(Loc loc, string s, ubyte postfix)
+	{
+		super(loc, TOK.TOKstring, StringExp.sizeof);
+
+		// A new literal holds 1-byte code units and is not yet committed to a type.
+		sz = 1;
+		committed = 0;
+		len = s.length;
+		string_ = cast(void*)s.ptr;
+		this.postfix = postfix;
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	int equals(Object o)
+	{
+		assert(0);
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	string toChars()
+	{
+		assert(0);
+	}
+
+	/**
+	 * Gives the literal a type the first time it is analysed; later calls are
+	 * no-ops.
+	 *
+	 * A 'd' or 'w' postfix re-encodes the data as 4-byte or UTF-16 code units,
+	 * appends a terminating zero unit, and commits the literal.  A 'c' postfix
+	 * only commits it.  Without a postfix the literal stays uncommitted and is
+	 * typed as an array of invariant char.  A decoding error is reported through
+	 * error() and ends the transcoding loop early.
+	 *
+	 * Returns: this expression.
+	 */
+	Expression semantic(Scope sc)
+	{
+version (LOGSEMANTIC) {
+		printf("StringExp.semantic() %s\n", toChars());
+}
+		if (type)
+			return this;
+
+		scope OutBuffer buf = new OutBuffer();
+		size_t units = 0;	// length of the re-encoded literal, in code units
+		string msg;
+		dchar ch;
+
+		switch (postfix)
+		{
+			case 'd':
+				for (size_t pos = 0; pos < len;)
+				{
+					msg = utf_decodeChar(cast(string)string_[0..len], &pos, &ch);
+					if (msg !is null)
+					{
+						error("%s", msg);
+						break;
+					}
+					buf.write4(ch);
+					++units;
+				}
+				buf.write4(0);
+				string_ = buf.extractData();
+				len = units;
+				sz = 4;
+				type = new TypeDArray(Type.tdchar.invariantOf());
+				committed = 1;
+				break;
+
+			case 'w':
+				for (size_t pos = 0; pos < len;)
+				{
+					msg = utf_decodeChar(cast(string)string_[0..len], &pos, &ch);
+					if (msg !is null)
+					{
+						error("%s", msg);
+						break;
+					}
+					buf.writeUTF16(ch);
+					// Code points at or above 0x10000 count as two UTF-16 units.
+					units += (ch >= 0x10000) ? 2 : 1;
+				}
+				buf.writeUTF16(0);
+				string_ = buf.extractData();
+				len = units;
+				sz = 2;
+				type = new TypeDArray(Type.twchar.invariantOf());
+				committed = 1;
+				break;
+
+			case 'c':
+				committed = 1;
+				// deliberately continues into default to pick up the char type
+			default:
+				type = new TypeDArray(Type.tchar.invariantOf());
+				break;
+		}
+		type = type.semantic(loc, sc);
+		return this;
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	Expression interpret(InterState* istate)
+	{
+		assert(0);
+	}
+
+	/**
+	 * Returns the number of code points in the literal.
+	 *
+	 * For 1- and 2-byte code units the data is decoded with utf_decodeChar /
+	 * utf_decodeWchar and each successfully decoded code point is counted; a
+	 * decoding error is reported through error() and stops the count.  For
+	 * 4-byte units the stored len is already the answer.
+	 *
+	 * implicitConvTo() calls this when matching an uncommitted dynamic-array
+	 * literal against a static array, so it must not be left as a halting stub.
+	 */
+	size_t length()
+	{
+		size_t result = 0;
+		dchar c;
+		string p;
+
+		switch (sz)
+		{
+			case 1:
+				for (size_t u = 0; u < len;)
+				{
+					p = utf_decodeChar(cast(string)string_[0..len], &u, &c);
+					if (p !is null)
+					{
+						error("%s", p);
+						break;
+					}
+					result++;
+				}
+				break;
+
+			case 2:
+				for (size_t u = 0; u < len;)
+				{
+					// Cast the pointer before slicing so the slice holds len
+					// wchars rather than len bytes.
+					p = utf_decodeWchar(cast(wstring)(cast(wchar*)string_)[0..len], &u, &c);
+					if (p !is null)
+					{
+						error("%s", p);
+						break;
+					}
+					result++;
+				}
+				break;
+
+			case 4:
+				result = len;
+				break;
+
+			default:
+				assert(0);
+		}
+		return result;
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	StringExp toUTF8(Scope sc)
+	{
+		assert(0);
+	}
+
+	/**
+	 * Performs the generic implicit cast, then restores this literal's original
+	 * `committed` flag on the result when the result is still a string literal,
+	 * so an uncommitted literal stays uncommitted.
+	 */
+	Expression implicitCastTo(Scope sc, Type t)
+	{
+		// Snapshot the flag first; the value after the cast is overwritten below.
+		ubyte wasCommitted = this.committed;
+		Expression result = Expression.implicitCastTo(sc, t);
+
+		if (result.op != TOK.TOKstring)
+			return result;
+
+		// Retain polysemous nature if it started out that way
+		(cast(StringExp)result).committed = wasCommitted;
+		return result;
+	}
+
+	/**
+	 * Determines how well this literal converts implicitly to type t.
+	 *
+	 * While the literal is uncommitted and its type is a static array, dynamic
+	 * array or pointer of char/wchar/dchar, it may match static arrays, dynamic
+	 * arrays and pointers of any of those three character types directly
+	 * (MATCHexact, or MATCHconst when only constness differs).  Conversion of an
+	 * uncommitted literal to void* is refused.  Every other case is delegated to
+	 * Expression.implicitConvTo().
+	 */
+	MATCH implicitConvTo(Type t)
+	{
+		MATCH m;
+
+static if (false) {
+		printf("StringExp.implicitConvTo(this=%s, committed=%d, type=%s, t=%s)\n",
+			toChars(), committed, type.toChars(), t.toChars());
+}
+		if (!committed)
+		{
+			if (t.ty == TY.Tpointer && t.nextOf().ty == TY.Tvoid)
+			{
+				return MATCH.MATCHnomatch;
+			}
+			if (type.ty == TY.Tsarray || type.ty == TY.Tarray || type.ty == TY.Tpointer)
+			{
+				TY tyn = type.nextOf().ty;
+				if (tyn == TY.Tchar || tyn == TY.Twchar || tyn == TY.Tdchar)
+				{
+					Type tn;
+					MATCH mm;
+
+					switch (t.ty)
+					{
+						case TY.Tsarray:
+							if (type.ty == TY.Tsarray)
+							{
+								// Static array to static array: dimensions must agree.
+								if ((cast(TypeSArray)type).dim.toInteger() !=
+									(cast(TypeSArray)t).dim.toInteger())
+									return MATCH.MATCHnomatch;
+								TY tynto = t.nextOf().ty;
+								if (tynto == TY.Tchar || tynto == TY.Twchar || tynto == TY.Tdchar)
+									return MATCH.MATCHexact;
+							}
+							else if (type.ty == TY.Tarray)
+							{
+								// Dynamic array literal must fit in the target static array.
+								if (length() > (cast(TypeSArray)t).dim.toInteger())
+									return MATCH.MATCHnomatch;
+								TY tynto = t.nextOf().ty;
+								if (tynto == TY.Tchar || tynto == TY.Twchar || tynto == TY.Tdchar)
+									return MATCH.MATCHexact;
+							}
+							// deliberately continues into the array/pointer handling
+						case TY.Tarray:
+						case TY.Tpointer:
+							tn = t.nextOf();
+							mm = MATCH.MATCHexact;
+							if (type.nextOf().mod != tn.mod)
+							{
+								if (!tn.isConst())
+									return MATCH.MATCHnomatch;
+								mm = MATCH.MATCHconst;
+							}
+							switch (tn.ty)
+							{
+								case TY.Tchar:
+								case TY.Twchar:
+								case TY.Tdchar:
+									return mm;
+
+								default:
+									// Not a character element type: leave the switch and
+									// use the generic rules below.  Without a default, a
+									// non-matching value raises SwitchError in non-release
+									// builds instead of reaching them.
+									break;
+							}
+							break;
+						default:
+							break;	///
+					}
+				}
+			}
+		}
+		return Expression.implicitConvTo(t);
+static if (false) {
+		m = cast(MATCH)type.implicitConvTo(t);
+		if (m)
+		{
+			return m;
+		}
+
+		return MATCH.MATCHnomatch;
+}
+	}
+	
+	/// Packs a (from, to) pair of type kinds into one integer, used as the
+	/// switch value and case labels in castTo().
+	static uint X(TY tf, TY tt)
+	{
+		return tf * 256 + tt;
+	}
+
+	/**
+	 * Converts this literal to type t.
+	 *
+	 * The receiver is never modified: any change is made on a copy (tracked by
+	 * `copied`).  Depending on the source and target types the result is
+	 *  - a StringExp copy that is merely retyped (same type, same base type, or
+	 *    same element size),
+	 *  - a StringExp copy whose data has been re-encoded between char, wchar
+	 *    and dchar (only when the literal was uncommitted), optionally
+	 *    truncated or zero-extended to a static array dimension, or
+	 *  - a CastExp wrapping the literal when no literal-level conversion applies.
+	 *
+	 * Casting an uncommitted literal to void* reports an error.  Decoding errors
+	 * are reported through error().
+	 */
+	Expression castTo(Scope sc, Type t)
+	{
+		/* This follows copy-on-write; any changes to 'this'
+		 * will result in a copy.
+		 * The this.string member is considered immutable.
+		 */
+		StringExp se;
+		Type tb;
+		int copied = 0;
+
+		//printf("StringExp.castTo(t = %s), '%s' committed = %d\n", t.toChars(), toChars(), committed);
+
+		if (!committed && t.ty == TY.Tpointer && t.nextOf().ty == TY.Tvoid)
+		{
+			error("cannot convert string literal to void*");
+		}
+
+		se = this;
+		if (!committed)
+		{
+			// The result of a cast is always committed; do it on a copy.
+			se = cast(StringExp)copy();
+			se.committed = 1;
+			copied = 1;
+		}
+
+		if (type == t)
+		{
+			return se;
+		}
+
+		tb = t.toBasetype();
+		//printf("\ttype = %s\n", type.toChars());
+		if (tb.ty == TY.Tdelegate && type.toBasetype().ty != TY.Tdelegate)
+			return Expression.castTo(sc, t);
+
+		Type typeb = type.toBasetype();
+		if (typeb == tb)
+		{
+			if (!copied)
+			{
+				se = cast(StringExp)copy();
+				copied = 1;
+			}
+			se.type = t;
+			return se;
+		}
+
+		if (committed && tb.ty == TY.Tsarray && typeb.ty == TY.Tarray)
+		{
+			// Committed dynamic array to static array: reinterpret the same
+			// bytes with the target element size.
+			se = cast(StringExp)copy();
+			se.sz = cast(ubyte)tb.nextOf().size();
+			se.len = (len * sz) / se.sz;
+			se.committed = 1;
+			se.type = t;
+			return se;
+		}
+
+		if (tb.ty != TY.Tsarray && tb.ty != TY.Tarray && tb.ty != TY.Tpointer)
+		{
+			if (!copied)
+			{
+				se = cast(StringExp)copy();
+				copied = 1;
+			}
+			goto Lcast;
+		}
+		if (typeb.ty != TY.Tsarray && typeb.ty != TY.Tarray && typeb.ty != TY.Tpointer)
+		{
+			if (!copied)
+			{
+				se = cast(StringExp)copy();
+				copied = 1;
+			}
+			goto Lcast;
+		}
+
+		if (typeb.nextOf().size() == tb.nextOf().size())
+		{
+			if (!copied)
+			{
+				se = cast(StringExp)copy();
+				copied = 1;
+			}
+
+			if (tb.ty == TY.Tsarray)
+				goto L2;	// handle possible change in static array dimension
+			se.type = t;
+			return se;
+		}
+
+		// Element sizes differ: only an uncommitted literal may be re-encoded.
+		if (committed)
+			goto Lcast;
+
+		{
+			scope OutBuffer buffer = new OutBuffer();
+			size_t newlen = 0;
+			TY tfty = typeb.nextOf().toBasetype().ty;
+			TY ttty = tb.nextOf().toBasetype().ty;
+			switch (X(tfty, ttty))
+			{
+				case X(TY.Tchar, TY.Tchar):
+				case X(TY.Twchar,TY.Twchar):
+				case X(TY.Tdchar,TY.Tdchar):
+					break;
+
+				case X(TY.Tchar, TY.Twchar):
+					for (size_t u = 0; u < len;)
+					{
+						dchar c;
+						string p = utf_decodeChar(cast(string)se.string_[0..len], &u, &c);
+						if (p !is null)
+							error("%s", p);
+						else
+							buffer.writeUTF16(c);
+					}
+					newlen = buffer.offset / 2;
+					buffer.writeUTF16(0);
+					goto L1;
+
+				case X(TY.Tchar, TY.Tdchar):
+					for (size_t u = 0; u < len;)
+					{
+						dchar c;
+						string p = utf_decodeChar(cast(string)se.string_[0..len], &u, &c);
+						if (p !is null)
+							error("%s", p);
+						buffer.write4(c);
+						newlen++;
+					}
+					buffer.write4(0);
+					goto L1;
+
+				case X(TY.Twchar,TY.Tchar):
+					for (size_t u = 0; u < len;)
+					{
+						dchar c;
+						// Cast the pointer to wchar* before slicing: slicing the
+						// void* first would produce len *bytes*, i.e. only len/2
+						// wchars after the cast, while the loop runs to len units.
+						string p = utf_decodeWchar(cast(wstring)(cast(wchar*)se.string_)[0..len], &u, &c);
+						if (p)
+							error("%s", p);
+						else
+							buffer.writeUTF8(c);
+					}
+					newlen = buffer.offset;
+					buffer.writeUTF8(0);
+					goto L1;
+
+				case X(TY.Twchar,TY.Tdchar):
+					for (size_t u = 0; u < len;)
+					{
+						dchar c;
+						// Same pointer-then-slice order as the wchar -> char case.
+						string p = utf_decodeWchar(cast(wstring)(cast(wchar*)se.string_)[0..len], &u, &c);
+						if (p)
+							error("%s", p);
+						buffer.write4(c);
+						newlen++;
+					}
+					buffer.write4(0);
+					goto L1;
+
+				case X(TY.Tdchar,TY.Tchar):
+					for (size_t u = 0; u < len; u++)
+					{
+						dchar c = (cast(dchar*)se.string_)[u];
+						if (!utf_isValidDchar(c))
+							error("invalid UCS-32 char \\U%08x", c);
+						else
+							buffer.writeUTF8(c);
+						newlen++;
+					}
+					// The real length is the number of bytes written.
+					newlen = buffer.offset;
+					buffer.writeUTF8(0);
+					goto L1;
+
+				case X(TY.Tdchar,TY.Twchar):
+					for (size_t u = 0; u < len; u++)
+					{
+						dchar c = (cast(dchar*)se.string_)[u];
+						if (!utf_isValidDchar(c))
+							error("invalid UCS-32 char \\U%08x", c);
+						else
+							buffer.writeUTF16(c);
+						newlen++;
+					}
+					// The real length is the number of 2-byte units written.
+					newlen = buffer.offset / 2;
+					buffer.writeUTF16(0);
+					goto L1;
+
+				L1:
+					// Install the re-encoded data on a private copy.
+					if (!copied)
+					{
+						se = cast(StringExp)copy();
+						copied = 1;
+					}
+					se.string_ = buffer.extractData();
+					se.len = newlen;
+					se.sz = cast(ubyte)tb.nextOf().size();
+					break;
+
+				default:
+					assert(typeb.nextOf().size() != tb.nextOf().size());
+					goto Lcast;
+			}
+		}
+	L2:
+		assert(copied);
+
+		// See if need to truncate or extend the literal
+		if (tb.ty == TY.Tsarray)
+		{
+			int dim2 = cast(int)(cast(TypeSArray)tb).dim.toInteger();
+
+			//printf("dim from = %d, to = %d\n", se.len, dim2);
+
+			// Changing dimensions
+			if (dim2 != se.len)
+			{
+				// Copy when changing the string literal
+				uint newsz = se.sz;
+				void *s;
+				int d;
+
+				d = (dim2 < se.len) ? dim2 : se.len;
+				s = cast(ubyte*)malloc((dim2 + 1) * newsz);
+				memcpy(s, se.string_, d * newsz);
+				// Extend with 0, add terminating 0
+				memset(cast(char*)s + d * newsz, 0, (dim2 + 1 - d) * newsz);
+				se.string_ = s;
+				se.len = dim2;
+			}
+		}
+		se.type = t;
+		return se;
+
+	Lcast:
+		Expression e = new CastExp(loc, se, t);
+		e.type = t;	// so semantic() won't be run on e
+		return e;
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	int compare(Object obj)
+	{
+		assert(0);
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	bool isBool(bool result)
+	{
+		assert(0);
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	uint charAt(size_t i)
+	{
+		assert(0);
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	void toCBuffer(OutBuffer buf, HdrGenState* hgs)
+	{
+		assert(0);
+	}
+
+	/// Placeholder: not implemented yet, so every call halts on the assert.
+	void toMangleBuffer(OutBuffer buf)
+	{
+		assert(0);
+	}
+
+	/**
+	 * Builds the backend elem for this literal, based on the base type of `type`:
+	 *  - dynamic array: a variable elem over a static symbol whose data comes
+	 *    from toDt(); recently emitted literals are reused through stringTab;
+	 *  - static array: a variable elem over a static symbol holding the data
+	 *    from toDt() followed by sz zero bytes;
+	 *  - pointer: an OPstring elem owning a malloc'ed copy of the data.
+	 * Any other base type prints the type and asserts.
+	 *
+	 * Returns: the elem, with its location set from loc.
+	 */
+	elem* toElem(IRState* irs)
+	{
+		elem* e;
+		Type tb = type.toBasetype();
+
+static if (false) {
+		printf("StringExp.toElem() %s, type = %s\n", toChars(), type.toChars());
+}
+
+		if (tb.ty == TY.Tarray)
+		{
+			Symbol* si;
+			dt_t* dt;
+			StringTab* st;
+
+static if (false) {
+			printf("irs.m = %p\n", irs.m);
+			printf(" m   = %s\n", irs.m.toChars());
+			printf(" len = %d\n", len);
+			printf(" sz  = %d\n", sz);
+}
+			// Look through all STSIZE cache slots, starting at stidx, for an
+			// entry with the same irs.m, length, unit size and bytes.
+			for (size_t i = 0; i < STSIZE; i++)
+			{
+				st = &stringTab[(stidx + i) % STSIZE];
+				//if (!st.m) continue;
+				//printf(" st.m   = %s\n", st.m.toChars());
+				//printf(" st.len = %d\n", st.len);
+				//printf(" st.sz  = %d\n", st.sz);
+				if (st.m is irs.m &&
+					st.si &&
+					st.len == len &&
+					st.sz == sz &&
+					memcmp(st.string_, string_, sz * len) == 0)
+				{
+					//printf("use cached value\n");
+					si = st.si;	// use cached value
+					goto L1;
+				}
+			}
+
+			// Cache miss: advance stidx to the next slot (wrapping at STSIZE)
+			// and overwrite that slot below.
+			stidx = (stidx + 1) % STSIZE;
+			st = &stringTab[stidx];
+
+			dt = null;
+			toDt(&dt);
+
+			si = symbol_generate(SC.SCstatic, type_fake(TYM.TYdarray));
+			si.Sdt = dt;
+			si.Sfl = FL.FLdata;
+version (ELFOBJ) {// Burton
+			si.Sseg = Segment.CDATA;
+}
+version (MACHOBJ) {
+			si.Sseg = Segment.DATA;
+}
+			outdata(si);
+
+			// Record the newly emitted symbol in the cache slot.
+			st.m = irs.m;
+			st.si = si;
+			st.string_ = string_;
+			st.len = len;
+			st.sz = sz;
+			L1:
+			e = el_var(si);
+		}
+		else if (tb.ty == TY.Tsarray)
+		{
+			Symbol *si;
+			dt_t *dt = null;
+
+			toDt(&dt);
+			dtnzeros(&dt, sz);		// leave terminating 0
+
+			si = symbol_generate(SC.SCstatic,type_allocn(TYM.TYarray, tschar));
+			si.Sdt = dt;
+			si.Sfl = FL.FLdata;
+
+			// NOTE(review): this branch tests a single ELFOBJ_OR_MACHOBJ version
+			// identifier while the dynamic-array branch above tests ELFOBJ and
+			// MACHOBJ separately — confirm the build defines this identifier.
+version (ELFOBJ_OR_MACHOBJ) { // Burton
+			si.Sseg = Segment.CDATA;
+		}
+			outdata(si);
+
+			e = el_var(si);
+		}
+		else if (tb.ty == TY.Tpointer)
+		{
+			e = el_calloc();
+			e.Eoper = OPER.OPstring;
+static if (true) {
+			// Match MEM_PH_FREE for OPstring in ztc\el.c
+			// Copies (len + 1) * sz bytes, i.e. the data plus one extra code unit.
+			e.EV.ss.Vstring = cast(char*)malloc((len + 1) * sz);
+			memcpy(e.EV.ss.Vstring, string_, (len + 1) * sz);
+} else {
+			e.EV.ss.Vstring = cast(char*)string_;
+}
+			e.EV.ss.Vstrlen = (len + 1) * sz;
+			e.Ety = TYM.TYnptr;
+		}
+		else
+		{
+			writef("type is %s\n", type.toChars());
+			assert(0);
+		}
+		el_setLoc(e,loc);
+		return e;
+	}
+
+	/**
+	 * Appends the static-data representation of this literal to the dt chain
+	 * at pdt, based on the base type of `type`:
+	 *  - dynamic array: the length word followed by a pointer to
+	 *    (len + 1) * sz bytes of data;
+	 *  - static array: len * sz bytes of data, zero-padded up to the array
+	 *    dimension when the literal is shorter;
+	 *  - pointer: a pointer to (len + 1) * sz bytes of data.
+	 * Any other base type prints the type and asserts.
+	 *
+	 * Returns: the updated chain position.
+	 */
+	dt_t** toDt(dt_t** pdt)
+	{
+		//printf("StringExp.toDt() '%s', type = %s\n", toChars(), type.toChars());
+		Type t = type.toBasetype();
+
+		// BUG: should implement some form of static string pooling
+		switch (t.ty)
+		{
+			case TY.Tarray:
+				dtdword(pdt, len);
+				pdt = dtabytes(pdt, TYM.TYnptr, 0, (len + 1) * sz, cast(char*)string_);
+				break;
+
+			case TY.Tsarray:
+			{
+				// Cast the resolved base type, not `type`: the switch is on t.ty,
+				// and a D class cast of a non-TypeSArray `type` (e.g. a typedef
+				// of a static array) yields null, which tsa.dim would dereference.
+				TypeSArray tsa = cast(TypeSArray)t;
+				long dim;
+
+				pdt = dtnbytes(pdt, len * sz, cast(const(char)*)string_);
+				if (tsa.dim)
+				{
+					dim = tsa.dim.toInteger();
+					if (len < dim)
+					{
+						// Pad remainder with 0
+						pdt = dtnzeros(pdt, cast(uint)((dim - len) * tsa.next.size()));
+					}
+				}
+				break;
+			}
+
+			case TY.Tpointer:
+				pdt = dtabytes(pdt, TYM.TYnptr, 0, (len + 1) * sz, cast(char*)string_);
+				break;
+
+			default:
+				writef("StringExp.toDt(type = %s)\n", type.toChars());
+				assert(0);
+		}
+
+		return pdt;
+	}
+}
+