comparison dmd/StringExp.d @ 0:10317f0c89a5

Initial commit
author korDen
date Sat, 24 Oct 2009 08:42:06 +0400
parents
children 7427ded8caf7
comparison
equal deleted inserted replaced
-1:000000000000 0:10317f0c89a5
1 module dmd.StringExp;
2
3 import dmd.Expression;
4 import dmd.backend.elem;
5 import dmd.InterState;
6 import dmd.TypeSArray;
7 import dmd.CastExp;
8 import dmd.MATCH;
9 import dmd.TY;
10 import dmd.TypeDArray;
11 import dmd.Type;
12 import dmd.TOK;
13 import dmd.OutBuffer;
14 import dmd.Loc;
15 import dmd.Scope;
16 import dmd.IRState;
17 import dmd.StringExp;
18 import dmd.HdrGenState;
19 import dmd.Utf;
20 import dmd.backend.dt_t;
21 import dmd.backend.Symbol;
22 import dmd.backend.StringTab;
23 import dmd.backend.Util;
24 import dmd.backend.SC;
25 import dmd.backend.TYM;
26 import dmd.backend.FL;
27 import dmd.backend.TYPE;
28 import dmd.backend.OPER;
29
30 import core.stdc.string;
31
32 class StringExp : Expression
33 {
34 void* string_; // char, wchar, or dchar data
35 size_t len; // number of chars, wchars, or dchars
36 ubyte sz; // 1: char, 2: wchar, 4: dchar
37 ubyte committed; // !=0 if type is committed
38 ubyte postfix; // 'c', 'w', 'd'
39
/**
 * Creates a string literal expression without a postfix character.
 *
 * Params:
 *   loc = source location, forwarded unchanged
 *   s   = literal text, forwarded unchanged
 */
this(Loc loc, string s)
{
    // Forward to the three-argument constructor; 0 stands for "no postfix".
    this(loc, s, 0);
}
44
/**
 * Creates a string literal expression from `s`.
 *
 * The literal starts out as 1-byte (char) data and is not committed to a
 * type. The pointer of `s` is stored directly; the data is not copied.
 *
 * Params:
 *   loc     = source location, passed to the Expression constructor
 *   s       = literal text
 *   postfix = stored in the `postfix` field; semantic() switches on it
 *             ('c', 'w', 'd', anything else selects the default case)
 */
this(Loc loc, string s, ubyte postfix)
{
    // NOTE(review): for a class type, .sizeof is the size of the reference,
    // not of the instance - confirm how Expression uses this argument.
    super(loc, TOK.TOKstring, StringExp.sizeof);

    this.postfix = postfix;
    this.committed = 0;
    this.sz = 1;
    this.len = s.length;
    this.string_ = cast(void*)s.ptr;
}
55
/// Not implemented in this port yet: every call fails the assertion.
int equals(Object o)
{
    assert(0);
}
60
/// Not implemented in this port yet: every call fails the assertion.
string toChars()
{
    assert(0);
}
65
/**
 * Assigns a type to the literal on the first call; later calls return
 * immediately because `type` is already set.
 *
 * The `postfix` field selects the handling:
 *   'd'  - the UTF-8 data is decoded and re-stored as 4-byte units,
 *          sz becomes 4, the type is an invariant dchar array, committed = 1
 *   'w'  - the UTF-8 data is decoded and re-stored as UTF-16,
 *          sz becomes 2, the type is an invariant wchar array, committed = 1
 *   'c'  - committed = 1, then handled like the default case
 *   else - the data is left alone, the type is an invariant char array
 *
 * A decoding error is reported through error() and stops the conversion
 * loop; whatever was converted so far is kept.
 *
 * Returns: this
 */
Expression semantic(Scope sc)
{
    version (LOGSEMANTIC) {
        printf("StringExp.semantic() %s\n", toChars());
    }

    // Nothing to do once a type has been assigned.
    if (type)
        return this;

    scope OutBuffer buffer = new OutBuffer();
    size_t units = 0;   // number of code units written to buffer
    string msg;         // decoder error message, null on success
    size_t idx;         // read position inside the UTF-8 source
    dchar ch;           // most recently decoded character

    switch (postfix)
    {
        case 'd':
            idx = 0;
            while (idx < len)
            {
                msg = utf_decodeChar(cast(string)string_[0..len], &idx, &ch);
                if (msg !is null)
                {
                    error("%s", msg);
                    break;      // leaves the while loop only
                }
                buffer.write4(ch);
                ++units;
            }
            buffer.write4(0);   // terminating 0, not counted in len
            string_ = buffer.extractData();
            len = units;
            sz = 4;
            //type = new TypeSArray(Type.tdchar, new IntegerExp(loc, len, Type.tindex));
            type = new TypeDArray(Type.tdchar.invariantOf());
            committed = 1;
            break;

        case 'w':
            idx = 0;
            while (idx < len)
            {
                msg = utf_decodeChar(cast(string)string_[0..len], &idx, &ch);
                if (msg !is null)
                {
                    error("%s", msg);
                    break;      // leaves the while loop only
                }
                buffer.writeUTF16(ch);
                // Characters at or above 0x10000 are counted as two units.
                units += (ch >= 0x10000) ? 2 : 1;
            }
            buffer.writeUTF16(0);   // terminating 0, not counted in len
            string_ = buffer.extractData();
            len = units;
            sz = 2;
            //type = new TypeSArray(Type.twchar, new IntegerExp(loc, len, Type.tindex));
            type = new TypeDArray(Type.twchar.invariantOf());
            committed = 1;
            break;

        case 'c':
            committed = 1;
            goto default;

        default:
            //type = new TypeSArray(Type.tchar, new IntegerExp(loc, len, Type.tindex));
            type = new TypeDArray(Type.tchar.invariantOf());
            break;
    }

    type = type.semantic(loc, sc);
    //type = type.invariantOf();
    //printf("type = %s\n", type.toChars());
    return this;
}
144
/// Not implemented in this port yet: every call fails the assertion.
Expression interpret(InterState* istate)
{
    assert(0);
}
149
/**
 * Returns the number of characters encoded in the literal.
 *
 * `len` counts code units of `sz` bytes each, so for 1-byte and 2-byte data
 * the string is decoded and every successfully decoded character counts
 * once. For 4-byte data the result is simply `len`.
 *
 * This used to be an unconditional assert(false), which made
 * implicitConvTo() abort whenever it reached its length() call.
 *
 * Returns: the character count, or 0 after reporting a decoding error
 *          through error().
 */
size_t length()
{
    size_t count = 0;
    dchar c;

    switch (sz)
    {
        case 1:
            for (size_t u = 0; u < len;)
            {
                string p = utf_decodeChar(cast(string)string_[0..len], &u, &c);
                if (p !is null)
                {
                    error("%s", p);
                    return 0;
                }
                count++;
            }
            break;

        case 2:
            for (size_t u = 0; u < len;)
            {
                // Cast the pointer before slicing so the slice holds len
                // wchars rather than len bytes.
                string p = utf_decodeWchar(cast(wstring)((cast(wchar*)string_)[0 .. len]), &u, &c);
                if (p !is null)
                {
                    error("%s", p);
                    return 0;
                }
                count++;
            }
            break;

        case 4:
            count = len;
            break;

        default:
            assert(0);
    }
    return count;
}
154
/// Not implemented in this port yet: every call fails the assertion.
StringExp toUTF8(Scope sc)
{
    assert(0);
}
159
/**
 * Performs the base-class implicit cast and, when the result is still a
 * string literal, restores the `committed` value this literal had before
 * the cast.
 *
 * Returns: the expression produced by Expression.implicitCastTo
 */
Expression implicitCastTo(Scope sc, Type t)
{
    //printf("StringExp.implicitCastTo(%s of type %s) => %s\n", toChars(), type.toChars(), t.toChars());

    // Capture the flag first; it is written back onto the result below.
    ubyte savedCommitted = this.committed;
    Expression result = Expression.implicitCastTo(sc, t);

    if (result.op != TOK.TOKstring)
        return result;

    // Retain polysemous nature if it started out that way
    (cast(StringExp)result).committed = savedCommitted;
    return result;
}
172
/**
 * Determines how well this literal implicitly converts to type `t`.
 *
 * While the literal is not committed and its type is a static array,
 * dynamic array or pointer of char/wchar/dchar, conversions to other
 * character array/pointer types are matched here:
 *   - void* targets never match;
 *   - static array targets match exactly when the dimension fits (equal
 *     dimension for a static-array source, length() not greater than the
 *     dimension for a dynamic-array source) and the element is a
 *     character type;
 *   - otherwise, for static array, dynamic array and pointer targets with
 *     a character element type, the match is exact when the element
 *     modifiers agree, MATCHconst when they differ and the target element
 *     is const, and no match when they differ and it is not const.
 * Everything else is delegated to Expression.implicitConvTo.
 *
 * Returns: the MATCH level for converting to `t`
 */
MATCH implicitConvTo(Type t)
{
    //printf("StringExp.implicitConvTo(this=%s, committed=%d, type=%s, t=%s)\n",
    //    toChars(), committed, type.toChars(), t.toChars());

    if (!committed)
    {
        if (t.ty == TY.Tpointer && t.nextOf().ty == TY.Tvoid)
        {
            return MATCH.MATCHnomatch;
        }
        if (type.ty == TY.Tsarray || type.ty == TY.Tarray || type.ty == TY.Tpointer)
        {
            TY tyn = type.nextOf().ty;
            if (tyn == TY.Tchar || tyn == TY.Twchar || tyn == TY.Tdchar)
            {
                Type tn;
                MATCH mm;

                switch (t.ty)
                {
                    case TY.Tsarray:
                        if (type.ty == TY.Tsarray)
                        {
                            if ((cast(TypeSArray)type).dim.toInteger() !=
                                (cast(TypeSArray)t).dim.toInteger())
                                return MATCH.MATCHnomatch;
                            TY tynto = t.nextOf().ty;
                            if (tynto == TY.Tchar || tynto == TY.Twchar || tynto == TY.Tdchar)
                                return MATCH.MATCHexact;
                        }
                        else if (type.ty == TY.Tarray)
                        {
                            if (length() > (cast(TypeSArray)t).dim.toInteger())
                                return MATCH.MATCHnomatch;
                            TY tynto = t.nextOf().ty;
                            if (tynto == TY.Tchar || tynto == TY.Twchar || tynto == TY.Tdchar)
                                return MATCH.MATCHexact;
                        }
                        // Deliberate fall-through into the array/pointer handling.
                        goto case;

                    case TY.Tarray:
                    case TY.Tpointer:
                        tn = t.nextOf();
                        mm = MATCH.MATCHexact;
                        if (type.nextOf().mod != tn.mod)
                        {
                            if (!tn.isConst())
                                return MATCH.MATCHnomatch;
                            mm = MATCH.MATCHconst;
                        }
                        switch (tn.ty)
                        {
                            case TY.Tchar:
                            case TY.Twchar:
                            case TY.Tdchar:
                                return mm;

                            // Without a default, a non-character element type
                            // would raise SwitchError instead of falling back
                            // to the generic conversion below.
                            default:
                                break;
                        }
                        break;

                    default:
                        break;
                }
            }
        }
    }
    return Expression.implicitConvTo(t);
}
250
/**
 * Packs a (from, to) pair of type codes into one integer, tf * 256 + tt,
 * so that castTo() can switch on the pair with `case X(a, b):`.
 */
static uint X(TY tf, TY tt)
{
    return tf * 256 + tt;
}
254
/**
 * Casts this string literal to type `t`.
 *
 * Outline, in the order the code checks:
 *   - an uncommitted literal cast to void* is reported as an error
 *     (processing then continues);
 *   - an uncommitted literal is copied first and the copy is marked
 *     committed; `this` itself is never modified;
 *   - same type, same base type, or same element size: a (copied) literal
 *     with the new type is returned, adjusting a static array dimension
 *     when needed;
 *   - a committed dynamic-array literal cast to a static array is
 *     reinterpreted: sz becomes the target element size and len is
 *     rescaled;
 *   - an uncommitted literal cast between char/wchar/dchar arrays is
 *     transcoded into a fresh buffer;
 *   - everything else is wrapped in a CastExp whose type is preset to `t`.
 *
 * Returns: a StringExp carrying the new type, or a CastExp wrapping the
 *          literal.
 */
Expression castTo(Scope sc, Type t)
{
    /* This follows copy-on-write; any changes to 'this'
     * will result in a copy.
     * The this.string member is considered immutable.
     */
    StringExp se;
    Type tb;
    int copied = 0;

    //printf("StringExp.castTo(t = %s), '%s' committed = %d\n", t.toChars(), toChars(), committed);

    if (!committed && t.ty == TY.Tpointer && t.nextOf().ty == TY.Tvoid)
    {
        error("cannot convert string literal to void*");
    }

    se = this;
    if (!committed)
    {
        se = cast(StringExp)copy();
        se.committed = 1;
        copied = 1;
    }

    if (type == t)
    {
        return se;
    }

    tb = t.toBasetype();
    //printf("\ttype = %s\n", type.toChars());
    if (tb.ty == TY.Tdelegate && type.toBasetype().ty != TY.Tdelegate)
        return Expression.castTo(sc, t);

    Type typeb = type.toBasetype();
    if (typeb == tb)
    {
        if (!copied)
        {
            se = cast(StringExp)copy();
            copied = 1;
        }
        se.type = t;
        return se;
    }

    if (committed && tb.ty == TY.Tsarray && typeb.ty == TY.Tarray)
    {
        // Reinterpret the same bytes with the target's element size.
        se = cast(StringExp)copy();
        se.sz = cast(ubyte)tb.nextOf().size();
        se.len = (len * sz) / se.sz;
        se.committed = 1;
        se.type = t;
        return se;
    }

    if (tb.ty != TY.Tsarray && tb.ty != TY.Tarray && tb.ty != TY.Tpointer)
    {
        if (!copied)
        {
            se = cast(StringExp)copy();
            copied = 1;
        }
        goto Lcast;
    }
    if (typeb.ty != TY.Tsarray && typeb.ty != TY.Tarray && typeb.ty != TY.Tpointer)
    {
        if (!copied)
        {
            se = cast(StringExp)copy();
            copied = 1;
        }
        goto Lcast;
    }

    if (typeb.nextOf().size() == tb.nextOf().size())
    {
        if (!copied)
        {
            se = cast(StringExp)copy();
            copied = 1;
        }

        if (tb.ty == TY.Tsarray)
            goto L2;    // handle possible change in static array dimension
        se.type = t;
        return se;
    }

    if (committed)
        goto Lcast;

    {
        // Transcode between character widths into a fresh buffer.
        scope OutBuffer buffer = new OutBuffer();
        size_t newlen = 0;
        TY tfty = typeb.nextOf().toBasetype().ty;
        TY ttty = tb.nextOf().toBasetype().ty;
        switch (X(tfty, ttty))
        {
            case X(TY.Tchar, TY.Tchar):
            case X(TY.Twchar,TY.Twchar):
            case X(TY.Tdchar,TY.Tdchar):
                break;

            case X(TY.Tchar, TY.Twchar):
                for (size_t u = 0; u < len;)
                {
                    dchar c;
                    string p = utf_decodeChar(cast(string)se.string_[0..len], &u, &c);
                    if (p !is null)
                        error("%s", p);
                    else
                        buffer.writeUTF16(c);
                }
                newlen = buffer.offset / 2;
                buffer.writeUTF16(0);
                goto L1;

            case X(TY.Tchar, TY.Tdchar):
                for (size_t u = 0; u < len;)
                {
                    dchar c;
                    string p = utf_decodeChar(cast(string)se.string_[0..len], &u, &c);
                    if (p !is null)
                        error("%s", p);
                    buffer.write4(c);
                    newlen++;
                }
                buffer.write4(0);
                goto L1;

            case X(TY.Twchar,TY.Tchar):
                for (size_t u = 0; u < len;)
                {
                    dchar c;
                    // Cast the pointer BEFORE slicing: slicing the void*
                    // first yields len bytes, i.e. only len/2 wchars.
                    string p = utf_decodeWchar(cast(wstring)((cast(wchar*)se.string_)[0 .. len]), &u, &c);
                    if (p)
                        error("%s", p);
                    else
                        buffer.writeUTF8(c);
                }
                newlen = buffer.offset;
                buffer.writeUTF8(0);
                goto L1;

            case X(TY.Twchar,TY.Tdchar):
                for (size_t u = 0; u < len;)
                {
                    dchar c;
                    // Same pointer-then-slice order as in the wchar -> char case.
                    string p = utf_decodeWchar(cast(wstring)((cast(wchar*)se.string_)[0 .. len]), &u, &c);
                    if (p)
                        error("%s", p);
                    buffer.write4(c);
                    newlen++;
                }
                buffer.write4(0);
                goto L1;

            case X(TY.Tdchar,TY.Tchar):
                for (size_t u = 0; u < len; u++)
                {
                    dchar c = (cast(dchar*)se.string_)[u];
                    if (!utf_isValidDchar(c))
                        error("invalid UCS-32 char \\U%08x", c);
                    else
                        buffer.writeUTF8(c);
                    newlen++;
                }
                newlen = buffer.offset;     // length in UTF-8 code units
                buffer.writeUTF8(0);
                goto L1;

            case X(TY.Tdchar,TY.Twchar):
                for (size_t u = 0; u < len; u++)
                {
                    dchar c = (cast(dchar*)se.string_)[u];
                    if (!utf_isValidDchar(c))
                        error("invalid UCS-32 char \\U%08x", c);
                    else
                        buffer.writeUTF16(c);
                    newlen++;
                }
                newlen = buffer.offset / 2; // length in UTF-16 code units
                buffer.writeUTF16(0);
                goto L1;

            L1:
                // Install the transcoded data on a private copy.
                if (!copied)
                {
                    se = cast(StringExp)copy();
                    copied = 1;
                }
                se.string_ = buffer.extractData();
                se.len = newlen;
                se.sz = cast(ubyte)tb.nextOf().size();
                break;

            default:
                assert(typeb.nextOf().size() != tb.nextOf().size());
                goto Lcast;
        }
    }
L2:
    assert(copied);

    // See if need to truncate or extend the literal
    if (tb.ty == TY.Tsarray)
    {
        int dim2 = cast(int)(cast(TypeSArray)tb).dim.toInteger();

        //printf("dim from = %d, to = %d\n", se.len, dim2);

        // Changing dimensions
        if (dim2 != se.len)
        {
            // Copy when changing the string literal
            uint newsz = se.sz;
            void *s;
            int d;

            d = (dim2 < se.len) ? dim2 : se.len;
            s = cast(ubyte*)malloc((dim2 + 1) * newsz);
            memcpy(s, se.string_, d * newsz);
            // Extend with 0, add terminating 0
            memset(cast(char*)s + d * newsz, 0, (dim2 + 1 - d) * newsz);
            se.string_ = s;
            se.len = dim2;
        }
    }
    se.type = t;
    return se;

Lcast:
    Expression e = new CastExp(loc, se, t);
    e.type = t;     // so semantic() won't be run on e
    return e;
}
493
/// Not implemented in this port yet: every call fails the assertion.
int compare(Object obj)
{
    assert(0);
}
498
/// Not implemented in this port yet: every call fails the assertion.
bool isBool(bool result)
{
    assert(0);
}
503
/// Not implemented in this port yet: every call fails the assertion.
uint charAt(size_t i)
{
    assert(0);
}
508
/// Not implemented in this port yet: every call fails the assertion.
void toCBuffer(OutBuffer buf, HdrGenState* hgs)
{
    assert(0);
}
513
/// Not implemented in this port yet: every call fails the assertion.
void toMangleBuffer(OutBuffer buf)
{
    assert(0);
}
518
/**
 * Builds the back-end element for this literal, depending on the base
 * type of `type`:
 *   - dynamic array: refers to a static data symbol. The stringTab ring is
 *     searched first for an entry with the same module, length, unit size
 *     and bytes; on a miss a new symbol is generated from toDt() and
 *     recorded in the next ring slot.
 *   - static array: refers to a freshly generated static data symbol
 *     holding toDt() output followed by `sz` zero bytes.
 *   - pointer: an OPstring element carrying a malloc'ed copy of the data
 *     including its terminating 0.
 * Any other base type prints a diagnostic and fails an assertion.
 *
 * Returns: the element, with its location set from `loc`
 */
elem* toElem(IRState* irs)
{
    elem* e;
    Type tb = type.toBasetype();

    //printf("StringExp.toElem() %s, type = %s\n", toChars(), type.toChars());

    switch (tb.ty)
    {
        case TY.Tarray:
        {
            Symbol* sym = null;

            //printf("irs.m = %p\n", irs.m);
            //printf(" m = %s\n", irs.m.toChars());
            //printf(" len = %d\n", len);
            //printf(" sz = %d\n", sz);

            // Probe the ring starting at the most recently filled slot.
            for (size_t probe = 0; probe < STSIZE; probe++)
            {
                StringTab* slot = &stringTab[(stidx + probe) % STSIZE];
                //if (!slot.m) continue;
                //printf(" slot.m = %s\n", slot.m.toChars());
                //printf(" slot.len = %d\n", slot.len);
                //printf(" slot.sz = %d\n", slot.sz);
                if (slot.m is irs.m &&
                    slot.si &&
                    slot.len == len &&
                    slot.sz == sz &&
                    memcmp(slot.string_, string_, sz * len) == 0)
                {
                    //printf("use cached value\n");
                    sym = slot.si;      // use cached value
                    break;
                }
            }

            if (sym is null)
            {
                // Cache miss: advance the ring and emit a new data symbol.
                stidx = (stidx + 1) % STSIZE;
                StringTab* slot = &stringTab[stidx];

                dt_t* dt = null;
                toDt(&dt);

                sym = symbol_generate(SC.SCstatic, type_fake(TYM.TYdarray));
                sym.Sdt = dt;
                sym.Sfl = FL.FLdata;
                version (ELFOBJ) {// Burton
                    sym.Sseg = Segment.CDATA;
                }
                version (MACHOBJ) {
                    sym.Sseg = Segment.DATA;
                }
                outdata(sym);

                slot.m = irs.m;
                slot.si = sym;
                slot.string_ = string_;
                slot.len = len;
                slot.sz = sz;
            }

            e = el_var(sym);
            break;
        }

        case TY.Tsarray:
        {
            dt_t* dt = null;

            toDt(&dt);
            dtnzeros(&dt, sz);      // leave terminating 0

            Symbol* sym = symbol_generate(SC.SCstatic, type_allocn(TYM.TYarray, tschar));
            sym.Sdt = dt;
            sym.Sfl = FL.FLdata;

            version (ELFOBJ_OR_MACHOBJ) { // Burton
                sym.Sseg = Segment.CDATA;
            }
            outdata(sym);

            e = el_var(sym);
            break;
        }

        case TY.Tpointer:
        {
            // Bytes occupied by the data plus its terminating 0.
            e = el_calloc();
            e.Eoper = OPER.OPstring;
            static if (true) {
                // Match MEM_PH_FREE for OPstring in ztc\el.c
                e.EV.ss.Vstring = cast(char*)malloc((len + 1) * sz);
                memcpy(e.EV.ss.Vstring, string_, (len + 1) * sz);
            } else {
                e.EV.ss.Vstring = cast(char*)string_;
            }
            e.EV.ss.Vstrlen = (len + 1) * sz;
            e.Ety = TYM.TYnptr;
            break;
        }

        default:
            writef("type is %s\n", type.toChars());
            assert(0);
    }

    el_setLoc(e, loc);
    return e;
}
625
/**
 * Appends the static-data representation of this literal to `pdt`,
 * depending on the base type of `type`:
 *   - dynamic array: dtdword(len), then dtabytes over (len + 1) * sz bytes
 *     (the data plus its terminating 0);
 *   - static array: dtnbytes over len * sz bytes, followed by zero padding
 *     when the array dimension is larger than len;
 *   - pointer: dtabytes over (len + 1) * sz bytes.
 * Any other base type prints a diagnostic and fails an assertion.
 *
 * Params:
 *   pdt = position to append at, as used by the dt* helpers
 *
 * Returns: the position returned by the last dtabytes/dtnbytes/dtnzeros call
 */
dt_t** toDt(dt_t** pdt)
{
    //printf("StringExp.toDt() '%s', type = %s\n", toChars(), type.toChars());
    Type t = type.toBasetype();

    // BUG: should implement some form of static string pooling
    switch (t.ty)
    {
        case TY.Tarray:
            dtdword(pdt, len);
            pdt = dtabytes(pdt, TYM.TYnptr, 0, (len + 1) * sz, cast(char*)string_);
            break;

        case TY.Tsarray:
        {
            // Cast the base type, not `type`: a class cast in D is checked
            // at run time and yields null when `type` itself is not a
            // TypeSArray even though its base type is.
            TypeSArray tsa = cast(TypeSArray)t;
            long dim;

            pdt = dtnbytes(pdt, len * sz, cast(const(char)*)string_);
            if (tsa.dim)
            {
                dim = tsa.dim.toInteger();
                if (len < dim)
                {
                    // Pad remainder with 0
                    pdt = dtnzeros(pdt, cast(uint)((dim - len) * tsa.next.size()));
                }
            }
            break;
        }

        case TY.Tpointer:
            pdt = dtabytes(pdt, TYM.TYnptr, 0, (len + 1) * sz, cast(char*)string_);
            break;

        default:
            writef("StringExp.toDt(type = %s)\n", type.toChars());
            assert(0);
    }

    return pdt;
}
668 }
669