Mercurial > projects > ddmd
comparison dmd/StringExp.d @ 0:10317f0c89a5
Initial commit
author | korDen |
---|---|
date | Sat, 24 Oct 2009 08:42:06 +0400 |
parents | |
children | 7427ded8caf7 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:10317f0c89a5 |
---|---|
1 module dmd.StringExp; | |
2 | |
3 import dmd.Expression; | |
4 import dmd.backend.elem; | |
5 import dmd.InterState; | |
6 import dmd.TypeSArray; | |
7 import dmd.CastExp; | |
8 import dmd.MATCH; | |
9 import dmd.TY; | |
10 import dmd.TypeDArray; | |
11 import dmd.Type; | |
12 import dmd.TOK; | |
13 import dmd.OutBuffer; | |
14 import dmd.Loc; | |
15 import dmd.Scope; | |
16 import dmd.IRState; | |
17 import dmd.StringExp; | |
18 import dmd.HdrGenState; | |
19 import dmd.Utf; | |
20 import dmd.backend.dt_t; | |
21 import dmd.backend.Symbol; | |
22 import dmd.backend.StringTab; | |
23 import dmd.backend.Util; | |
24 import dmd.backend.SC; | |
25 import dmd.backend.TYM; | |
26 import dmd.backend.FL; | |
27 import dmd.backend.TYPE; | |
28 import dmd.backend.OPER; | |
29 | |
30 import core.stdc.string; | |
31 | |
32 class StringExp : Expression | |
33 { | |
// Literal payload and the bookkeeping that describes how it is encoded.
34 void* string_; // code-unit data: char, wchar, or dchar elements (width given by sz)
35 size_t len; // length in code units (chars, wchars, or dchars), not in bytes
36 ubyte sz; // bytes per code unit - 1: char, 2: wchar, 4: dchar
37 ubyte committed; // !=0 if type is committed
38 ubyte postfix; // literal suffix 'c', 'w', 'd'; 0 when constructed without one
39 | |
/// Creates a string literal without an explicit postfix (postfix 0).
this(Loc loc, string s)
{
    // Delegate to the full constructor; 0 means "no postfix given".
    this(loc, s, 0);
}
44 | |
/**
 * Creates an uncommitted string literal over the data of `s`.
 *
 * Params:
 *   loc     = source location forwarded to Expression
 *   s       = literal text; its pointer is stored, the data is not copied
 *   postfix = 'c', 'w', 'd', or 0 when the literal had no postfix
 */
this(Loc loc, string s, ubyte postfix)
{
    super(loc, TOK.TOKstring, StringExp.sizeof);

    // The payload starts out as 1-byte code units and not yet committed
    // to a type; semantic() may re-encode it according to the postfix.
    this.postfix = postfix;
    committed = 0;
    sz = 1;
    len = s.length;
    string_ = cast(void*)s.ptr;
}
55 | |
/// Not implemented in this revision: any call fails the assertion.
int equals(Object o)
{
    assert(0);
}
60 | |
/// Not implemented in this revision: any call fails the assertion.
string toChars()
{
    assert(0);
}
65 | |
/**
 * Assigns the literal's type the first time it is analysed.
 *
 * With a 'd' or 'w' postfix the UTF-8 payload is re-encoded as UTF-32 or
 * UTF-16 (including a terminating zero unit), `len`/`sz` are updated and the
 * literal is marked committed. A 'c' postfix only marks it committed. With
 * no recognised postfix the literal gets a char array type and stays
 * uncommitted. A literal that already has a type is returned untouched.
 *
 * Returns: this expression.
 */
Expression semantic(Scope sc)
{
version (LOGSEMANTIC) {
    printf("StringExp.semantic() %s\n", toChars());
}
    // Only the first call does any work.
    if (type)
        return this;

    scope OutBuffer buffer = new OutBuffer();
    size_t units = 0;   // code units written to the re-encoded buffer
    size_t pos;         // read offset into the source data
    dchar ch;           // most recently decoded character
    string msg;         // decoder error message, null on success

    switch (postfix)
    {
        case 'd':
            // One 4-byte unit per decoded character.
            pos = 0;
            while (pos < len)
            {
                msg = utf_decodeChar(cast(string)string_[0..len], &pos, &ch);
                if (msg !is null)
                {
                    error("%s", msg);
                    break;      // stop decoding at the first bad sequence
                }
                buffer.write4(ch);
                units++;
            }
            buffer.write4(0);
            string_ = buffer.extractData();
            len = units;
            sz = 4;
            //type = new TypeSArray(Type.tdchar, new IntegerExp(loc, len, Type.tindex));
            type = new TypeDArray(Type.tdchar.invariantOf());
            committed = 1;
            break;

        case 'w':
            pos = 0;
            while (pos < len)
            {
                msg = utf_decodeChar(cast(string)string_[0..len], &pos, &ch);
                if (msg !is null)
                {
                    error("%s", msg);
                    break;      // stop decoding at the first bad sequence
                }
                buffer.writeUTF16(ch);
                // Characters at or above 0x10000 are counted as two units.
                units += (ch >= 0x10000) ? 2 : 1;
            }
            buffer.writeUTF16(0);
            string_ = buffer.extractData();
            len = units;
            sz = 2;
            //type = new TypeSArray(Type.twchar, new IntegerExp(loc, len, Type.tindex));
            type = new TypeDArray(Type.twchar.invariantOf());
            committed = 1;
            break;

        case 'c':
            committed = 1;
            goto default;   // shares the char array type with the no-postfix case

        default:
            //type = new TypeSArray(Type.tchar, new IntegerExp(loc, len, Type.tindex));
            type = new TypeDArray(Type.tchar.invariantOf());
            break;
    }

    type = type.semantic(loc, sc);
    //type = type.invariantOf();
    //printf("type = %s\n", type.toChars());
    return this;
}
144 | |
/// Not implemented in this revision: any call fails the assertion.
Expression interpret(InterState* istate)
{
    assert(0);
}
149 | |
/// Not implemented in this revision: any call fails the assertion.
size_t length()
{
    assert(0);
}
154 | |
/// Not implemented in this revision: any call fails the assertion.
StringExp toUTF8(Scope sc)
{
    assert(0);
}
159 | |
/**
 * Performs the generic implicit cast, then restores this literal's
 * `committed` flag on the result when the result is still a string literal.
 */
Expression implicitCastTo(Scope sc, Type t)
{
    //printf("StringExp.implicitCastTo(%s of type %s) => %s\n", toChars(), type.toChars(), t.toChars());

    // Snapshot the flag before the cast runs.
    ubyte wasCommitted = this.committed;

    Expression result = Expression.implicitCastTo(sc, t);
    if (result.op != TOK.TOKstring)
        return result;

    // Retain polysemous nature if it started out that way
    (cast(StringExp)result).committed = wasCommitted;
    return result;
}
172 | |
/**
 * Determines how well this literal implicitly converts to `t`.
 *
 * An uncommitted literal whose element type is char, wchar or dchar gets
 * special treatment: it never converts to void*, and it may match array,
 * static array and pointer targets with a character element type. Every
 * other case is delegated to Expression.implicitConvTo.
 *
 * Returns: the MATCH level of the conversion.
 */
MATCH implicitConvTo(Type t)
{
    //printf("StringExp.implicitConvTo(this=%s, committed=%d, type=%s, t=%s)\n",
    //    toChars(), committed, type.toChars(), t.toChars());

    if (!committed)
    {
        // Uncommitted literals are never allowed to become void*.
        if (t.ty == TY.Tpointer && t.nextOf().ty == TY.Tvoid)
        {
            return MATCH.MATCHnomatch;
        }

        if (type.ty == TY.Tsarray || type.ty == TY.Tarray || type.ty == TY.Tpointer)
        {
            TY tyn = type.nextOf().ty;
            if (tyn == TY.Tchar || tyn == TY.Twchar || tyn == TY.Tdchar)
            {
                Type tn;
                MATCH mm;

                switch (t.ty)
                {
                    case TY.Tsarray:
                        if (type.ty == TY.Tsarray)
                        {
                            // Static array to static array: dimensions must agree.
                            if ((cast(TypeSArray)type).dim.toInteger() !=
                                (cast(TypeSArray)t).dim.toInteger())
                                return MATCH.MATCHnomatch;
                            TY tynto = t.nextOf().ty;
                            if (tynto == TY.Tchar || tynto == TY.Twchar || tynto == TY.Tdchar)
                                return MATCH.MATCHexact;
                        }
                        else if (type.ty == TY.Tarray)
                        {
                            // Dynamic array to static array: the literal must fit.
                            if (length() > (cast(TypeSArray)t).dim.toInteger())
                                return MATCH.MATCHnomatch;
                            TY tynto = t.nextOf().ty;
                            if (tynto == TY.Tchar || tynto == TY.Twchar || tynto == TY.Tdchar)
                                return MATCH.MATCHexact;
                        }
                        // Otherwise apply the same element checks as for arrays/pointers.
                        goto case TY.Tarray;

                    case TY.Tarray:
                    case TY.Tpointer:
                        tn = t.nextOf();
                        mm = MATCH.MATCHexact;
                        if (type.nextOf().mod != tn.mod)
                        {
                            // Differing modifiers are acceptable only towards const.
                            if (!tn.isConst())
                                return MATCH.MATCHnomatch;
                            mm = MATCH.MATCHconst;
                        }
                        switch (tn.ty)
                        {
                            case TY.Tchar:
                            case TY.Twchar:
                            case TY.Tdchar:
                                return mm;

                            default:
                                // A D switch with no matching case and no default
                                // raises SwitchError; non-character element types
                                // must instead reach the generic conversion below.
                                break;
                        }
                        break;

                    default:
                        break;
                }
            }
        }
    }

    return Expression.implicitConvTo(t);
}
250 | |
/// Packs a (from, to) pair of type kinds into one value usable as a case label.
static uint X(TY tf, TY tt)
{
    return tf * 256 + tt;
}
254 | |
/**
 * Casts this string literal to type `t`.
 *
 * The literal is never modified in place: whenever the result differs from
 * `this`, a copy is made first. Depending on the source and target types the
 * result is the (copied) literal with a new type, a literal re-encoded
 * between UTF-8/16/32, a literal truncated or zero-extended to a static
 * array dimension, or a CastExp wrapping the literal.
 *
 * Params:
 *   sc = scope, used only when deferring to Expression.castTo
 *   t  = target type
 * Returns: the resulting expression.
 */
Expression castTo(Scope sc, Type t)
{
    /* This follows copy-on-write; any changes to 'this'
     * will result in a copy.
     * The this.string member is considered immutable.
     */
    StringExp se;
    Type tb;
    int copied = 0;

    //printf("StringExp.castTo(t = %s), '%s' committed = %d\n", t.toChars(), toChars(), committed);

    if (!committed && t.ty == TY.Tpointer && t.nextOf().ty == TY.Tvoid)
    {
        error("cannot convert string literal to void*");
    }

    se = this;
    if (!committed)
    {
        // Casting commits the literal, so work on a committed copy.
        se = cast(StringExp)copy();
        se.committed = 1;
        copied = 1;
    }

    if (type == t)
    {
        return se;
    }

    tb = t.toBasetype();
    //printf("\ttype = %s\n", type.toChars());
    if (tb.ty == TY.Tdelegate && type.toBasetype().ty != TY.Tdelegate)
        return Expression.castTo(sc, t);

    Type typeb = type.toBasetype();
    if (typeb == tb)
    {
        // Same base type: only the nominal type changes.
        if (!copied)
        {
            se = cast(StringExp)copy();
            copied = 1;
        }
        se.type = t;
        return se;
    }

    if (committed && tb.ty == TY.Tsarray && typeb.ty == TY.Tarray)
    {
        // Reinterpret the same bytes with the target element size.
        se = cast(StringExp)copy();
        se.sz = cast(ubyte)tb.nextOf().size();
        se.len = (len * sz) / se.sz;
        se.committed = 1;
        se.type = t;
        return se;
    }

    if (tb.ty != TY.Tsarray && tb.ty != TY.Tarray && tb.ty != TY.Tpointer)
    {
        if (!copied)
        {
            se = cast(StringExp)copy();
            copied = 1;
        }
        goto Lcast;
    }
    if (typeb.ty != TY.Tsarray && typeb.ty != TY.Tarray && typeb.ty != TY.Tpointer)
    {
        if (!copied)
        {
            se = cast(StringExp)copy();
            copied = 1;
        }
        goto Lcast;
    }

    if (typeb.nextOf().size() == tb.nextOf().size())
    {
        // Equal element sizes: no re-encoding needed.
        if (!copied)
        {
            se = cast(StringExp)copy();
            copied = 1;
        }

        if (tb.ty == TY.Tsarray)
            goto L2;    // handle possible change in static array dimension
        se.type = t;
        return se;
    }

    if (committed)
        goto Lcast;

    {
        scope OutBuffer buffer = new OutBuffer();
        size_t newlen = 0;
        TY tfty = typeb.nextOf().toBasetype().ty;
        TY ttty = tb.nextOf().toBasetype().ty;
        switch (X(tfty, ttty))
        {
            case X(TY.Tchar, TY.Tchar):
            case X(TY.Twchar,TY.Twchar):
            case X(TY.Tdchar,TY.Tdchar):
                break;

            case X(TY.Tchar, TY.Twchar):
                for (size_t u = 0; u < len;)
                {
                    dchar c;
                    string p = utf_decodeChar(cast(string)se.string_[0..len], &u, &c);
                    if (p !is null)
                        error("%s", p);
                    else
                        buffer.writeUTF16(c);
                }
                newlen = buffer.offset / 2;
                buffer.writeUTF16(0);
                goto L1;

            case X(TY.Tchar, TY.Tdchar):
                for (size_t u = 0; u < len;)
                {
                    dchar c;
                    string p = utf_decodeChar(cast(string)se.string_[0..len], &u, &c);
                    if (p !is null)
                        error("%s", p);
                    buffer.write4(c);
                    newlen++;
                }
                buffer.write4(0);
                goto L1;

            case X(TY.Twchar,TY.Tchar):
                for (size_t u = 0; u < len;)
                {
                    dchar c;
                    // len counts wchars, while slicing a void* counts bytes:
                    // take len * wchar.sizeof bytes so the wstring has len units.
                    string p = utf_decodeWchar(cast(wstring)se.string_[0 .. len * wchar.sizeof], &u, &c);
                    if (p)
                        error("%s", p);
                    else
                        buffer.writeUTF8(c);
                }
                newlen = buffer.offset;
                buffer.writeUTF8(0);
                goto L1;

            case X(TY.Twchar,TY.Tdchar):
                for (size_t u = 0; u < len;)
                {
                    dchar c;
                    // See above: the slice must span len wchars, not len bytes.
                    string p = utf_decodeWchar(cast(wstring)se.string_[0 .. len * wchar.sizeof], &u, &c);
                    if (p)
                        error("%s", p);
                    buffer.write4(c);
                    newlen++;
                }
                buffer.write4(0);
                goto L1;

            case X(TY.Tdchar,TY.Tchar):
                for (size_t u = 0; u < len; u++)
                {
                    dchar c = (cast(dchar*)se.string_)[u];
                    if (!utf_isValidDchar(c))
                        error("invalid UCS-32 char \\U%08x", c);
                    else
                        buffer.writeUTF8(c);
                }
                // Length is whatever was actually written, in 1-byte units.
                newlen = buffer.offset;
                buffer.writeUTF8(0);
                goto L1;

            case X(TY.Tdchar,TY.Twchar):
                for (size_t u = 0; u < len; u++)
                {
                    dchar c = (cast(dchar*)se.string_)[u];
                    if (!utf_isValidDchar(c))
                        error("invalid UCS-32 char \\U%08x", c);
                    else
                        buffer.writeUTF16(c);
                }
                // Length is whatever was actually written, in 2-byte units.
                newlen = buffer.offset / 2;
                buffer.writeUTF16(0);
                goto L1;

            L1:
                // Install the re-encoded data on a private copy.
                if (!copied)
                {
                    se = cast(StringExp)copy();
                    copied = 1;
                }
                se.string_ = buffer.extractData();
                se.len = newlen;
                se.sz = cast(ubyte)tb.nextOf().size();
                break;

            default:
                assert(typeb.nextOf().size() != tb.nextOf().size());
                goto Lcast;
        }
    }
L2:
    assert(copied);

    // See if need to truncate or extend the literal
    if (tb.ty == TY.Tsarray)
    {
        int dim2 = cast(int)(cast(TypeSArray)tb).dim.toInteger();

        //printf("dim from = %d, to = %d\n", se.len, dim2);

        // Changing dimensions
        if (dim2 != se.len)
        {
            // Copy when changing the string literal
            uint newsz = se.sz;
            void *s;
            int d;

            d = (dim2 < se.len) ? dim2 : se.len;
            s = cast(ubyte*)malloc((dim2 + 1) * newsz);
            memcpy(s, se.string_, d * newsz);
            // Extend with 0, add terminating 0
            memset(cast(char*)s + d * newsz, 0, (dim2 + 1 - d) * newsz);
            se.string_ = s;
            se.len = dim2;
        }
    }
    se.type = t;
    return se;

Lcast:
    Expression e = new CastExp(loc, se, t);
    e.type = t;     // so semantic() won't be run on e
    return e;
}
493 | |
/// Not implemented in this revision: any call fails the assertion.
int compare(Object obj)
{
    assert(0);
}
498 | |
/// Not implemented in this revision: any call fails the assertion.
bool isBool(bool result)
{
    assert(0);
}
503 | |
/// Not implemented in this revision: any call fails the assertion.
uint charAt(size_t i)
{
    assert(0);
}
508 | |
/// Not implemented in this revision: any call fails the assertion.
void toCBuffer(OutBuffer buf, HdrGenState* hgs)
{
    assert(0);
}
513 | |
/// Not implemented in this revision: any call fails the assertion.
void toMangleBuffer(OutBuffer buf)
{
    assert(0);
}
518 | |
/**
 * Builds the backend element for this literal according to its base type.
 *
 * - Dynamic array: a static data symbol is emitted, or reused from the
 *   stringTab ring when an identical literal (same module, length, element
 *   size and bytes) is already recorded there.
 * - Static array: a fresh static data symbol is emitted, followed by `sz`
 *   zero bytes.
 * - Pointer: an OPstring element owning a malloc'ed copy of the data plus
 *   one extra code unit.
 *
 * Any other base type is reported and fails an assertion.
 */
elem* toElem(IRState* irs)
{
    elem* result;
    Type baseType = type.toBasetype();

static if (false) {
    printf("StringExp.toElem() %s, type = %s\n", toChars(), type.toChars());
}

    switch (baseType.ty)
    {
        case TY.Tarray:
        {
            Symbol* sym = null;

static if (false) {
            printf("irs.m = %p\n", irs.m);
            printf(" m = %s\n", irs.m.toChars());
            printf(" len = %d\n", len);
            printf(" sz = %d\n", sz);
}
            // Search the ring, starting at the current index, for an
            // identical literal already emitted for this module.
            for (size_t i = 0; i < STSIZE; i++)
            {
                StringTab* slot = &stringTab[(stidx + i) % STSIZE];
                //if (!slot.m) continue;
                //printf(" st.m = %s\n", slot.m.toChars());
                //printf(" st.len = %d\n", slot.len);
                //printf(" st.sz = %d\n", slot.sz);
                if (slot.m is irs.m &&
                    slot.si &&
                    slot.len == len &&
                    slot.sz == sz &&
                    memcmp(slot.string_, string_, sz * len) == 0)
                {
                    //printf("use cached value\n");
                    sym = slot.si;  // use cached value
                    break;
                }
            }

            if (sym is null)
            {
                // No hit: advance the ring and emit a new symbol into that slot.
                stidx = (stidx + 1) % STSIZE;
                StringTab* slot = &stringTab[stidx];

                dt_t* dt = null;
                toDt(&dt);

                sym = symbol_generate(SC.SCstatic, type_fake(TYM.TYdarray));
                sym.Sdt = dt;
                sym.Sfl = FL.FLdata;
version (ELFOBJ) {// Burton
                sym.Sseg = Segment.CDATA;
}
version (MACHOBJ) {
                sym.Sseg = Segment.DATA;
}
                outdata(sym);

                slot.m = irs.m;
                slot.si = sym;
                slot.string_ = string_;
                slot.len = len;
                slot.sz = sz;
            }

            result = el_var(sym);
            break;
        }

        case TY.Tsarray:
        {
            dt_t* dt = null;

            toDt(&dt);
            dtnzeros(&dt, sz);  // leave terminating 0

            Symbol* sym = symbol_generate(SC.SCstatic, type_allocn(TYM.TYarray, tschar));
            sym.Sdt = dt;
            sym.Sfl = FL.FLdata;

version (ELFOBJ_OR_MACHOBJ) { // Burton
            sym.Sseg = Segment.CDATA;
}
            outdata(sym);

            result = el_var(sym);
            break;
        }

        case TY.Tpointer:
        {
            // Data plus one extra code unit, in bytes.
            size_t nbytes = (len + 1) * sz;

            result = el_calloc();
            result.Eoper = OPER.OPstring;
            // Match MEM_PH_FREE for OPstring in ztc\el.c: the element gets its
            // own heap copy instead of aliasing string_.
            result.EV.ss.Vstring = cast(char*)malloc(nbytes);
            memcpy(result.EV.ss.Vstring, string_, nbytes);
            result.EV.ss.Vstrlen = nbytes;
            result.Ety = TYM.TYnptr;
            break;
        }

        default:
            writef("type is %s\n", type.toChars());
            assert(0);
    }

    el_setLoc(result, loc);
    return result;
}
625 | |
/**
 * Appends the static-data representation of this literal to `pdt`.
 *
 * - Dynamic array: the length word, then a reference to the data emitted
 *   with one extra code unit.
 * - Static array: the raw data, zero-padded up to the array dimension when
 *   the literal is shorter.
 * - Pointer: a reference to the data emitted with one extra code unit.
 *
 * Any other base type is reported and fails an assertion.
 *
 * Returns: the updated dt chain position.
 */
dt_t** toDt(dt_t** pdt)
{
    //printf("StringExp.toDt() '%s', type = %s\n", toChars(), type.toChars());
    Type t = type.toBasetype();

    // BUG: should implement some form of static string pooling
    switch (t.ty)
    {
        case TY.Tarray:
            dtdword(pdt, len);
            pdt = dtabytes(pdt, TYM.TYnptr, 0, (len + 1) * sz, cast(char*)string_);
            break;

        case TY.Tsarray:
        {
            // Cast the base type that the switch actually inspected. Casting
            // `type` yields null (a D class cast is dynamic) whenever `type`
            // is not itself a TypeSArray, and tsa.dim would then crash.
            TypeSArray tsa = cast(TypeSArray)t;
            long dim;

            pdt = dtnbytes(pdt, len * sz, cast(const(char)*)string_);
            if (tsa.dim)
            {
                dim = tsa.dim.toInteger();
                if (len < dim)
                {
                    // Pad remainder with 0
                    pdt = dtnzeros(pdt, cast(uint)((dim - len) * tsa.next.size()));
                }
            }
            break;
        }

        case TY.Tpointer:
            pdt = dtabytes(pdt, TYM.TYnptr, 0, (len + 1) * sz, cast(char*)string_);
            break;

        default:
            writef("StringExp.toDt(type = %s)\n", type.toChars());
            assert(0);
    }

    return pdt;
}
668 } | |
669 |