comparison dmd/lexer.c @ 19:788401029ecf trunk

[svn r23] * Updated to DMD 1.021
author lindquist
date Thu, 04 Oct 2007 03:42:56 +0200
parents c53b6e3fe49a
children 0ab29b838084
comparison
equal deleted inserted replaced
18:c05ef76f1c20 19:788401029ecf
457 size_t idx; 457 size_t idx;
458 458
459 if (!p || !*p) 459 if (!p || !*p)
460 goto Linvalid; 460 goto Linvalid;
461 461
462 if (isdigit(*p)) 462 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
463 goto Linvalid; 463 goto Linvalid;
464 464
465 len = strlen(p); 465 len = strlen(p);
466 idx = 0; 466 idx = 0;
467 while (p[idx]) 467 while (p[idx])
566 goto case_ident; 566 goto case_ident;
567 p++; 567 p++;
568 t->value = hexStringConstant(t); 568 t->value = hexStringConstant(t);
569 return; 569 return;
570 570
571 #if V2
572 case 'q':
573 if (p[1] == '"')
574 {
575 p++;
576 t->value = delimitedStringConstant(t);
577 return;
578 }
579 else if (p[1] == '{')
580 {
581 p++;
582 t->value = tokenStringConstant(t);
583 return;
584 }
585 else
586 goto case_ident;
587 #endif
571 588
572 case '"': 589 case '"':
573 t->value = escapeStringConstant(t,0); 590 t->value = escapeStringConstant(t,0);
574 return; 591 return;
575 592
596 case 'L': 613 case 'L':
597 #endif 614 #endif
598 case 'a': case 'b': case 'c': case 'd': case 'e': 615 case 'a': case 'b': case 'c': case 'd': case 'e':
599 case 'f': case 'g': case 'h': case 'i': case 'j': 616 case 'f': case 'g': case 'h': case 'i': case 'j':
600 case 'k': case 'm': case 'n': case 'o': 617 case 'k': case 'm': case 'n': case 'o':
618 #if V2
619 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
620 #else
601 case 'p': case 'q': /*case 'r':*/ case 's': case 't': 621 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
622 #endif
602 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': 623 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
603 case 'z': 624 case 'z':
604 case 'A': case 'B': case 'C': case 'D': case 'E': 625 case 'A': case 'B': case 'C': case 'D': case 'E':
605 case 'F': case 'G': case 'H': case 'I': case 'J': 626 case 'F': case 'G': case 'H': case 'I': case 'J':
606 case 'K': case 'M': case 'N': case 'O': 627 case 'K': case 'M': case 'N': case 'O':
1429 break; 1450 break;
1430 } 1451 }
1431 } 1452 }
1432 } 1453 }
1433 1454
1455
1456 #if V2
1457 /**************************************
1458 * Lex delimited strings:
1459 * q"(foo(xxx))" // "foo(xxx)"
1460 * q"[foo(]" // "foo("
1461 * q"/foo]/" // "foo]"
1462 * q"HERE
1463 * foo
1464 * HERE" // "foo\n"
1465 * Input:
1466 * p is on the "
1467 */
1468
1469 TOK Lexer::delimitedStringConstant(Token *t)
1470 { unsigned c;
1471 Loc start = loc;
1472 unsigned delimleft = 0;
1473 unsigned delimright = 0;
1474 unsigned nest = 1;
1475 unsigned nestcount;
1476 Identifier *hereid = NULL;
1477 unsigned blankrol = 0;
1478 unsigned startline = 0;
1479
1480 p++;
1481 stringbuffer.reset();
1482 while (1)
1483 {
1484 c = *p++;
1485 //printf("c = '%c'\n", c);
1486 switch (c)
1487 {
1488 case '\n':
1489 Lnextline:
1490 printf("Lnextline\n");
1491 loc.linnum++;
1492 startline = 1;
1493 if (blankrol)
1494 { blankrol = 0;
1495 continue;
1496 }
1497 if (hereid)
1498 {
1499 stringbuffer.writeUTF8(c);
1500 continue;
1501 }
1502 break;
1503
1504 case '\r':
1505 if (*p == '\n')
1506 continue; // ignore
1507 c = '\n'; // treat EndOfLine as \n character
1508 goto Lnextline;
1509
1510 case 0:
1511 case 0x1A:
1512 goto Lerror;
1513
1514 default:
1515 if (c & 0x80)
1516 { p--;
1517 c = decodeUTF();
1518 p++;
1519 if (c == PS || c == LS)
1520 goto Lnextline;
1521 }
1522 break;
1523 }
1524 if (delimleft == 0)
1525 { delimleft = c;
1526 nest = 1;
1527 nestcount = 1;
1528 if (c == '(')
1529 delimright = ')';
1530 else if (c == '{')
1531 delimright = '}';
1532 else if (c == '[')
1533 delimright = ']';
1534 else if (c == '<')
1535 delimright = '>';
1536 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1537 { // Start of identifier; must be a heredoc
1538 Token t;
1539 p--;
1540 scan(&t); // read in heredoc identifier
1541 if (t.value != TOKidentifier)
1542 { error("identifier expected for heredoc, not %s", t.toChars());
1543 delimright = c;
1544 }
1545 else
1546 { hereid = t.ident;
1547 printf("hereid = '%s'\n", hereid->toChars());
1548 blankrol = 1;
1549 }
1550 nest = 0;
1551 }
1552 else
1553 { delimright = c;
1554 nest = 0;
1555 }
1556 }
1557 else
1558 {
1559 if (blankrol)
1560 { error("heredoc rest of line should be blank");
1561 blankrol = 0;
1562 continue;
1563 }
1564 if (nest == 1)
1565 {
1566 if (c == delimleft)
1567 nestcount++;
1568 else if (c == delimright)
1569 { nestcount--;
1570 if (nestcount == 0)
1571 goto Ldone;
1572 }
1573 }
1574 else if (c == delimright)
1575 goto Ldone;
1576 if (startline && isalpha(c))
1577 { Token t;
1578 unsigned char *psave = p;
1579 p--;
1580 scan(&t); // read in possible heredoc identifier
1581 printf("endid = '%s'\n", t.ident->toChars());
1582 if (t.value == TOKidentifier && t.ident->equals(hereid))
1583 { /* should check that rest of line is blank
1584 */
1585 printf("done\n");
1586 goto Ldone;
1587 }
1588 p = psave;
1589 }
1590 stringbuffer.writeUTF8(c);
1591 startline = 0;
1592 }
1593 }
1594
1595 Ldone:
1596 if (*p == '"')
1597 p++;
1598 else
1599 error("delimited string must end in %c\"", delimright);
1600 t->len = stringbuffer.offset;
1601 stringbuffer.writeByte(0);
1602 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1603 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1604 stringPostfix(t);
1605 return TOKstring;
1606
1607 Lerror:
1608 error("unterminated string constant starting at %s", start.toChars());
1609 t->ustring = (unsigned char *)"";
1610 t->len = 0;
1611 t->postfix = 0;
1612 return TOKstring;
1613 }
1614
1615 /**************************************
1616 * Lex delimited strings:
1617 * q{ foo(xxx) } // " foo(xxx) "
1618 * q{foo(} // "foo("
1619 * q{{foo}"}"} // "{foo}"}""
1620 * Input:
1621 * p is on the q
1622 */
1623
1624 TOK Lexer::tokenStringConstant(Token *t)
1625 {
1626 unsigned nest = 1;
1627 Loc start = loc;
1628 unsigned char *pstart = ++p;
1629
1630 while (1)
1631 { Token tok;
1632
1633 scan(&tok);
1634 switch (tok.value)
1635 {
1636 case TOKlcurly:
1637 nest++;
1638 continue;
1639
1640 case TOKrcurly:
1641 if (--nest == 0)
1642 goto Ldone;
1643 continue;
1644
1645 case TOKeof:
1646 goto Lerror;
1647
1648 default:
1649 continue;
1650 }
1651 }
1652
1653 Ldone:
1654 t->len = p - 1 - pstart;
1655 t->ustring = (unsigned char *)mem.malloc(t->len + 1);
1656 memcpy(t->ustring, pstart, t->len);
1657 t->ustring[t->len] = 0;
1658 stringPostfix(t);
1659 return TOKstring;
1660
1661 Lerror:
1662 error("unterminated token string constant starting at %s", start.toChars());
1663 t->ustring = (unsigned char *)"";
1664 t->len = 0;
1665 t->postfix = 0;
1666 return TOKstring;
1667 }
1668
1669 #endif
1670
1671
1434 /************************************** 1672 /**************************************
1435 */ 1673 */
1436 1674
1437 TOK Lexer::escapeStringConstant(Token *t, int wide) 1675 TOK Lexer::escapeStringConstant(Token *t, int wide)
1438 { unsigned c; 1676 { unsigned c;
2639 { "version", TOKversion }, 2877 { "version", TOKversion },
2640 2878
2641 // Added after 1.0 2879 // Added after 1.0
2642 { "ref", TOKref }, 2880 { "ref", TOKref },
2643 { "macro", TOKmacro }, 2881 { "macro", TOKmacro },
2882 #if V2
2883 { "__traits", TOKtraits },
2884 #endif
2644 }; 2885 };
2645 2886
2646 int Token::isKeyword() 2887 int Token::isKeyword()
2647 { 2888 {
2648 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++) 2889 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)