Mercurial > projects > dil
changeset 419:89e40d43065d
Added new member 'p_newl' to Lexer and applied some fixes.
Added method set_p_newl() that sets this.p_newl one character past the
previously encountered newline.
Not using decodeUTF8() to check for LS and PS in cases where the newline
mustn't be skipped.
Added a particular assert() to almost every case '\n' statement.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Fri, 28 Sep 2007 12:10:11 +0200 |
parents | 7354f15cd5e9 |
children | ce644d724d87 |
files | trunk/src/dil/Lexer.d |
diffstat | 1 files changed, 55 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/trunk/src/dil/Lexer.d Thu Sep 27 17:06:28 2007 +0200 +++ b/trunk/src/dil/Lexer.d Fri Sep 28 12:10:11 2007 +0200 @@ -49,6 +49,7 @@ char* p; /// Points to the current character in the source text. char* end; /// Points one character past the end of the source text. + char* p_newl; uint loc = 1; /// Actual line of code. uint loc_old; /// Store actual line number when #line token is scanned. @@ -182,6 +183,22 @@ } } + void set_p_newl(char* p) + { + assert(delegate() + { + if (!((p-1) >= text.ptr && p < end)) + return false; + // Check that previous character is a newline. + if (p[-1] != '\n' && p[-1] != '\r' && + p[-1] != LS[2] && p[-1] != PS[2]) + return false; + return true; + }() == true + ); + this.p_newl = p; + } + public void scan_(out Token t) in { @@ -205,8 +222,10 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); ++p; ++loc; + set_p_newl(p); continue; case LS[0]: if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) @@ -306,13 +325,12 @@ { case '\r', '\n', 0, _Z_: break; + case LS[0]: + if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) + break; default: if (c & 128) - { - c = decodeUTF8(); - if (c == LSd || c == PSd) - break; - } + decodeUTF8(); continue; } break; // Exit loop. @@ -685,8 +703,10 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); ++p; ++loc; + set_p_newl(p); continue; case LS[0]: if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) @@ -818,13 +838,12 @@ { case '\r', '\n', 0, _Z_: break; + case LS[0]: + if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) + break; default: if (c & 128) - { - c = decodeUTF8(); - if (c == LSd || c == PSd) - break; - } + decodeUTF8(); continue; } break; // Exit loop. 
@@ -1113,7 +1132,9 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); ++loc; + set_p_newl(p+1); continue; case 0, _Z_: error(MID.UnterminatedBlockComment); @@ -1160,7 +1181,9 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); ++loc; + set_p_newl(p+1); continue; case 0, _Z_: error(MID.UnterminatedNestedComment); @@ -1230,8 +1253,10 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); ++loc; c = '\n'; // Convert EndOfLine to \n. + set_p_newl(p+1); break; case 0, _Z_: error(MID.UnterminatedString); @@ -1239,15 +1264,10 @@ default: if (c & 128) { -// char* begin = p; c = decodeUTF8(); if (c == LSd || c == PSd) goto case '\n'; - // We don't copy per pointer because we might include - // invalid, skipped utf-8 sequences. See decodeUTF8(). -// ++p; -// buffer ~= begin[0 .. p - begin]; encodeUTF8(buffer, c); continue; } @@ -1280,15 +1300,18 @@ case '\'': ++p; id = MID.EmptyCharacterLiteral; + // fall through case '\n', '\r', 0, _Z_: goto Lerr; + case LS[0]: + if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) + goto Lerr; + // fall through default: uint c = *p; if (c & 128) { c = decodeUTF8(); - if (c == LSd || c == PSd) - goto Lerr; if (c <= 0xFFFF) type = TOK.WCharLiteral; else @@ -1337,8 +1360,10 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); c = '\n'; // Convert EndOfLine ('\r','\r\n','\n',LS,PS) to '\n' ++loc; + set_p_newl(p+1); break; case '`': case '"': @@ -1403,7 +1428,9 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); ++loc; + set_p_newl(p+1); continue; default: if (ishexad(c)) @@ -1427,17 +1454,12 @@ continue; } else if (isspace(c)) - continue; - - if (c & 128) + continue; // Skip spaces. 
+ else if (c & 128) { c = decodeUTF8(); if (c == LSd || c == PSd) - { - ++p; ++p; - ++loc; - continue; - } + goto case '\n'; } else if (c == 0 || c == _Z_) { @@ -1487,15 +1509,16 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); ++p; ++loc; + set_p_newl(p); return '\n'; case LS[0]: if (p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2])) { - ++p; ++p; ++p; - ++loc; - return '\n'; + ++p; ++p; + goto case '\n'; } default: } @@ -1504,8 +1527,8 @@ // Skip leading newlines: while (scanNewline() != 0){} - assert(*p != '\n' && *p != '\r'); - assert(!(*p == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))); + assert(*p != '\n' && *p != '\r' && + !(*p == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))); char* begin = p; c = *p; @@ -1554,8 +1577,10 @@ if (p[1] == '\n') ++p; case '\n': + assert(*p == '\n' || *p == '\r' || *p == LS[2] || *p == PS[2]); c = '\n'; // Convert EndOfLine ('\r','\r\n','\n',LS,PS) to '\n' ++loc; + set_p_newl(p+1); break; case 0, _Z_: // TODO: error(MID.UnterminatedDelimitedString);