# HG changeset patch # User Anders Johnsen # Date 1208885451 -7200 # Node ID f977aa28eb3228a52174b07c59d1f1f46b1d026e # Parent 9fb190ad81a4c6ff3dfa4655820429dc35231c3c Now using arrays insted of switch - should speed things up a notch! diff -r 9fb190ad81a4 -r f977aa28eb32 lexer/Lexer.d --- a/lexer/Lexer.d Tue Apr 22 00:31:57 2008 +0200 +++ b/lexer/Lexer.d Tue Apr 22 19:30:51 2008 +0200 @@ -14,7 +14,40 @@ this (DataSource source) { this.source = source; - this.position = 0; + position = 0; + + + charTable.length = 256; + foreach( char c ; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + charTable[c] = CharType.Letter; + + foreach( char c ; "0123456789") + charTable[c] = CharType.Number; + + foreach( char c ; "(){};:.,=!<>+-*/") + charTable[c] = CharType.Symbol; + + foreach( char c ; " \n") + charTable[c] = CharType.Whitespace; + + symbolFunctions.length = 256; + + symbolFunctions['('] = &openParentheses; + symbolFunctions[')'] = &closeParentheses; + symbolFunctions['{'] = &openBrace; + symbolFunctions['}'] = &closeBrace; + symbolFunctions[';'] = &seperator; + symbolFunctions[':'] = &colon; + symbolFunctions['.'] = &dot; + symbolFunctions[','] = &comma; + symbolFunctions['='] = &eq; + symbolFunctions['!'] = &ne; + symbolFunctions['<'] = &le; + symbolFunctions['>'] = &ge; + symbolFunctions['+'] = &add; + symbolFunctions['-'] = &sub; + symbolFunctions['*'] = &mul; + symbolFunctions['/'] = &div; } Token next () @@ -54,8 +87,130 @@ { return this.errors; } + private: + Token eq() + { + if(source.data[position] == '=') + return Token(Tok.Eq, Location(position++ - 1, source), 2); + return Token(Tok.Assign, Location(position - 1, source), 1); + } + Token openBrace() + { + return Token(Tok.OpenBrace, Location(position - 1, source), 1); + } + Token openParentheses() + { + return Token(Tok.OpenParentheses, Location(position - 1, this.source), 1); + } + Token closeParentheses() + { + return Token(Tok.CloseParentheses, Location(position - 1, this.source), 1); + } + Token closeBrace() + { + return
Token(Tok.CloseBrace, Location(position - 1, this.source), 1); + } + Token seperator() + { + Token t = Token(Tok.Seperator, Location(position - 1, source), 1); + return t; + } + Token colon() + { + return Token(Tok.Colon, Location(position - 1, this.source), 1); + } + Token dot() + { + return Token(Tok.Dot, Location(position - 1, this.source), 1); + } + Token comma() + { + return Token(Tok.Comma, Location(position - 1, this.source), 1); + } + Token ne() + { + if(source.data[position] == '=') + return Token(Tok.Ne, Location(position++ - 1, this.source), 2); + return Token(Tok.Not, Location(position - 1, this.source), 1); + } + Token le() + { + if(source.data[position] == '=') + return Token(Tok.Le, Location(position++ - 1, this.source), 2); + return Token(Tok.Lt, Location(position - 1, this.source), 1); + } + Token ge() + { + if(source.data[position] == '=') + return Token(Tok.Ge, Location(position++ - 1, this.source), 2); + return Token(Tok.Gt, Location(position - 1, this.source), 1); + } + Token add() + { + return Token(Tok.Add, Location(position - 1, this.source), 1); + } + Token sub() + { + return Token(Tok.Sub, Location(position - 1, this.source), 1); + } + Token mul() + { + return Token(Tok.Mul, Location(position - 1, this.source), 1); + } + Token div() + { + switch(source.data[position]) + { + case '/': + while(getNextChar != CharType.EOF) + { + if(source.data[position++] == '\n') + return this.next; + } + return Token(Tok.EOF, Location(position, this.source), 0); + case '*': + position += 2; + while(getNextChar != CharType.EOF) + { + ++position; + if(source.data[position-2] == '*') + if(source.data[position-1] == '/') + return this.next; + } + throw error(__LINE__, "Unexpected end of file. 
Unclosed comment block"); + + case '+': + position += 2; + int nesting = 1; + while(getNextChar != CharType.EOF) + { + ++position; + if(source.data[position-2] == '+') + if(source.data[position-1] == '/') + { + position++; + nesting--; + } + + if(source.data[position-2] == '/') + if(source.data[position-1] == '+') + { + nesting++; + position++; + } + + if(nesting == 0) + return this.next; + } + throw error(__LINE__, "Unexpected end of file. Unclosed comment block"); + + default: + return Token(Tok.Div, Location(position - 1, this.source), 1); + } + } + Token lexNumber () { int i = 0; @@ -69,97 +224,9 @@ Token lexSymbol () { - switch(source.data[position++]) - { - case '(': - return Token(Tok.OpenParentheses, Location(position - 1, this.source), 1); - case ')': - return Token(Tok.CloseParentheses, Location(position - 1, this.source), 1); - case '{': - return Token(Tok.OpenBrace, Location(position - 1, this.source), 1); - case '}': - return Token(Tok.CloseBrace, Location(position - 1, this.source), 1); - case ';': - return Token(Tok.Seperator, Location(position - 1, this.source), 1); - case ':': - return Token(Tok.Colon, Location(position - 1, this.source), 1); - case '.': - return Token(Tok.Dot, Location(position - 1, this.source), 1); - case ',': - return Token(Tok.Comma, Location(position - 1, this.source), 1); - case '=': - if(source.data[position] == '=') - return Token(Tok.Eq, Location(position++ - 1, this.source), 2); - return Token(Tok.Assign, Location(position - 1, this.source), 1); - case '!': - if(source.data[position] == '=') - return Token(Tok.Ne, Location(position++ - 1, this.source), 2); - return Token(Tok.Not, Location(position - 1, this.source), 1); - case '<': - if(source.data[position] == '=') - return Token(Tok.Le, Location(position++ - 1, this.source), 2); - return Token(Tok.Lt, Location(position - 1, this.source), 1); - case '>': - if(source.data[position] == '=') - return Token(Tok.Ge, Location(position++ - 1, this.source), 2); - return 
Token(Tok.Gt, Location(position - 1, this.source), 1); - case '+': - return Token(Tok.Add, Location(position - 1, this.source), 1); - case '-': - return Token(Tok.Sub, Location(position - 1, this.source), 1); - case '*': - return Token(Tok.Mul, Location(position - 1, this.source), 1); - case '/': - switch(source.data[position]) - { - case '/': - while(getNextChar != CharType.EOF) - { - if(source.data[position++] == '\n') - return this.next; - } - return Token(Tok.EOF, Location(position, this.source), 0); + Token t = symbolFunctions[source.data[position++]](); - case '*': - position += 2; - while(getNextChar != CharType.EOF) - { - ++position; - if(source.data[position-2] == '*') - if(source.data[position-1] == '/') - return this.next; - } - throw error(__LINE__, "Unexpected end of file. Unclosed comment block"); - - case '+': - position += 2; - int nesting = 1; - while(getNextChar != CharType.EOF) - { - ++position; - if(source.data[position-2] == '+') - if(source.data[position-1] == '/') - { - position++; - nesting--; - } - - if(source.data[position-2] == '/') - if(source.data[position-1] == '+') - { - nesting++; - position++; - } - - if(nesting == 0) - return this.next; - } - throw error(__LINE__, "Unexpected end of file. 
Unclosed comment block"); - - default: - return Token(Tok.Div, Location(position - 1, this.source), 1); - } - } + return t; } Token lexLetter () @@ -196,40 +263,12 @@ char current = source.data[position + offset]; - if (current >= 'A' && current <= 'Z' || - current >= 'a' && current <= 'z' || current > 127) - return CharType.Letter; - - if (current >= '0' && current <= '9') - return CharType.Number; - - switch(current) - { - case ' ': - case '\n': - return CharType.Whitespace; + CharType c = charTable[current]; - case '(': - case ')': - case '{': - case '}': - case ';': - case ':': - case '.': - case ',': - case '=': - case '!': - case '<': - case '>': - case '+': - case '-': - case '*': - case '/': - return CharType.Symbol; +// if(c == CharType.INVALID) + // throw error(__LINE__, "Read invalid symbol: '%0'").arg(current); - default: - throw error(__LINE__, "Read invalid symbol: '%0'").arg(current); - } + return c; } @@ -238,13 +277,16 @@ return (new Error(msg)).loc(Location(position, source)); } + int position; DataSource source; - int position; Error[] errors; + CharType[] charTable; + Token delegate()[] symbolFunctions; } enum CharType : ubyte { + INVALID, Letter, Number, Symbol,