Mercurial > projects > dang
view lexer/Lexer.d @ 41:f977aa28eb32 new_gen
Now using arrays instead of switch - should speed things up a notch!
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Tue, 22 Apr 2008 19:30:51 +0200 |
parents | ce17bea8e9bd |
children | 4e879f82dd64 |
line wrap: on
line source
module lexer.Lexer;

import misc.Error,
       misc.DataSource;

import lexer.Token,
       lexer.Keyword;

import tango.io.Stdout;

/**
 * Table-driven lexer.
 *
 * Every input character is classified through `charTable` (indexed by the
 * character's value) and symbol characters are dispatched through
 * `symbolFunctions` (also indexed by the character's value) to the member
 * function that builds the matching token.
 */
class Lexer
{
public:
    /**
     * Creates a lexer reading from `source`, starting at offset 0, and fills
     * the classification and dispatch tables.
     */
    this (DataSource source)
    {
        this.source = source;
        position = 0;

        // Entries that are not assigned below keep the enum's first member,
        // CharType.INVALID.
        charTable.length = 256;
        foreach (char c; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
            charTable[c] = CharType.Letter;

        foreach (char c; "0123456789")
            charTable[c] = CharType.Number;

        foreach (char c; "(){};:.,=!<>+-*/")
            charTable[c] = CharType.Symbol;

        // Tab and carriage return are accepted as whitespace as well, so
        // that tab-indented input or CRLF line endings do not hit the
        // invalid-character path.
        foreach (char c; " \n\t\r")
            charTable[c] = CharType.Whitespace;

        // One handler per character classified as Symbol above.
        symbolFunctions.length = 256;

        symbolFunctions['('] = &openParentheses;
        symbolFunctions[')'] = &closeParentheses;
        symbolFunctions['{'] = &openBrace;
        symbolFunctions['}'] = &closeBrace;
        symbolFunctions[';'] = &seperator;
        symbolFunctions[':'] = &colon;
        symbolFunctions['.'] = &dot;
        symbolFunctions[','] = &comma;
        symbolFunctions['='] = &eq;
        symbolFunctions['!'] = &ne;
        symbolFunctions['<'] = &le;
        symbolFunctions['>'] = &ge;
        symbolFunctions['+'] = &add;
        symbolFunctions['-'] = &sub;
        symbolFunctions['*'] = &mul;
        symbolFunctions['/'] = &div;
    }

    /**
     * Consumes and returns the next token.
     *
     * Whitespace is skipped. At end of input a `Tok.EOF` token with a
     * default-initialised location is returned.
     *
     * Throws: an `Error` located at the offending character when the
     * character is not classified in `charTable`; comment scanning in
     * `div` may also throw for an unterminated comment.
     */
    Token next ()
    {
        // Skip whitespace with a loop rather than one recursive call per
        // character, so long runs of blanks cannot exhaust the stack.
        while (getNextChar == CharType.Whitespace)
            position += 1;

        switch (getNextChar)
        {
            case CharType.EOF:
            {
                Location l;
                return Token (Tok.EOF, l, 0);
            }

            case CharType.Symbol:
                return lexSymbol;

            case CharType.Letter:
                return lexLetter;

            case CharType.Number:
                return lexNumber;

            default:
                // Only CharType.INVALID can reach this point; position is
                // in range because the EOF case was not taken.
                throw error(__LINE__,
                        "Read invalid symbol: '" ~ source.data[position] ~ "'");
        }
    }

    /**
     * Returns a token without consuming it.
     *
     * `skip` tokens are lexed and discarded first, then the following token
     * is lexed and returned; finally `position` is restored to the value it
     * had on entry. The position is not restored if lexing throws.
     */
    Token peek (int skip = 0)
    {
        int oldPosition = this.position;
        while (skip-- > 0)
            this.next;
        Token t = this.next;
        this.position = oldPosition;
        return t;
    }

    /// Returns the `errors` array of this lexer.
    public Error[] getErrors()
    {
        return this.errors;
    }

private:

    // ------------------------------------------------------------------
    // Symbol handlers.
    //
    // Each handler is invoked by lexSymbol *after* the symbol character has
    // been consumed, so the symbol itself sits at `position - 1` and
    // `position` indexes the character that follows it (if there is one).
    // ------------------------------------------------------------------

    /// True when there is a character at `position` and it equals `c`.
    /// Used for look-ahead so that a symbol at the very end of the input
    /// does not index past the data array.
    bool nextIs(char c)
    {
        return position < source.data.length && source.data[position] == c;
    }

    /// '=' : yields Tok.Eq for "==" (consuming the second '='), else Tok.Assign.
    Token eq()
    {
        if (nextIs('='))
            return Token(Tok.Eq, Location(position++ - 1, source), 2);
        return Token(Tok.Assign, Location(position - 1, source), 1);
    }

    /// '{'
    Token openBrace()
    {
        return Token(Tok.OpenBrace, Location(position - 1, source), 1);
    }

    /// '('
    Token openParentheses()
    {
        return Token(Tok.OpenParentheses, Location(position - 1, this.source), 1);
    }

    /// ')'
    Token closeParentheses()
    {
        return Token(Tok.CloseParentheses, Location(position - 1, this.source), 1);
    }

    /// '}'
    Token closeBrace()
    {
        return Token(Tok.CloseBrace, Location(position - 1, this.source), 1);
    }

    /// ';'
    Token seperator()
    {
        return Token(Tok.Seperator, Location(position - 1, source), 1);
    }

    /// ':'
    Token colon()
    {
        return Token(Tok.Colon, Location(position - 1, this.source), 1);
    }

    /// '.'
    Token dot()
    {
        return Token(Tok.Dot, Location(position - 1, this.source), 1);
    }

    /// ','
    Token comma()
    {
        return Token(Tok.Comma, Location(position - 1, this.source), 1);
    }

    /// '!' : yields Tok.Ne for "!=" (consuming the '='), else Tok.Not.
    Token ne()
    {
        if (nextIs('='))
            return Token(Tok.Ne, Location(position++ - 1, this.source), 2);
        return Token(Tok.Not, Location(position - 1, this.source), 1);
    }

    /// '<' : yields Tok.Le for "<=" (consuming the '='), else Tok.Lt.
    Token le()
    {
        if (nextIs('='))
            return Token(Tok.Le, Location(position++ - 1, this.source), 2);
        return Token(Tok.Lt, Location(position - 1, this.source), 1);
    }

    /// '>' : yields Tok.Ge for ">=" (consuming the '='), else Tok.Gt.
    Token ge()
    {
        if (nextIs('='))
            return Token(Tok.Ge, Location(position++ - 1, this.source), 2);
        return Token(Tok.Gt, Location(position - 1, this.source), 1);
    }

    /// '+'
    Token add()
    {
        return Token(Tok.Add, Location(position - 1, this.source), 1);
    }

    /// '-'
    Token sub()
    {
        return Token(Tok.Sub, Location(position - 1, this.source), 1);
    }

    /// '*'
    Token mul()
    {
        return Token(Tok.Mul, Location(position - 1, this.source), 1);
    }

    /**
     * '/' : either a division sign or the start of a comment.
     *
     * - "//" skips to just past the next '\n' and returns the token after
     *   it; if the input ends first, a Tok.EOF token is returned.
     * - "/*" skips to just past the next "*" "/" pair and returns the token
     *   after it.
     * - "/+" skips a comment that may contain further "/+ ... +/" pairs and
     *   returns the token after the outermost "+/".
     * - anything else (including end of input) yields Tok.Div.
     *
     * Throws: an `Error` when a "/*" or "/+" comment is not closed before
     * the end of the input.
     */
    Token div()
    {
        // A '/' that is the last character of the input can only be a
        // division sign; there is nothing to look ahead at.
        if (position >= source.data.length)
            return Token(Tok.Div, Location(position - 1, this.source), 1);

        switch (source.data[position])
        {
            case '/':
                while (getNextChar != CharType.EOF)
                {
                    if (source.data[position++] == '\n')
                        return this.next;
                }
                return Token(Tok.EOF, Location(position, this.source), 0);

            case '*':
                // Step over the '*' of the opener plus one more character,
                // so that the '*' of the opener is the earliest character
                // that can pair with a closing '/' ("/**/" is closed,
                // "/*/" is not).
                position += 2;
                while (getNextChar != CharType.EOF)
                {
                    ++position;
                    if (source.data[position-2] == '*')
                        if (source.data[position-1] == '/')
                            return this.next;
                }
                throw error(__LINE__, "Unexpected end of file. Unclosed comment block");

            case '+':
            {
                // Step over the '+' of the opener. As with "/*", that '+'
                // may pair with an immediately following '/' ("/++/").
                position += 1;
                int nesting = 1;

                // Look at two characters at a time; a delimiter is consumed
                // as a unit so neither of its characters can be reused as
                // part of the next delimiter, and nothing after the final
                // "+/" is skipped.
                while (position + 1 < source.data.length)
                {
                    char first  = source.data[position];
                    char second = source.data[position + 1];

                    if (first == '+' && second == '/')
                    {
                        position += 2;
                        if (--nesting == 0)
                            return this.next;
                    }
                    else if (first == '/' && second == '+')
                    {
                        position += 2;
                        ++nesting;
                    }
                    else
                        ++position;
                }
                throw error(__LINE__, "Unexpected end of file. Unclosed comment block");
            }

            default:
                return Token(Tok.Div, Location(position - 1, this.source), 1);
        }
    }

    /// Lexes a run of digits starting at `position` into a Tok.Integer token
    /// and advances past it.
    Token lexNumber ()
    {
        int i = 0;
        // The character at offset 0 is already known to be a digit
        // (that is why next() dispatched here), so counting starts at 1.
        while (getNextChar(++i) == CharType.Number)
        {}

        position += i;

        return Token(Tok.Integer, Location(position - i, this.source), i);
    }

    /// Consumes one symbol character and calls the handler registered for it
    /// in `symbolFunctions`.
    Token lexSymbol ()
    {
        return symbolFunctions[source.data[position++]]();
    }

    /**
     * Lexes a run of letters and digits starting at `position`.
     *
     * The result is a Tok.Identifier token unless the run contains no digit
     * and its text is a key of `keywords`, in which case the token type is
     * the one stored in `keywords` for that text.
     */
    Token lexLetter ()
    {
        int i = 0;
        bool hasNumber = false;
        while (getNextChar(++i) == CharType.Letter ||
                getNextChar(i) == CharType.Number)
        {
            if (getNextChar(i) == CharType.Number)
            {
                hasNumber = true;
            }
        }

        Token t = Token(Tok.Identifier, Location(position, source), i);

        // A run containing a digit is never looked up as a keyword.
        if (!hasNumber)
        {
            char[] str = source.data[position .. position + i];
            if (str in keywords)
                t.type = keywords[str];
        }

        position += i;

        return t;
    }

    /**
     * Classifies the character `offset` positions after `position` without
     * consuming anything.
     *
     * Returns: CharType.EOF when that index is at or past the end of the
     * data, otherwise the `charTable` entry for the character (which is
     * CharType.INVALID for characters the constructor did not classify;
     * next() reports those).
     */
    CharType getNextChar(int offset = 0)
    {
        if (position + offset >= this.source.data.length)
            return CharType.EOF;

        char current = source.data[position + offset];

        return charTable[current];
    }

    /// Builds an Error carrying `msg`, located at the current `position`.
    /// The `line` argument is accepted but not used.
    Error error(uint line, char[] msg)
    {
        return (new Error(msg)).loc(Location(position, source));
    }

    int position;                       // index into source.data of the next unread character
    DataSource source;
    Error[] errors;
    CharType[] charTable;               // character value -> classification
    Token delegate()[] symbolFunctions; // character value -> symbol handler
}

/// Classification of a single input character. INVALID is the first member
/// and therefore the value of every `charTable` entry that is never assigned.
enum CharType : ubyte
{
    INVALID,
    Letter,
    Number,
    Symbol,
    Whitespace,

    EOF
}