Mercurial > projects > dang
view lexer/Lexer.d @ 15:59bfbaf8847f
Updates to run.d - still errors
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Fri, 18 Apr 2008 17:49:34 +0200 |
parents | 2e1069ee21af |
children | 69464d465284 |
line wrap: on
line source
module lexer.Lexer;

import misc.Error,
       misc.DataSource;

import lexer.Token,
       lexer.Keyword;

import tango.io.Stdout;

/**
 * Hand-written lexer that walks the character data of a DataSource and
 * produces Tokens on demand.
 *
 * The lexer keeps a single cursor (`position`) into `source.data`. Every
 * lexing routine advances that cursor past the text it consumed and returns
 * a Token built from a token type, a Location and a length.
 */
class Lexer
{
public:
    /// Creates a lexer that reads from the start of `source`.
    this (DataSource source)
    {
        this.source = source;
        this.position = 0;
    }

    /**
     * Consumes and returns the next token.
     *
     * Whitespace is skipped. At end of input a Tok.EOF token with a
     * default-initialised Location is returned.
     *
     * Throws: Error when an unsupported character or an unterminated block
     *         comment is encountered.
     */
    Token next ()
    {
        switch (getNextChar)
        {
            case CharType.EOF:
                Location l;
                return Token (Tok.EOF, l, 0);

            case CharType.Whitespace:
                position += 1;
                return this.next;

            case CharType.Symbol:
                return lexSymbol;

            case CharType.Letter:
                return lexLetter;

            case CharType.Number:
                return lexNumber;
        }
    }

    /**
     * Returns a token ahead of the cursor without consuming anything.
     *
     * Params:
     *   skip = number of tokens to step over before the one that is returned
     *          (0 returns the token that `next` would return).
     *
     * Only `position` is saved and restored around the look-ahead.
     */
    Token peek ( int skip = 0)
    {
        int oldPosition = this.position;
        while(skip-- > 0)
            this.next;
        Token t = this.next;
        this.position = oldPosition;
        return t;
    }

    /// Returns the error list held by this lexer.
    public Error[] getErrors()
    {
        return this.errors;
    }

private:
    /// Lexes a run of decimal digits starting at `position` as Tok.Integer.
    Token lexNumber ()
    {
        int i = 0;
        while(getNextChar(++i) == CharType.Number)
        {}

        position += i;

        return Token(Tok.Integer, Location(position - i, this.source), i);
    }

    /**
     * Lexes an operator or punctuation token, or skips a comment.
     *
     * The caller has already established (via getNextChar) that the character
     * at `position` is a symbol. Comments are skipped and the token following
     * them is returned instead.
     */
    Token lexSymbol ()
    {
        int start = position;

        switch(source.data[position++])
        {
            case '(':
                return Token(Tok.OpenParentheses, Location(start, this.source), 1);
            case ')':
                return Token(Tok.CloseParentheses, Location(start, this.source), 1);
            case '{':
                return Token(Tok.OpenBrace, Location(start, this.source), 1);
            case '}':
                return Token(Tok.CloseBrace, Location(start, this.source), 1);
            case ';':
                return Token(Tok.Seperator, Location(start, this.source), 1);
            case ',':
                return Token(Tok.Comma, Location(start, this.source), 1);
            case '=':
                if(consumeIf('='))
                    return Token(Tok.Eq, Location(start, this.source), 2);
                return Token(Tok.Assign, Location(start, this.source), 1);
            case '!':
                if(consumeIf('='))
                    return Token(Tok.Ne, Location(start, this.source), 2);
                return Token(Tok.Not, Location(start, this.source), 1);
            case '<':
                if(consumeIf('='))
                    return Token(Tok.Le, Location(start, this.source), 2);
                return Token(Tok.Lt, Location(start, this.source), 1);
            case '>':
                if(consumeIf('='))
                    return Token(Tok.Ge, Location(start, this.source), 2);
                return Token(Tok.Gt, Location(start, this.source), 1);
            case '+':
                return Token(Tok.Add, Location(start, this.source), 1);
            case '-':
                return Token(Tok.Sub, Location(start, this.source), 1);
            case '*':
                return Token(Tok.Mul, Location(start, this.source), 1);
            case '/':
                // A '/' that is the very last character is a plain division
                // sign; do not index past the end of the data.
                if(position >= source.data.length)
                    return Token(Tok.Div, Location(start, this.source), 1);

                switch(source.data[position])
                {
                    case '/':
                        return skipLineComment();
                    case '*':
                        return skipBlockComment();
                    case '+':
                        return skipNestingComment();
                    default:
                        return Token(Tok.Div, Location(start, this.source), 1);
                }
            default:
                // Only reachable if the symbol list in getNextChar and the
                // cases above drift apart.
                throw new Error("Read invalid symbol: '" ~ source.data[start] ~ "'",
                        Location(start, source));
        }
    }

    /**
     * Advances past the character at `position` when it equals `expected`.
     *
     * Returns: true if the character matched and was consumed, false if it
     *          did not match or the end of the data has been reached.
     */
    bool consumeIf (char expected)
    {
        if(position < source.data.length && source.data[position] == expected)
        {
            position++;
            return true;
        }
        return false;
    }

    /// True when the two characters at `position` are `first` then `second`.
    bool lookingAt (char first, char second)
    {
        return position + 1 < source.data.length
            && source.data[position] == first
            && source.data[position + 1] == second;
    }

    /**
     * Skips a `//` comment. On entry `position` is at the second '/'.
     *
     * The raw data is scanned rather than getNextChar, because a comment may
     * contain characters that are not part of the language's alphabet.
     *
     * Returns: the token after the terminating newline, or Tok.EOF when the
     *          comment runs to the end of the data.
     */
    Token skipLineComment ()
    {
        while(position < source.data.length)
        {
            if(source.data[position++] == '\n')
                return this.next;
        }
        return Token(Tok.EOF, Location(position, this.source), 0);
    }

    /**
     * Skips a block comment. On entry `position` is at the '*' of the
     * opening delimiter.
     *
     * Returns: the token following the closing delimiter.
     * Throws:  Error when the data ends before the comment is closed.
     */
    Token skipBlockComment ()
    {
        // Step over the opening '*' so that "/*/" is not taken as closed.
        position++;

        while(position < source.data.length)
        {
            if(lookingAt('*', '/'))
            {
                position += 2;
                return this.next;
            }
            position++;
        }
        throw new Error("Unexpected end of file. Unclosed comment block",
                Location(position, source));
    }

    /**
     * Skips a nesting comment (slash-plus ... plus-slash), which may contain
     * further nesting comments. On entry `position` is at the '+' of the
     * opening delimiter.
     *
     * Each delimiter is consumed as a two-character unit, so neither the
     * character after the final closing delimiter nor a '+' shared between
     * adjacent delimiters is consumed twice.
     *
     * Returns: the token following the outermost closing delimiter.
     * Throws:  Error when the data ends before nesting returns to zero.
     */
    Token skipNestingComment ()
    {
        // Step over the opening '+'.
        position++;
        int nesting = 1;

        while(position < source.data.length)
        {
            if(lookingAt('/', '+'))
            {
                nesting++;
                position += 2;
            }
            else if(lookingAt('+', '/'))
            {
                nesting--;
                position += 2;
                if(nesting == 0)
                    return this.next;
            }
            else
            {
                position++;
            }
        }
        throw new Error("Unexpected end of file. Unclosed comment block",
                Location(position, source));
    }

    /**
     * Lexes a run of letters and digits starting at `position`.
     *
     * The result is Tok.Identifier unless the text contains no digit and is
     * found in the `keywords` table, in which case the keyword's token type
     * is used.
     */
    Token lexLetter ()
    {
        int i = 0;
        bool hasNumber = false;
        while (getNextChar(++i) == CharType.Letter ||
                getNextChar(i) == CharType.Number)
        {
            if (getNextChar(i) == CharType.Number)
            {
                hasNumber = true;
            }
        }

        Token t = Token(Tok.Identifier, Location(position, source), i);

        // Text containing a digit is never looked up in the keyword table.
        if (!hasNumber)
        {
            char[] str = source.data[position .. position + i];
            if(str in keywords)
                t.type = keywords[str];
        }

        position += i;

        return t;
    }

    /**
     * Classifies the character `offset` places after `position`.
     *
     * Returns: CharType.EOF when that index is past the end of the data,
     *          otherwise the class of the character found there. Bytes above
     *          127 are classified as letters.
     * Throws:  Error for any character that is not a letter, digit,
     *          whitespace or one of the recognised symbols.
     */
    CharType getNextChar(int offset = 0)
    {
        if (position + offset >= this.source.data.length)
            return CharType.EOF;

        char current = source.data[position + offset];

        if (current >= 'A' && current <= 'Z' ||
            current >= 'a' && current <= 'z' || current > 127)
            return CharType.Letter;

        if (current >= '0' && current <= '9')
            return CharType.Number;

        switch(current)
        {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                return CharType.Whitespace;

            case '(':
            case ')':
            case '{':
            case '}':
            case ';':
            case ',':
            case '=':
            case '!':
            case '<':
            case '>':
            case '+':
            case '-':
            case '*':
            case '/':
                return CharType.Symbol;

            default:
                // Report the location of the offending character itself,
                // not of the cursor the look-ahead started from.
                throw new Error("Read invalid symbol: '" ~ current ~ "'",
                        Location(position + offset, source));
        }
    }

    DataSource source;  /// Text being lexed.
    int position;       /// Index into source.data of the next unread character.
    Error[] errors;     /// Returned by getErrors.
}

/// Coarse character classes used to dispatch to the lexing routines.
enum CharType : ubyte
{
    Letter,
    Number,
    Symbol,
    Whitespace,

    EOF
}