Mercurial > projects > dang
view src/lexer/Lexer.d @ 211:9e9f3e7e342b default tip
Added dang folder and Module in ast.
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Tue, 12 Aug 2008 20:07:35 +0200 |
parents | e0551773a005 |
children |
line wrap: on
line source
module lexer.Lexer;

import basic.Message,
       basic.SourceManager;

import lexer.Token,
       lexer.Keyword;

import tango.io.Stdout;

/**
  Tokenizer for D source text.

  A Lexer is created for a SourceLocation inside a SourceManager; the raw
  characters are fetched once with SourceManager.getRawData and then consumed
  through 'next' (advances) and 'peek' (looks ahead without advancing).

  For more info about Tokens, look up the lexer.Token module.
*/
class Lexer
{
public:

    /**
      Create a new Lexer.

      Params:
        start    = location of the first character to lex; every token
                   location is computed as an offset from it.
        src_mgr  = source manager the raw character data is read from.
        messages = handler used to report lexing errors.
    */
    this(SourceLocation start, SourceManager src_mgr, MessageHandler messages)
    {
        this.messages = messages;
        sm = src_mgr;
        start_loc = start;
        position = 0;
        source = sm.getRawData(start_loc);

        // Classification table indexed by character value. Entries that are
        // not assigned below keep the enum's first member, CharType.INVALID.
        charTable.length = 256;
        foreach (c; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
            charTable[c] = CharType.Letter;

        foreach (c; "0123456789")
            charTable[c] = CharType.Number;

        foreach (c; "(){}[];:.,=!<>+-*/%&\"`")
            charTable[c] = CharType.Symbol;

        // Tab and carriage return are whitespace too, so tab-indented files
        // and CRLF line endings are not rejected as invalid symbols.
        foreach (c; " \n\t\r")
            charTable[c] = CharType.Whitespace;

        foreach (c; "'\\")
            charTable[c] = CharType.Other;

        // Dispatch table: one handler per character classified as Symbol.
        // Each handler is entered with 'position' already past that character.
        symbolFunctions.length = 256;

        symbolFunctions['('] = &openParentheses;
        symbolFunctions[')'] = &closeParentheses;
        symbolFunctions['{'] = &openBrace;
        symbolFunctions['}'] = &closeBrace;
        symbolFunctions['['] = &openBracket;
        symbolFunctions[']'] = &closeBracket;
        symbolFunctions[';'] = &seperator;
        symbolFunctions[':'] = &colon;
        symbolFunctions['.'] = &dot;
        symbolFunctions[','] = &comma;
        symbolFunctions['='] = &eq;
        symbolFunctions['!'] = &ne;
        symbolFunctions['<'] = &le;
        symbolFunctions['>'] = &ge;
        symbolFunctions['+'] = &plus;
        symbolFunctions['-'] = &minus;
        symbolFunctions['*'] = &star;
        symbolFunctions['/'] = &slash;
        symbolFunctions['%'] = &percent;
        symbolFunctions['&'] = &and;
        symbolFunctions['"'] = &string;
        symbolFunctions['`'] = &string;

        last = Token(Tok.EOF, SLoc() + 1, 0);
    }

    /**
      Get the next token from the source. This method will move the
      internal position forward past the returned Token.

      Every token whose type is not Tok.EOF is also stored in 'last'.

      return: A Token - its type is Tok.EOF (located at 'last.location')
              when there are no more tokens in the source.
    */
    Token next()
    {
        // Skip whitespace with a loop rather than one recursive call per
        // character, so long runs of blanks cannot exhaust the stack.
        while (getNextChar() == CharType.Whitespace)
            position += 1;

        Token res;
        switch (getNextChar())
        {
            case CharType.EOF:
                return Token(Tok.EOF, last.location, 0);

            case CharType.Symbol:
                res = lexSymbol();
                break;

            case CharType.Letter:
                res = lexLetter();
                break;

            case CharType.Number:
                res = lexNumber();
                break;

            case CharType.Other:
                messages.report(UnexpectedTok, Loc(position)).fatal(ExitLevel.Lexer);
        }
        if (res.type != Tok.EOF)
            last = res;
        return res;
    }

    /**
      Get a token from the source without consuming it. Both the internal
      position and 'last' are restored before returning, so the call leaves
      the lexer state untouched.

      Params:
        skip = number of tokens to step over before the one that is returned
               (0 returns the token 'next' would return).

      return: A Token - its type is Tok.EOF when there are no more tokens
              in the source.
    */
    Token peek(int skip = 0)
    {
        int oldPosition = this.position;
        Token oldLast = this.last;      // 'next' overwrites 'last'
        while (skip-- > 0)
            this.next();
        Token t = this.next();
        this.position = oldPosition;
        this.last = oldLast;
        return t;
    }

    /// The most recent non-EOF token returned by 'next'.
    Token last;

private:

    // ---------------------------------------------------------------------
    // Symbol handlers. All of them are reached through lexSymbol, which has
    // already advanced 'position' past the first character of the token, so
    // the token starts at 'position - 1'.
    // ---------------------------------------------------------------------

    /// '=' or '=='.
    Token eq()
    {
        if (charAt() == '=')
            return Token(Tok.Eq, Loc(position++ - 1), 2);
        return Token(Tok.Assign, Loc(position - 1), 1);
    }

    /// '{'.
    Token openBrace()
    {
        return Token(Tok.OpenBrace, Loc(position - 1), 1);
    }

    /// '}'.
    Token closeBrace()
    {
        return Token(Tok.CloseBrace, Loc(position - 1), 1);
    }

    /// '('.
    Token openParentheses()
    {
        return Token(Tok.OpenParentheses, Loc(position - 1), 1);
    }

    /// ')'.
    Token closeParentheses()
    {
        return Token(Tok.CloseParentheses, Loc(position - 1), 1);
    }

    /// '['.
    Token openBracket()
    {
        return Token(Tok.OpenBracket, Loc(position - 1), 1);
    }

    /// ']'.
    Token closeBracket()
    {
        return Token(Tok.CloseBracket, Loc(position - 1), 1);
    }

    /// ';'.
    Token seperator()
    {
        return Token(Tok.Seperator, Loc(position - 1), 1);
    }

    /// ':'.
    Token colon()
    {
        return Token(Tok.Colon, Loc(position - 1), 1);
    }

    /// '.', or a number that starts with a dot (".5").
    Token dot()
    {
        if (getNextChar(0) == CharType.Number)
        {
            // Step back onto the '.' so lexNumber sees the whole literal.
            position--;
            return lexNumber();
        }
        return Token(Tok.Dot, Loc(position - 1), 1);
    }

    /// ','.
    Token comma()
    {
        return Token(Tok.Comma, Loc(position - 1), 1);
    }

    /// '!' or '!='.
    Token ne()
    {
        if (charAt() == '=')
            return Token(Tok.Ne, Loc(position++ - 1), 2);
        return Token(Tok.Not, Loc(position - 1), 1);
    }

    /// '<', '<=' or '<<'.
    Token le()
    {
        if (charAt() == '=')
            return Token(Tok.Le, Loc(position++ - 1), 2);
        if (charAt() == '<')
            return Token(Tok.LeftShift, Loc(position++ - 1), 2);
        return Token(Tok.Lt, Loc(position - 1), 1);
    }

    /// '>', '>=', '>>' or '>>>'.
    Token ge()
    {
        if (charAt() == '=')
            return Token(Tok.Ge, Loc(position++ - 1), 2);
        if (charAt() == '>')
        {
            if (charAt(1) == '>')
            {
                position += 2;
                // The token began one character before the old position,
                // i.e. three characters before the new one.
                return Token(Tok.UnsignedRightShift, Loc(position - 3), 3);
            }
            return Token(Tok.RightShift, Loc(position++ - 1), 2);
        }
        return Token(Tok.Gt, Loc(position - 1), 1);
    }

    /// '+' or '+='.
    Token plus()
    {
        if (charAt() == '=')
            return Token(Tok.PlusAssign, Loc(position++ - 1), 2);
        return Token(Tok.Plus, Loc(position - 1), 1);
    }

    /// '-' or '-='.
    Token minus()
    {
        if (charAt() == '=')
            return Token(Tok.MinusAssign, Loc(position++ - 1), 2);
        return Token(Tok.Minus, Loc(position - 1), 1);
    }

    /// '*' or '*='.
    Token star()
    {
        if (charAt() == '=')
            return Token(Tok.StarAssign, Loc(position++ - 1), 2);
        return Token(Tok.Star, Loc(position - 1), 1);
    }

    /**
      '/', '/=', or one of the three comment forms. Comments produce no
      token of their own: once a comment has been skipped the token that
      follows it is returned. Comment text is scanned as raw characters and
      is not validated against the character table.
    */
    Token slash()
    {
        int p = position;   // reported if a block comment is never closed
        switch (charAt())
        {
            case '=':
                return Token(Tok.SlashAssign, Loc(position++ - 1), 2);

            case '/':
                // Line comment: runs to the next newline or the end of source.
                while (!atEnd())
                {
                    if (source[position++] == '\n')
                        return this.next();
                }
                return Token(Tok.EOF, Loc(position), 0);

            case '*':
                // Block comment. 'position' is moved two past the opening
                // '*', so that '*' cannot double as the one closing "/*/".
                position += 2;
                while (!atEnd())
                {
                    ++position;
                    if (source[position - 2] == '*' && source[position - 1] == '/')
                        return this.next();
                }
                messages.report(UnexpectedEOFBlock, Loc(p)).fatal(ExitLevel.Lexer);

            case '+':
                // Nesting block comment. Delimiters are consumed as whole
                // pairs, so "/+/" does not open and close at once, and the
                // character following the final "+/" is left for 'next'.
                position += 1;
                int nesting = 1;
                while (!atEnd(1))
                {
                    if (source[position] == '+' && source[position + 1] == '/')
                    {
                        position += 2;
                        if (--nesting == 0)
                            return this.next();
                    }
                    else if (source[position] == '/' && source[position + 1] == '+')
                    {
                        position += 2;
                        nesting++;
                    }
                    else
                        position++;
                }
                messages.report(UnexpectedEOFBlock, Loc(p)).fatal(ExitLevel.Lexer);

            default:
                return Token(Tok.Slash, Loc(position - 1), 1);
        }
    }

    /// '&'.
    Token and()
    {
        return Token(Tok.And, Loc(position - 1), 1);
    }

    /// '%' or '%='.
    Token percent()
    {
        if (charAt() == '=')
            return Token(Tok.PercentAssign, Loc(position++ - 1), 2);
        return Token(Tok.Percent, Loc(position - 1), 1);
    }

    /**
      Lex a string literal: "..." with backslash escapes and an optional
      c/w/d suffix, `...`, or r"..." (both of the latter end at the first
      closing delimiter, with no escape processing).

      Entered with 'position' one past the opening quote (from lexSymbol) or
      on the opening quote with a one-letter prefix before it (from
      lexLetter). The token covers the prefix, both quotes and the suffix.
      Reports UnexpectedEOFBlock if the closing quote is never found.
    */
    Token string()
    {
        --position;
        int start = position;
        if (getNextChar() == CharType.Letter)
            position++;             // step over the prefix letter
        char end = '`';
        switch (charAt())
        {
            case '"':
                if (position > 0 && source[position - 1] == 'r')
                {
                    end = '"';
                    goto string_wys;
                }
                ++position;
                while (!atEnd())
                {
                    ++position;
                    if (source[position - 1] == '"')
                    {
                        if (charAt() == 'c' || charAt() == 'w' || charAt() == 'd')
                            position++;
                        return Token(Tok.String, Loc(start), position - start);
                    }
                    else if (source[position - 1] == '\\')
                        position++; // skip the escaped character
                }
                break;

            case '`':
            string_wys:
                ++position;
                while (!atEnd())
                {
                    ++position;
                    if (source[position - 1] == end)
                        return Token(Tok.String, Loc(start), position - start);
                }
                break;
        }
        messages.report(UnexpectedEOFBlock, Loc(position)).fatal(ExitLevel.Lexer);
    }

    /**
      Lex a numeric literal starting at 'position'. Accepts digits, '.',
      '_', 'e'/'E', a '+' or '-' directly after an exponent letter, and any
      run of trailing 'u', 'U' and 'L' suffix characters. The token type is
      always Tok.Integer, whatever the literal looks like.
    */
    Token lexNumber()
    {
        int i = 0;
        bool end = false;
        while (!end)
        {
            char c = charAt(i);
            switch (getNextChar(i))
            {
                case CharType.Number:
                    break;

                case CharType.Symbol:
                    if (c == '.')
                        break;
                    if (c == '+' || c == '-')
                    {
                        // A sign belongs to the literal only as part of an
                        // exponent ("1e+5").
                        if (charAt(i - 1) == 'e' || charAt(i - 1) == 'E')
                            break;
                    }
                    end = true;
                    continue;

                case CharType.Letter:
                    if (c == '_' || c == 'e' || c == 'E')
                        break;
                    end = true;
                    continue;

                default:
                    end = true;
                    continue;
            }
            i++;
        }

        while (charAt(i) == 'u' || charAt(i) == 'U' || charAt(i) == 'L')
            i += 1;

        position += i;

        return Token(Tok.Integer, Loc(position - i), i);
    }

    /// Consume one symbol character and dispatch to its handler.
    Token lexSymbol()
    {
        Token t = symbolFunctions[source[position++]]();

        return t;
    }

    /**
      Lex an identifier or keyword starting at 'position'. A single letter
      directly followed by '"' or '`' is handed to 'string' as a prefixed
      string literal instead.
    */
    Token lexLetter()
    {
        int i = 0;
        bool hasNumber = false;
        if (charAt(1) == '"' || charAt(1) == '`')
        {
            ++position;
            return string();
        }
        while (getNextChar(++i) == CharType.Letter ||
                getNextChar(i) == CharType.Number)
        {
            if (getNextChar(i) == CharType.Number)
                hasNumber = true;
        }

        Token t = Token(Tok.Identifier, Loc(), i);

        // The keyword table is only consulted for names without digits.
        if (!hasNumber)
        {
            char[] str = source[position .. position + i];
            if (str in keywords)
                t.type = keywords[str];
        }

        position += i;

        return t;
    }

    /**
      Classify the character at 'position + offset'.

      return: CharType.EOF past the end of the source, otherwise the entry
              of the character table. A character classified as INVALID is
              reported as InvalidSymbol at its own location.
    */
    CharType getNextChar(int offset = 0)
    {
        if (atEnd(offset))
            return CharType.EOF;

        char current = source[position + offset];

        CharType c = charTable[current];

        if (c == CharType.INVALID)
            messages.report(InvalidSymbol, Loc(position + offset))
                .arg(Integer.toString(cast(int)current))
                .fatal(ExitLevel.Lexer);

        return c;
    }

    /// True when 'position + offset' lies at or beyond the end of the source.
    bool atEnd(int offset = 0)
    {
        return position + offset >= source.length;
    }

    /**
      Raw character at 'position + offset', or '\0' when that index is
      outside the source. Lets look-ahead code compare characters without
      indexing past the array.
    */
    char charAt(int offset = 0)
    {
        int idx = position + offset;
        if (idx < 0 || idx >= source.length)
            return '\0';
        return source[idx];
    }

    /**
      Source location for the character offset 'pos'; a negative 'pos'
      (the default) means the current position.
    */
    private final SourceLocation Loc(int pos = -1)
    {
        if (pos < 0)
            return start_loc + position;
        return start_loc + pos;
    }

    SourceManager sm;                   /// provider of the raw source data
    SourceLocation start_loc;           /// location of source[0]
    int position;                       /// index of the next unread character
    char[] source;                      /// raw characters being lexed
    MessageHandler messages;            /// error reporting
    CharType[] charTable;               /// character value -> CharType
    Token delegate()[] symbolFunctions; /// symbol character -> handler
}

/// Character classes used by the Lexer's classification table.
enum CharType : ubyte
{
    INVALID,    /// not assigned any class by the Lexer constructor
    Letter,
    Number,
    Symbol,
    Whitespace,
    Other,

    EOF         /// returned for positions past the end of the source
}