projects/dang: src/lexer/Lexer.d comparison

comparison src/lexer/Lexer.d @ 206:d3c148ca429b

Major moving of files. all src now goes into src, all docs in docs.

author	Anders Johnsen <skabet@gmail.com>
date	Tue, 12 Aug 2008 18:14:56 +0200
parents
children	e0551773a005

comparison

equal deleted inserted replaced

-:8387cbaa85ab
+:d3c148ca429b
+module lexer.Lexer;
+import basic.Message,
+basic.SourceManager;
+import lexer.Token,
+lexer.Keyword;
+import tango.io.Stdout;
+/**
+The Lexer class supplies methods to tokenize a D file. Construct the Lexer
+with a SourceLocation, a SourceManager and a MessageHandler, and you can
+'peek' and 'next' Tokens from the file.
+For more info about Tokens, look up the lexer.Token module.
+*/
+class Lexer
+{
+public:
+/**
+Create a new Lexer.
+*/
+this(SourceLocation start, SourceManager src_mgr, MessageHandler messages)
+{
+    this.messages = messages;
+    sm = src_mgr;
+    start_loc = start;
+    position = 0;
+    source = sm.getRawData(start_loc);
+
+    // One classification slot per possible char value. Slots that are never
+    // assigned below keep the enum's initial member, CharType.INVALID, which
+    // getNextChar reports as InvalidSymbol.
+    charTable.length = 256;
+    foreach (c; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
+        charTable[c] = CharType.Letter;
+    foreach (c; "0123456789")
+        charTable[c] = CharType.Number;
+    foreach (c; "(){}[];:.,=!<>+-*/%\"`")
+        charTable[c] = CharType.Symbol;
+    // Tabs and carriage returns count as whitespace as well; otherwise any
+    // tab-indented or CRLF-terminated source aborts with InvalidSymbol.
+    foreach (c; " \t\r\n")
+        charTable[c] = CharType.Whitespace;
+    foreach (c; "'\\")
+        charTable[c] = CharType.Other;
+
+    // Handler table used by lexSymbol: indexed by the symbol character.
+    symbolFunctions.length = 256;
+    symbolFunctions['('] = &openParentheses;
+    symbolFunctions[')'] = &closeParentheses;
+    symbolFunctions['{'] = &openBrace;
+    symbolFunctions['}'] = &closeBrace;
+    symbolFunctions['['] = &openBracket;
+    symbolFunctions[']'] = &closeBracket;
+    symbolFunctions[';'] = &seperator;
+    symbolFunctions[':'] = &colon;
+    symbolFunctions['.'] = &dot;
+    symbolFunctions[','] = &comma;
+    symbolFunctions['='] = &eq;
+    symbolFunctions['!'] = &ne;
+    symbolFunctions['<'] = &le;
+    symbolFunctions['>'] = &ge;
+    symbolFunctions['+'] = &plus;
+    symbolFunctions['-'] = &minus;
+    symbolFunctions['*'] = &star;
+    symbolFunctions['/'] = &slash;
+    symbolFunctions['%'] = &percent;
+    // Both quote styles share one handler; it inspects the delimiter itself.
+    symbolFunctions['"'] = &string;
+    symbolFunctions['`'] = &string;
+}
+/**
+Get the next token from the source. This method will move the
+internal position forward to the next Token.
+return: A Token - Token.type is Tok.EOF if there are
+no more tokens in the file.
+*/
+Token next()
+{
+    // Skip whitespace with a loop rather than re-entering next() once per
+    // character, so long runs of blanks do not deepen the call stack.
+    CharType kind = getNextChar;
+    while (kind == CharType.Whitespace)
+    {
+        position += 1;
+        kind = getNextChar;
+    }
+
+    switch (kind)
+    {
+        case CharType.EOF:
+            // End of input is reported with a default-initialised location.
+            SLoc loc;
+            return Token(Tok.EOF, loc, 0);
+        case CharType.Symbol:
+            return lexSymbol;
+        case CharType.Letter:
+            return lexLetter;
+        case CharType.Number:
+            return lexNumber;
+        case CharType.Other:
+            // Characters classified as Other cannot start a token.
+            messages.report(UnexpectedTok, Loc(position)).fatal(ExitLevel.Lexer);
+    }
+}
+/**
+Get an upcoming token from the source without consuming it; 'skip' tokens
+are passed over first. This method restores the internal position before
+returning, so the lexer's position is left unchanged.
+return: A Token - Token.type is Tok.EOF if there are
+no more tokens in the file.
+*/
+Token peek(int skip = 0)
+{
+    // Remember where we are, lex ahead, then rewind.
+    int saved = position;
+    for (; skip > 0; skip--)
+        next();
+    Token ahead = next();
+    position = saved;
+    return ahead;
+}
+private:
+/**
+Lex '=' (Tok.Assign) or '==' (Tok.Eq).
+Reached through lexSymbol, which has already moved position past the
+first '='; the token therefore starts at position - 1.
+*/
+Token eq()
+{
+    // The bounds check keeps a '=' that is the very last character of the
+    // source from indexing past the end of the buffer.
+    if (position < source.length && source[position] == '=')
+        return Token(Tok.Eq, Loc(position++ - 1), 2);
+    return Token(Tok.Assign, Loc(position - 1), 1);
+}
+/// Build the one-character '{' token; lexSymbol has already stepped past it.
+Token openBrace()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.OpenBrace, where, 1);
+}
+/// Build the one-character '}' token; lexSymbol has already stepped past it.
+Token closeBrace()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.CloseBrace, where, 1);
+}
+/// Build the one-character '(' token; lexSymbol has already stepped past it.
+Token openParentheses()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.OpenParentheses, where, 1);
+}
+/// Build the one-character ')' token; lexSymbol has already stepped past it.
+Token closeParentheses()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.CloseParentheses, where, 1);
+}
+/// Build the one-character '[' token; lexSymbol has already stepped past it.
+Token openBracket()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.OpenBracket, where, 1);
+}
+/// Build the one-character ']' token; lexSymbol has already stepped past it.
+Token closeBracket()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.CloseBracket, where, 1);
+}
+/// Build the one-character ';' token; lexSymbol has already stepped past it.
+Token seperator()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.Seperator, where, 1);
+}
+/// Build the one-character ':' token; lexSymbol has already stepped past it.
+Token colon()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.Colon, where, 1);
+}
+/**
+Lex a '.'. When a digit follows directly, the dot starts a number: position
+is moved back onto the '.' and lexNumber takes over. Otherwise a single
+Tok.Dot token is produced.
+
+The previous look-ahead for '_' read source[position + pos + 1] without a
+bounds check (out of range when the '.' sits within one character of the end
+of the source) and never influenced the result, so it has been dropped.
+*/
+Token dot()
+{
+    if (getNextChar(0) == CharType.Number)
+    {
+        // Hand the '.' itself to lexNumber so it becomes part of the literal.
+        position--;
+        return lexNumber();
+    }
+    return Token(Tok.Dot, Loc(position - 1), 1);
+}
+/// Build the one-character ',' token; lexSymbol has already stepped past it.
+Token comma()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.Comma, where, 1);
+}
+/**
+Lex '!' (Tok.Not) or '!=' (Tok.Ne).
+Reached through lexSymbol, which has already moved position past the '!';
+the token therefore starts at position - 1.
+*/
+Token ne()
+{
+    // Bounds check: a '!' may be the last character of the source.
+    if (position < source.length && source[position] == '=')
+        return Token(Tok.Ne, Loc(position++ - 1), 2);
+    return Token(Tok.Not, Loc(position - 1), 1);
+}
+/**
+Lex '<' (Tok.Lt) or '<=' (Tok.Le).
+Reached through lexSymbol, which has already moved position past the '<';
+the token therefore starts at position - 1.
+*/
+Token le()
+{
+    // Bounds check: a '<' may be the last character of the source.
+    if (position < source.length && source[position] == '=')
+        return Token(Tok.Le, Loc(position++ - 1), 2);
+    return Token(Tok.Lt, Loc(position - 1), 1);
+}
+/**
+Lex '>' (Tok.Gt) or '>=' (Tok.Ge).
+Reached through lexSymbol, which has already moved position past the '>';
+the token therefore starts at position - 1.
+*/
+Token ge()
+{
+    // Bounds check: a '>' may be the last character of the source.
+    if (position < source.length && source[position] == '=')
+        return Token(Tok.Ge, Loc(position++ - 1), 2);
+    return Token(Tok.Gt, Loc(position - 1), 1);
+}
+/// Build the one-character '+' token; lexSymbol has already stepped past it.
+Token plus()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.Plus, where, 1);
+}
+/// Build the one-character '-' token; lexSymbol has already stepped past it.
+Token minus()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.Minus, where, 1);
+}
+/// Build the one-character '*' token; lexSymbol has already stepped past it.
+Token star()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.Star, where, 1);
+}
+/**
+Lex whatever starts with '/': a line comment, a block comment, a nesting
+'/+ +/' comment, or the division operator.
+
+Reached through lexSymbol, so position already points at the character after
+the '/'. Comments produce no token of their own: once one has been skipped
+the following token is returned via next(). Comment text is scanned directly
+from source instead of through getNextChar, so bytes that have no entry in
+charTable are accepted inside comments.
+
+return: the token after a comment, Tok.Slash for a lone '/', or Tok.EOF when
+the source ends inside a comment (after reporting UnexpectedEOFBlock for the
+block forms).
+*/
+Token slash()
+{
+    // A '/' that is the last character of the source is just an operator.
+    if (position >= source.length)
+        return Token(Tok.Slash, Loc(position - 1), 1);
+
+    switch (source[position])
+    {
+        case '/':
+            // Line comment: runs up to and including the next newline.
+            while (position < source.length)
+            {
+                if (source[position++] == '\n')
+                    return this.next;
+            }
+            return Token(Tok.EOF, Loc(position), 0);
+
+        case '*':
+            // Step over the '*' so that "/*/" is not taken as a whole comment.
+            position++;
+            while (position < source.length)
+            {
+                if (source[position] == '*' &&
+                    position + 1 < source.length &&
+                    source[position + 1] == '/')
+                {
+                    position += 2;
+                    return this.next;
+                }
+                position++;
+            }
+            // Unterminated: report once and stop, instead of falling through
+            // into the '+' case below.
+            messages.report(UnexpectedEOFBlock, Loc(position));
+            return Token(Tok.EOF, Loc(position), 0);
+
+        case '+':
+        {
+            // Step over the '+' so that "/+/" is not taken as a whole comment.
+            position++;
+            int nesting = 1;
+            while (position < source.length)
+            {
+                bool hasNext = position + 1 < source.length;
+                if (hasNext && source[position] == '+' && source[position + 1] == '/')
+                {
+                    // Consume exactly the two delimiter characters, so the
+                    // character following the comment is not skipped.
+                    position += 2;
+                    if (--nesting == 0)
+                        return this.next;
+                }
+                else if (hasNext && source[position] == '/' && source[position + 1] == '+')
+                {
+                    position += 2;
+                    nesting++;
+                }
+                else
+                    position++;
+            }
+            messages.report(UnexpectedEOFBlock, Loc(position));
+            return Token(Tok.EOF, Loc(position), 0);
+        }
+
+        default:
+            return Token(Tok.Slash, Loc(position - 1), 1);
+    }
+}
+/// Build the one-character '%' token; lexSymbol has already stepped past it.
+Token percent()
+{
+    SourceLocation where = Loc(position - 1);
+    return Token(Tok.Percent, where, 1);
+}
+/**
+Lex a string literal delimited by '"' or '`', optionally preceded by a
+one-letter prefix.
+
+Both callers (lexSymbol and lexLetter) leave position one past the first
+character of the literal, so it is moved back by one first. A '`' literal,
+or a '"' literal whose prefix letter is 'r', is taken verbatim; in any other
+'"' literal a backslash hides the character that follows it.
+
+The literal's text is read straight from source rather than through
+getNextChar, so characters with no charTable entry are accepted inside
+strings. The 'r' test looks only at a prefix consumed by this call, not at
+whatever character happens to precede the quote.
+
+return: a Tok.String token spanning prefix, delimiters and contents. If the
+source ends before the closing delimiter, UnexpectedEOFBlock is reported as
+fatal.
+*/
+Token string()
+{
+    --position;
+    int start = position;
+    // Optional prefix letter (for example the r of r"...").
+    if (getNextChar() == CharType.Letter)
+        position++;
+
+    char quote = source[position];
+    bool verbatim = quote == '`' ||
+                    (position > start && source[start] == 'r');
+
+    ++position; // step over the opening delimiter
+    while (position < source.length)
+    {
+        char c = source[position++];
+        if (c == quote)
+            return Token(Tok.String, Loc(start), position - start);
+        if (c == '\\' && !verbatim)
+            position++; // escaped character: never a terminator
+    }
+    messages.report(UnexpectedEOFBlock, Loc(position)).fatal(ExitLevel.Lexer);
+}
+/**
+Lex a numeric literal starting at position. Digits, '_', '.', 'e' and 'E'
+are consumed; a second '.' or a second exponent letter is reported through
+messages but still consumed. Anything else ends the literal.
+
+return: a Tok.Integer token covering the consumed characters.
+*/
+Token lexNumber ()
+{
+    bool seenDot = false;
+    bool seenExp = false;
+    int len = 0;
+
+    scan: while (true)
+    {
+        switch (getNextChar(len))
+        {
+            case CharType.Number:
+                break;
+            case CharType.Symbol:
+                // '.' is the only symbol allowed inside a number.
+                if (this.source[position + len] != '.')
+                    break scan;
+                if (seenDot)
+                    messages.report(OnlyOneDotFloating, Loc(position + len));
+                seenDot = true;
+                break;
+            case CharType.Letter:
+            {
+                char c = this.source[position + len];
+                if (c == '_')
+                    break;
+                if (c != 'e' && c != 'E')
+                    break scan;
+                if (seenExp)
+                    messages.report(OnlyOneEFloating, Loc(position + len));
+                seenExp = true;
+                break;
+            }
+            default:
+                break scan;
+        }
+        len++;
+    }
+
+    int begin = position;
+    position += len;
+    return Token(Tok.Integer, Loc(begin), len);
+}
+/**
+Dispatch on the symbol character at position: position is advanced past
+the character first, then the handler registered for it in symbolFunctions
+is invoked and its token returned.
+*/
+Token lexSymbol ()
+{
+    char symbol = source[position];
+    position++;
+    return symbolFunctions[symbol]();
+}
+/**
+Lex a token that starts with a letter: a prefixed string literal, a keyword,
+or an identifier.
+
+A letter directly followed by '"' or '`' is handed to string(). Otherwise
+letters and digits are consumed; the text is looked up in keywords only when
+it contains no digit.
+
+return: a Tok.String token, a keyword token, or a Tok.Identifier token.
+*/
+Token lexLetter ()
+{
+    int i = 0;
+    bool hasNumber = false;
+    // The bounds check covers a letter that is the last character of the
+    // source, where source[position+1] would be out of range.
+    if (position + 1 < source.length &&
+        (source[position+1] == '"' ||
+         source[position+1] == '`'))
+    {
+        // string() expects position one past the first character of the literal.
+        ++position;
+        return string();
+    }
+    while (getNextChar(++i) == CharType.Letter ||
+           getNextChar(i) == CharType.Number)
+    {
+        if (getNextChar(i) == CharType.Number)
+        {
+            hasNumber = true;
+        }
+    }
+
+    // Loc() with no argument resolves to the current position, which is
+    // still the first character of the identifier here.
+    Token t = Token(Tok.Identifier, Loc(), i);
+
+    if (!hasNumber)
+    {
+        char[] str = source[position .. position + i];
+        if(str in keywords)
+            t.type = keywords[str];
+    }
+
+    position += i;
+
+    return t;
+}
+/**
+Classify the character at position + offset without consuming it.
+
+return: CharType.EOF when that index is at or beyond the end of source,
+otherwise the character's charTable entry. A character whose entry is
+CharType.INVALID is reported as a fatal InvalidSymbol.
+*/
+CharType getNextChar(int offset = 0)
+{
+    if (position + offset >= this.source.length)
+        return CharType.EOF;
+
+    char current = source[position + offset];
+
+    CharType c = charTable[current];
+
+    if(c == CharType.INVALID)
+        // Point the diagnostic at the offending character itself, not at
+        // the current position, which differs when offset is non-zero.
+        messages.report(InvalidSymbol, Loc(position + offset))
+            .arg(Integer.toString(cast(int)current))
+            .fatal(ExitLevel.Lexer);
+
+    return c;
+}
+/**
+Turn an index into source into a SourceLocation relative to start_loc.
+A negative pos (the default) stands for the current position.
+*/
+private final SourceLocation Loc(int pos = -1)
+{
+    int index = (pos < 0) ? position : pos;
+    return start_loc + index;
+}
+// Source manager passed to the constructor; used there to fetch the raw text.
+SourceManager sm;
+// Base location passed to the constructor; Loc() adds an index to it.
+SourceLocation start_loc;
+// Index into source of the next character to be lexed.
+int position;
+// Text being tokenized, obtained from sm.getRawData(start_loc).
+char[] source;
+// Receiver of the diagnostics reported by the lexer.
+MessageHandler messages;
+// Classification for each of the 256 char values; filled in by the constructor.
+CharType[] charTable;
+// Handlers called by lexSymbol, indexed by the symbol character.
+Token delegate()[] symbolFunctions;
+}
+/**
+Classification of a single source character, as stored in Lexer.charTable
+and returned by Lexer.getNextChar.
+*/
+enum CharType : ubyte
+{
+// First member, so it is the value of table slots that were never assigned;
+// getNextChar reports such characters as InvalidSymbol.
+INVALID,
+// Starts identifiers, keywords and prefixed strings (see Lexer.lexLetter).
+Letter,
+// Starts numeric literals (see Lexer.lexNumber).
+Number,
+// Dispatched through Lexer.symbolFunctions (see Lexer.lexSymbol).
+Symbol,
+// Skipped by Lexer.next.
+Whitespace,
+// Known characters that cannot start a token; Lexer.next reports UnexpectedTok.
+Other,
+// Returned by getNextChar when the requested index is past the end of the source.
+EOF
+}

Mercurial > projects > dang

comparison src/lexer/Lexer.d @ 206:d3c148ca429b