Mercurial > projects > dang
changeset 42:4e879f82dd64 new_gen
Added some docs for the lexer - now you can understand _some_ of the madness going on here :)
author | Anders Johnsen <skabet@gmail.com> |
---|---|
date | Tue, 22 Apr 2008 22:25:07 +0200 |
parents | f977aa28eb32 |
children | a712c530b7cc |
files | lexer/Keyword.d lexer/Lexer.d lexer/Token.d |
diffstat | 3 files changed, 70 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/lexer/Keyword.d Tue Apr 22 19:30:51 2008 +0200 +++ b/lexer/Keyword.d Tue Apr 22 22:25:07 2008 +0200 @@ -2,6 +2,10 @@ import lexer.Token; +/** + A list of keywords in an associative array that links a string + representation of the keyword to a Tok + */ Tok[char[]] keywords; static this ()
--- a/lexer/Lexer.d Tue Apr 22 19:30:51 2008 +0200 +++ b/lexer/Lexer.d Tue Apr 22 22:25:07 2008 +0200 @@ -8,9 +8,24 @@ import tango.io.Stdout; +/** + The Lexer class will supply you with methods to tokenize a D file. Supply the + Lexer with a DataSource and you can 'peek' and 'next' Tokens from the file. + + For more info about Tokens, look up the lexer.Token module. +*/ class Lexer { public: + + /** + Create a new Lexer. + + params: + source = The source to tokenize. + + */ + this (DataSource source) { this.source = source; @@ -50,6 +65,13 @@ symbolFunctions['/'] = &divide; } + /** + Get the next token from the source. This method will move the + internal position forward to the next Token. + + return: A Token - Token.type equals TokType.EOF if there are + no more tokens in the file. + */ Token next () { switch (getNextChar) @@ -73,6 +95,13 @@ } } + /** + Get the next token from the source. This method will NOT move the + internal position forward, and thereby has no side-effects. + + return: A Token - Token.type equals TokType.EOF if there are + no more tokens in the file. + */ Token peek ( int skip = 0) { int oldPosition = this.position; @@ -83,6 +112,11 @@ return t; } + /** + Return all errors that occurred while tokenizing the string. + + TODO: Error system not implemented yet - this is a stub! + */ public Error[] getErrors() { return this.errors;
--- a/lexer/Token.d Tue Apr 22 19:30:51 2008 +0200 +++ b/lexer/Token.d Tue Apr 22 22:25:07 2008 +0200 @@ -5,12 +5,23 @@ import Integer = tango.text.convert.Integer; +/** + The Token struct will be used throughout the Lexer, Parser and other + modules as a location into the source. + + The Token should always be optimized for size to limit unnecessary + memory usage. + */ struct Token { Tok type; Location location; uint length; + /** + Create a new token with a Tok type, Location in source and a + length of how many chars the Token spans in the source + */ static Token opCall (Tok type, Location location, uint length) { Token t; @@ -20,23 +31,39 @@ return t; } + /** + Get the type of the Token as a string + */ char[] getType () { return typeToString[this.type]; } + /** + A human-readable dump of a Token + */ char[] toString () { return this.getType()~": Len: "~Integer.toString(this.length) ~", Loc: "~location.toString; } + /** + Get the string in the source that matches what this Token is + covering. + */ char[] get () { return location.get(length); } } +/** + Tok is short for TokenType. This enum list is to supply the Token + with a type. + + This enum is switched over in "many" places. + */ enum Tok : ushort { /* Non-code related tokens */ @@ -89,6 +116,11 @@ } +/** + An associative array that maps a Tok to its string representation. + + Always keep this list updated when adding a new Tok. + */ public char[][Tok] typeToString; static this()