# HG changeset patch # User aziz # Date 1182521522 0 # Node ID 92df59b1ec4a279bf6a45328060b692e112e63b5 # Parent 4bbce78bfb1e928975fcab576da357062e47e13d - Started implementation of scan(). diff -r 4bbce78bfb1e -r 92df59b1ec4a trunk/src/Lexer.d --- a/trunk/src/Lexer.d Thu Jun 21 18:36:04 2007 +0000 +++ b/trunk/src/Lexer.d Fri Jun 22 14:12:02 2007 +0000 @@ -5,6 +5,8 @@ module Lexer; import Token; import std.stdio; +import std.utf; +import std.uni; /// ASCII character properties table. static const int ptable[256] = [ @@ -65,13 +67,68 @@ class Lexer { - Token t; - public void scan(ref Token t) + Token token; + char[] text; + char* p; + char* end; + + this(char[] text) + { + this.text = text; + this.text.length = this.text.length + 1; + this.text[$-1] = 0; + + this.p = this.text.ptr; + this.end = this.p + this.text.length; + } + + public void scan(out Token t) { + assert(p < end); + t.start = p; + char c = *p; + + if (c == '\0') + { + t.type = TOK.EOF; + t.end = p+1; + return; + } + + if (!isident(c) || isdigit(c)) + { + do + c = *++p; + while ((!isident(c) || isdigit(c)) && c != '\0') + t.type = TOK.Whitespace; + t.end = p; + return; + } + + if (isident(c) && !isdigit(c)) + { + do + { c = *++p; } + while (isident(c)) + t.type = TOK.Identifier; + t.end = p; + return; + } } + public TOK nextToken() { - return TOK.max; + scan(this.token); + return this.token.type; + } + + Token[] getTokens() + { + Token[] tokens; + while (nextToken() != TOK.EOF) + tokens ~= this.token; + tokens ~= this.token; + return tokens; } } \ No newline at end of file diff -r 4bbce78bfb1e -r 92df59b1ec4a trunk/src/Token.d --- a/trunk/src/Token.d Thu Jun 21 18:36:04 2007 +0000 +++ b/trunk/src/Token.d Fri Jun 22 14:12:02 2007 +0000 @@ -14,7 +14,8 @@ { Identifier, Whitespace, - Comment + Comment, + EOF } struct Token @@ -22,6 +23,9 @@ TOK type; Position pos; + char* start; + char* end; + union { char[] str; diff -r 4bbce78bfb1e -r 92df59b1ec4a trunk/src/main.d --- a/trunk/src/main.d Thu Jun 21 18:36:04 2007 +0000 +++ b/trunk/src/main.d Fri Jun 22 14:12:02 2007 +0000 @@ -6,8 +6,16 @@ import Lexer; import Token; import std.stdio; +import std.file; void main(char[][] args) { + auto srctext = cast(char[]) std.file.read(args[1]); + auto lx = new Lexer(srctext); + foreach(token; lx.getTokens()) + { + if (token.type == TOK.Whitespace) + writefln("%s", token.start[0..token.end-token.start]); + } } \ No newline at end of file