changeset 4:92df59b1ec4a

- Started implementation of scan().
author aziz
date Fri, 22 Jun 2007 14:12:02 +0000
parents 4bbce78bfb1e
children 79b4e8848794
files trunk/src/Lexer.d trunk/src/Token.d trunk/src/main.d
diffstat 3 files changed, 73 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/Lexer.d	Thu Jun 21 18:36:04 2007 +0000
+++ b/trunk/src/Lexer.d	Fri Jun 22 14:12:02 2007 +0000
@@ -5,6 +5,8 @@
 module Lexer;
 import Token;
 import std.stdio;
+import std.utf;
+import std.uni;
 
 /// ASCII character properties table.
 static const int ptable[256] = [
@@ -65,13 +67,68 @@
 
 class Lexer
 {
-  Token t;
-  public void scan(ref Token t)
+  Token token; // token most recently produced by nextToken()
+  char[] text; // source text; the constructor appends a '\0' terminator to it
+  char* p; // current scan position inside text
+  char* end; // one past the last element of text (terminator included)
+
+  this(char[] text) // stores the source text, appends a '\0' sentinel and positions p on the first character
+  {
+    this.text = text;
+    this.text.length = this.text.length + 1; // grow the array by one element...
+    this.text[$-1] = 0; // ...and store the '\0' that scan() reports as EOF
+
+    this.p = this.text.ptr;
+    this.end = this.p + this.text.length; // length already counts the appended terminator
+  }
+
+  public void scan(out Token t) // fills t (type, start, end) with the token that begins at p
   {
+    assert(p < end);
+    t.start = p; // every token starts at the current position
 
+    char c = *p;
+
+    if (c == '\0') // terminator reached
+    {
+      t.type = TOK.EOF;
+      t.end = p+1; // EOF token covers the terminator; p itself is not advanced
+      return;
+    }
+
+    if (!isident(c) || isdigit(c)) // NOTE(review): isident/isdigit are defined outside this diff - presumably ptable-based character tests; confirm
+    {
+      do
+        c = *++p; // advance first, then inspect the new character
+      while ((!isident(c) || isdigit(c)) && c != '\0') // the run also ends at the '\0' terminator
+      t.type = TOK.Whitespace; // the whole run is reported as Whitespace, including characters for which isdigit() holds
+      t.end = p; // p now points at the first character after the run
+      return;
+    }
+
+    if (isident(c) && !isdigit(c)) // logical complement of the test above
+    {
+      do
+      { c = *++p; }
+      while (isident(c)) // isdigit() is only checked for the first character of the token
+      t.type = TOK.Identifier;
+      t.end = p; // p now points at the first character after the identifier
+      return;
+    }
   }
+
   public TOK nextToken()
   {
-    return TOK.max;
+    scan(this.token); // scan the next token into this.token
+    return this.token.type; // callers get only the type; the full token stays in this.token
+  }
+
+  Token[] getTokens() // scans until EOF and returns every token, the EOF token included
+  {
+    Token[] tokens;
+    while (nextToken() != TOK.EOF)
+      tokens ~= this.token; // append each non-EOF token
+    tokens ~= this.token; // this.token is the EOF token here; append it as the last element
+    return tokens;
   }
 }
\ No newline at end of file
--- a/trunk/src/Token.d	Thu Jun 21 18:36:04 2007 +0000
+++ b/trunk/src/Token.d	Fri Jun 22 14:12:02 2007 +0000
@@ -14,7 +14,8 @@
 {
   Identifier,
   Whitespace,
-  Comment
+  Comment,
+  EOF
 }
 
 struct Token
@@ -22,6 +23,9 @@
   TOK type;
   Position pos;
 
+  char* start;
+  char* end;
+
   union
   {
     char[] str;
--- a/trunk/src/main.d	Thu Jun 21 18:36:04 2007 +0000
+++ b/trunk/src/main.d	Fri Jun 22 14:12:02 2007 +0000
@@ -6,8 +6,16 @@
 import Lexer;
 import Token;
 import std.stdio;
+import std.file;
 
 void main(char[][] args)
 {
+  auto srctext = cast(char[]) std.file.read(args[1]); // read the file named by args[1]; no check here that the argument was supplied
+  auto lx = new Lexer(srctext); // lexer over the file contents
 
+  foreach(token; lx.getTokens()) // getTokens() returns all tokens, ending with the EOF token
+  {
+    if (token.type == TOK.Whitespace) // only Whitespace tokens are printed
+      writefln("%s", token.start[0..token.end-token.start]); // slice of the source between the token's start and end pointers
+  }
 }
\ No newline at end of file