annotate trunk/src/Lexer.d @ 4:92df59b1ec4a

- Started implementation of scan().
author aziz
date Fri, 22 Jun 2007 14:12:02 +0000
parents 4bbce78bfb1e
children 79b4e8848794
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
1 /++
8ba2570de175 Initial import.
aziz
parents:
diff changeset
2 Author: Aziz Köksal
8ba2570de175 Initial import.
aziz
parents:
diff changeset
3 License: GPL2
8ba2570de175 Initial import.
aziz
parents:
diff changeset
4 +/
8ba2570de175 Initial import.
aziz
parents:
diff changeset
5 module Lexer;
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
6 import Token;
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
7 import std.stdio;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
8 import std.utf;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
9 import std.uni;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
10
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/++
  ASCII character properties table.

  Indexed by character code; each entry is a combination of CProperty flags.
  Only codes below 0x80 carry any flags, all other entries are 0.
+/
static const int[256] ptable = [
  // 0x00 - 0x1F: control characters
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x20 - 0x2F: space and punctuation
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x30 - 0x3F: '0'-'7', '8'-'9', then punctuation
  23,23,23,23,23,23,23,23,22,22, 0, 0, 0, 0, 0, 0,
  // 0x40 - 0x4F: '@', 'A'-'F', 'G'-'O'
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,
  // 0x50 - 0x5F: 'P'-'Z', punctuation, '_'
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0,16,
  // 0x60 - 0x6F: '`', 'a'-'f', 'g'-'o'
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,
  // 0x70 - 0x7F: 'p'-'z', punctuation, DEL
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0, 0,
  // 0x80 - 0xFF: non-ASCII code units
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
22
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/++
  Bit flags stored in ptable that classify a character.

  The flags are independent bits, so one table entry may combine several.
+/
enum CProperty
{
  Octal      = 1<<0, /// Octal digit: '0'-'7'.
  Digit      = 1<<1, /// Decimal digit: '0'-'9'.
  Hex        = 1<<2, /// Hexadecimal digit: '0'-'9', 'a'-'f', 'A'-'F'.
  Alpha      = 1<<3, /// Letter: 'a'-'z', 'A'-'Z'.
  Identifier = 1<<4  /// Letter, decimal digit or '_'.
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
31
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if ptable marks c with CProperty.Octal, otherwise 0.
int isoctal(char c)
{
  int properties = ptable[c];
  return properties & CProperty.Octal;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if ptable marks c with CProperty.Digit, otherwise 0.
int isdigit(char c)
{
  int properties = ptable[c];
  return properties & CProperty.Digit;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if ptable marks c with CProperty.Hex, otherwise 0.
int ishexad(char c)
{
  int properties = ptable[c];
  return properties & CProperty.Hex;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if ptable marks c with CProperty.Alpha, otherwise 0.
int isalpha(char c)
{
  int properties = ptable[c];
  return properties & CProperty.Alpha;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if ptable marks c with CProperty.Alpha or
/// CProperty.Digit (or both), otherwise 0.
int isalnum(char c)
{
  int mask = CProperty.Alpha | CProperty.Digit;
  return ptable[c] & mask;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if ptable marks c with CProperty.Identifier, otherwise 0.
int isident(char c)
{
  int properties = ptable[c];
  return properties & CProperty.Identifier;
}
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
38 /+
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
39 static this()
8ba2570de175 Initial import.
aziz
parents:
diff changeset
40 {
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
41 // Initialize character properties table.
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
42 for (int i; i < ptable.length; ++i)
8ba2570de175 Initial import.
aziz
parents:
diff changeset
43 {
8ba2570de175 Initial import.
aziz
parents:
diff changeset
44 if ('0' <= i && i <= '7')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
45 ptable[i] |= CProperty.Octal;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
46 if ('0' <= i && i <= '9')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
47 ptable[i] |= CProperty.Digit;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
48 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
49 ptable[i] |= CProperty.Hex;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
50 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
51 ptable[i] |= CProperty.Alpha;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
52 if (isalnum(i) || i == '_')
1
f3cd3bfde4ba - Corrected some errors to make the file compile.
aziz
parents: 0
diff changeset
53 ptable[i] |= CProperty.Identifier;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
54 }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
55 // Print a formatted array literal.
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
56 char[] array = "[\n";
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
57 for (int i; i < ptable.length; ++i)
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
58 {
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
59 char c = ptable[i];
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
60 array ~= std.string.format("%2d,", c, ((i+1) % 32) ? "":"\n");
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
61 }
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
62 array.length = array.length - 2; // remove ",\n"
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
63 array ~= "\n]";
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
64 writefln(array);
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
65 }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
66 +/
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
67
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/++
  Splits a source text into tokens.

  The text is kept in a private, zero-terminated buffer; the '\0' sentinel
  is what scan() uses to recognise the end of the input.
+/
class Lexer
{
  Token token; /// The token produced by the most recent call to nextToken().
  char[] text; /// Zero-terminated copy of the source text.
  char* p;     /// Current scan position inside text.
  char* end;   /// Points one past the last element (the sentinel) of text.

  /++
    Sets up the lexer to scan text from its first character.

    Params:
      text = the source text; it is copied and never modified.
  +/
  this(char[] text)
  {
    // Concatenation always allocates a new array. Growing the caller's slice
    // via .length may resize it in place, in which case writing the sentinel
    // would overwrite whatever the caller stores right behind the slice.
    this.text = text ~ '\0';

    this.p = this.text.ptr;
    this.end = this.p + this.text.length;
  }

  /++
    Scans the next token starting at the current position.

    Three kinds of tokens are produced at the moment:
    $(UL
      $(LI TOK.EOF when the current character is '\0'. The position is not
           advanced, so every further call yields TOK.EOF again.)
      $(LI TOK.Whitespace for a run of characters that cannot start an
           identifier (non-identifier characters and decimal digits).)
      $(LI TOK.Identifier for an identifier start character followed by any
           number of identifier characters.)
    )

    Params:
      t = receives the type of the token and pointers to its start and end.
  +/
  public void scan(out Token t)
  {
    assert(p < end);
    t.start = p;

    if (*p == '\0')
    {
      t.type = TOK.EOF;
      t.end = p+1;
      return;
    }

    if (!isident(*p) || isdigit(*p))
    {
      // Skip everything up to the next identifier start or the sentinel.
      ++p;
      while (*p != '\0' && (!isident(*p) || isdigit(*p)))
        ++p;
      t.type = TOK.Whitespace;
      t.end = p;
      return;
    }

    // *p is an identifier character that is not a digit.
    // The sentinel is not an identifier character, so the loop stops there.
    ++p;
    while (isident(*p))
      ++p;
    t.type = TOK.Identifier;
    t.end = p;
  }

  /++
    Scans the next token into this.token.

    Returns: the type of the scanned token.
  +/
  public TOK nextToken()
  {
    scan(this.token);
    return this.token.type;
  }

  /++
    Scans the remaining text.

    Returns: all tokens found, in order, with the TOK.EOF token as the
    last element.
  +/
  Token[] getTokens()
  {
    Token[] tokens;
    while (nextToken() != TOK.EOF)
      tokens ~= this.token;
    tokens ~= this.token; // Append the EOF token as well.
    return tokens;
  }
}