0
|
1 /++
|
|
2 Author: Aziz Köksal
|
|
3 License: GPL2
|
|
4 +/
|
|
5 module Lexer;
|
3
|
6 import Token;
|
2
|
7 import std.stdio;
|
4
|
8 import std.utf;
|
|
9 import std.uni;
|
0
|
10
|
|
/// Character property lookup table, indexed by character code (0..255).
///
/// Every entry is a bitwise combination of CProperty flags; an entry of 0
/// means the character has none of the properties. Only '0'-'9', 'A'-'Z',
/// 'a'-'z' and '_' have non-zero entries.
static const int[256] ptable = [
  // 0x00 - 0x2F: no properties
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // '0'-'7' (23), '8'-'9' (22), then ':' .. '?'
  23,23,23,23,23,23,23,23,22,22, 0, 0, 0, 0, 0, 0,
  // '@', 'A'-'F' (28), 'G'-'O' (24)
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,
  // 'P'-'Z' (24), '[' .. '^', '_' (16)
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0,16,
  // '`', 'a'-'f' (28), 'g'-'o' (24)
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,
  // 'p'-'z' (24), '{' .. 0x7F
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0, 0,
  // 0x80 - 0xFF: no properties
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];
|
0
|
22
|
|
/// Bit flags stored in ptable that classify a character.
/// The flags are independent bits and may be combined with '|'.
enum CProperty
{
  Octal      = 0x01, /// Set for the octal digits '0'-'7'.
  Digit      = 0x02, /// Set for the decimal digits '0'-'9'.
  Hex        = 0x04, /// Set for '0'-'9', 'a'-'f' and 'A'-'F'.
  Alpha      = 0x08, /// Set for the letters 'a'-'z' and 'A'-'Z'.
  Identifier = 0x10  /// Set for letters, decimal digits and '_'.
}
|
|
31
|
|
/// Returns a non-zero value if ptable marks c with CProperty.Octal, else 0.
int isoctal(char c)
{
  int flags = ptable[c];
  return flags & CProperty.Octal;
}
|
|
/// Returns a non-zero value if ptable marks c with CProperty.Digit, else 0.
int isdigit(char c)
{
  int flags = ptable[c];
  return flags & CProperty.Digit;
}
|
|
/// Returns a non-zero value if ptable marks c with CProperty.Hex, else 0.
int ishexad(char c)
{
  int flags = ptable[c];
  return flags & CProperty.Hex;
}
|
|
/// Returns a non-zero value if ptable marks c with CProperty.Alpha, else 0.
int isalpha(char c)
{
  int flags = ptable[c];
  return flags & CProperty.Alpha;
}
|
|
/// Returns a non-zero value if ptable marks c with CProperty.Alpha or
/// CProperty.Digit (or both), else 0.
int isalnum(char c)
{
  int mask = CProperty.Alpha | CProperty.Digit;
  return ptable[c] & mask;
}
|
|
/// Returns a non-zero value if ptable marks c with CProperty.Identifier,
/// else 0.
int isident(char c)
{
  int flags = ptable[c];
  return flags & CProperty.Identifier;
}
|
2
|
38 /+
|
0
|
39 static this()
|
|
40 {
|
2
|
41 // Initialize character properties table.
|
0
|
42 for (int i; i < ptable.length; ++i)
|
|
43 {
|
|
44 if ('0' <= i && i <= '7')
|
|
45 ptable[i] |= CProperty.Octal;
|
|
46 if ('0' <= i && i <= '9')
|
|
47 ptable[i] |= CProperty.Digit;
|
|
48 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F')
|
|
49 ptable[i] |= CProperty.Hex;
|
|
50 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z')
|
|
51 ptable[i] |= CProperty.Alpha;
|
|
52 if (isalnum(i) || i == '_')
|
1
|
53 ptable[i] |= CProperty.Identifier;
|
0
|
54 }
|
2
|
55 // Print a formatted array literal.
|
|
56 char[] array = "[\n";
|
|
57 for (int i; i < ptable.length; ++i)
|
|
58 {
|
|
59 char c = ptable[i];
|
|
60 array ~= std.string.format("%2d,", c, ((i+1) % 32) ? "":"\n");
|
|
61 }
|
|
62 array.length = array.length - 2; // remove ",\n"
|
|
63 array ~= "\n]";
|
|
64 writefln(array);
|
0
|
65 }
|
2
|
66 +/
|
0
|
67
|
|
/// Scanner that splits source text into tokens.
///
/// The lexer works on its own zero-terminated copy of the input, so a
/// character with the value 0 marks the end of the text. Only identifiers,
/// /+ +/ comments, /* */ comments and the end of the text produce tokens;
/// every other character is skipped.
class Lexer
{
  Token token; /// Token filled in by the most recent call to nextToken().
  char[] text; /// The source text followed by a terminating 0 character.
  char* p;     /// Current scan position inside text.
  char* end;   /// Points one past the terminating 0 character of text.

  /// Creates a lexer for the given source text.
  /// Params:
  ///   text = the characters to scan; the array itself is never modified.
  this(char[] text)
  {
    // Concatenation always allocates a new array. Growing the length of the
    // caller's slice instead may resize it in place, in which case writing
    // the terminator would overwrite data owned by the caller.
    this.text = text ~ '\0';

    this.p = this.text.ptr;
    this.end = this.p + this.text.length;
  }

  /// Scans the next token starting at the current position.
  ///
  /// t.start and t.end are set to the first character of the token and to
  /// one past its last character, and t.type to its kind. Once the
  /// terminating 0 character is reached, every call yields TOK.EOF.
  ///
  /// Params:
  ///   t = receives the scanned token.
  /// Throws:
  ///   Error if a /+ +/ or /* */ comment is not closed before the end of
  ///   the text.
  public void scan(out Token t)
  {
    assert(p < end);

    char c = *p;

    while (1)
    {
      t.start = p;

      if (c == 0)
      {
        // p is deliberately left on the terminator, so further calls
        // report EOF again instead of running past the buffer.
        t.type = TOK.EOF;
        t.end = p+1;
        return;
      }

      // An identifier starts with an identifier character that is not a
      // digit and extends over all following identifier characters.
      if (isident(c) && !isdigit(c))
      {
        do
        { c = *++p; }
        while (isident(c));
        t.type = TOK.Identifier;
        t.end = p;
        return;
      }

      if (c == '/')
      {
        c = *++p;
        if (c == '+')
        {
          // Nesting comment: count the openers and closers.
          uint level = 1;
          do
          {
            c = *++p;
            if (c == 0)
              throw new Error("unterminated /+ +/ comment.");
            else if (c == '/' && p[1] == '+')
            {
              ++p;
              ++level;
            }
            else if (c == '+' && p[1] == '/')
            {
              ++p;
              if (--level == 0)
                break;
            }
          } while (1);
          // The loop leaves p on the closing '/', so a single step moves
          // just past the comment. Stepping by two would swallow the next
          // character, or run past the terminator if the comment ends the
          // text.
          ++p;
          t.type = TOK.Comment;
          t.end = p;
          return;
        }
        else if (c == '*')
        {
          do
          {
            c = *++p;
            if (c == 0)
              throw new Error("unterminated /* */ comment.");
          } while (c != '*' || p[1] != '/');
          // Here p is on the '*' of the closing "*/".
          p += 2;
          t.type = TOK.Comment;
          t.end = p;
          return;
        }

        // A '/' that does not open a comment is skipped. c already holds
        // the character at p, so it must be examined by the next iteration
        // rather than skipped as well; it may even be the terminator.
        continue;
      }

      // Any other character produces no token; move on to the next one.
      c = *++p;
    }
  }

  /// Scans the next token into this.token and returns its type.
  public TOK nextToken()
  {
    scan(this.token);
    return this.token.type;
  }

  /// Scans the rest of the text.
  /// Returns: all scanned tokens in order; the last one is the EOF token.
  Token[] getTokens()
  {
    Token[] tokens;
    while (nextToken() != TOK.EOF)
      tokens ~= this.token;
    // Append the EOF token that ended the loop.
    tokens ~= this.token;
    return tokens;
  }
}
|