0
|
1 /++
|
|
2 Author: Aziz Köksal
|
|
3 License: GPL2
|
|
4 +/
|
|
5 module Lexer;
|
3
|
6 import Token;
|
2
|
7 import std.stdio;
|
4
|
8 import std.utf;
|
|
9 import std.uni;
|
0
|
10
|
|
/++
  Character properties table, indexed by the value of a char (0..255).

  Each entry is a bitwise OR of CProperty flags. Only ASCII digits, ASCII
  letters and the underscore have a non-zero entry; every other slot,
  including the whole upper half (0x80..0xFF), is 0.
+/
static const int[256] ptable = [
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20
  23,23,23,23,23,23,23,23,22,22, 0, 0, 0, 0, 0, 0, // 0x30: '0'-'7', '8'-'9'
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24, // 0x40: 'A'-'F', 'G'-'O'
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0,16, // 0x50: 'P'-'Z', '_'
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24, // 0x60: 'a'-'f', 'g'-'o'
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0, 0, // 0x70: 'p'-'z'
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // 0xF0
];
|
0
|
22
|
|
/++
  Bit flags describing one character.
  The entries of ptable are bitwise OR combinations of these values.
+/
enum CProperty
{
  Octal      = 0x01, /// Set for '0'-'7'.
  Digit      = 0x02, /// Set for '0'-'9'.
  Hex        = 0x04, /// Set for '0'-'9', 'a'-'f' and 'A'-'F'.
  Alpha      = 0x08, /// Set for 'a'-'z' and 'A'-'Z'.
  Identifier = 0x10  /// Set for letters, digits and '_'.
}
|
|
31
|
|
/// Returns a non-zero value (the CProperty.Octal bit) if c is one of
/// '0'-'7' according to ptable, and 0 otherwise.
int isoctal(char c)
{
  int props = ptable[c];
  return props & CProperty.Octal;
}
|
|
/// Returns a non-zero value (the CProperty.Digit bit) if c is one of
/// '0'-'9' according to ptable, and 0 otherwise.
int isdigit(char c)
{
  int props = ptable[c];
  return props & CProperty.Digit;
}
|
|
/// Returns a non-zero value (the CProperty.Hex bit) if c is a hexadecimal
/// digit ('0'-'9', 'a'-'f', 'A'-'F') according to ptable, and 0 otherwise.
int ishexad(char c)
{
  int props = ptable[c];
  return props & CProperty.Hex;
}
|
|
/// Returns a non-zero value (the CProperty.Alpha bit) if c is an ASCII
/// letter ('a'-'z', 'A'-'Z') according to ptable, and 0 otherwise.
int isalpha(char c)
{
  int props = ptable[c];
  return props & CProperty.Alpha;
}
|
|
/// Returns a non-zero value if c is an ASCII letter or decimal digit
/// according to ptable, and 0 otherwise. The result is the subset of the
/// Alpha and Digit bits that are set for c, not a normalized boolean.
int isalnum(char c)
{
  int letterOrDigit = CProperty.Alpha | CProperty.Digit;
  return ptable[c] & letterOrDigit;
}
|
|
/// Returns a non-zero value (the CProperty.Identifier bit) if c may appear
/// in an identifier (ASCII letter, digit or '_') according to ptable,
/// and 0 otherwise.
int isident(char c)
{
  int props = ptable[c];
  return props & CProperty.Identifier;
}
|
2
|
38 /+
|
0
|
39 static this()
|
|
40 {
|
2
|
41 // Initialize character properties table.
|
0
|
42 for (int i; i < ptable.length; ++i)
|
|
43 {
|
|
44 if ('0' <= i && i <= '7')
|
|
45 ptable[i] |= CProperty.Octal;
|
|
46 if ('0' <= i && i <= '9')
|
|
47 ptable[i] |= CProperty.Digit;
|
|
48 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F')
|
|
49 ptable[i] |= CProperty.Hex;
|
|
50 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z')
|
|
51 ptable[i] |= CProperty.Alpha;
|
|
52 if (isalnum(i) || i == '_')
|
1
|
53 ptable[i] |= CProperty.Identifier;
|
0
|
54 }
|
2
|
55 // Print a formatted array literal.
|
|
56 char[] array = "[\n";
|
|
57 for (int i; i < ptable.length; ++i)
|
|
58 {
|
|
59 char c = ptable[i];
|
|
60 array ~= std.string.format("%2d,", c, ((i+1) % 32) ? "":"\n");
|
|
61 }
|
|
62 array.length = array.length - 2; // remove ",\n"
|
|
63 array ~= "\n]";
|
|
64 writefln(array);
|
0
|
65 }
|
2
|
66 +/
|
0
|
67
|
|
68 class Lexer
|
|
69 {
|
4
|
70 Token token;
|
|
71 char[] text;
|
|
72 char* p;
|
|
73 char* end;
|
|
74
|
|
75 this(char[] text)
|
|
76 {
|
|
77 this.text = text;
|
|
78 this.text.length = this.text.length + 1;
|
|
79 this.text[$-1] = 0;
|
|
80
|
|
81 this.p = this.text.ptr;
|
|
82 this.end = this.p + this.text.length;
|
|
83 }
|
|
84
|
|
85 public void scan(out Token t)
|
3
|
86 {
|
4
|
87 assert(p < end);
|
|
88 t.start = p;
|
0
|
89
|
4
|
90 char c = *p;
|
|
91
|
|
92 if (c == '\0')
|
|
93 {
|
|
94 t.type = TOK.EOF;
|
|
95 t.end = p+1;
|
|
96 return;
|
|
97 }
|
|
98
|
|
99 if (!isident(c) || isdigit(c))
|
|
100 {
|
|
101 do
|
|
102 c = *++p;
|
|
103 while ((!isident(c) || isdigit(c)) && c != '\0')
|
|
104 t.type = TOK.Whitespace;
|
|
105 t.end = p;
|
|
106 return;
|
|
107 }
|
|
108
|
|
109 if (isident(c) && !isdigit(c))
|
|
110 {
|
|
111 do
|
|
112 { c = *++p; }
|
|
113 while (isident(c))
|
|
114 t.type = TOK.Identifier;
|
|
115 t.end = p;
|
|
116 return;
|
|
117 }
|
3
|
118 }
|
4
|
119
|
3
|
120 public TOK nextToken()
|
|
121 {
|
4
|
122 scan(this.token);
|
|
123 return this.token.type;
|
|
124 }
|
|
125
|
|
126 Token[] getTokens()
|
|
127 {
|
|
128 Token[] tokens;
|
|
129 while (nextToken() != TOK.EOF)
|
|
130 tokens ~= this.token;
|
|
131 tokens ~= this.token;
|
|
132 return tokens;
|
3
|
133 }
|
0
|
134 } |