annotate trunk/src/Lexer.d @ 8:d4ba94a5a282

- Parsing /* */ comments now.
author aziz
date Fri, 22 Jun 2007 20:41:04 +0000
parents 07e45c06a024
children 5d6968cc751e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
1 /++
8ba2570de175 Initial import.
aziz
parents:
diff changeset
2 Author: Aziz Köksal
8ba2570de175 Initial import.
aziz
parents:
diff changeset
3 License: GPL2
8ba2570de175 Initial import.
aziz
parents:
diff changeset
4 +/
8ba2570de175 Initial import.
aziz
parents:
diff changeset
5 module Lexer;
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
6 import Token;
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
7 import std.stdio;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
8 import std.utf;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
9 import std.uni;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
10
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Character property lookup table, indexed by the numeric value of a char.
/// Each entry is a combination of CProperty bit flags; only the ASCII digits,
/// the ASCII letters and '_' have any flag set.
static const int[256] ptable = [
  // 0x00 - 0x1F: control characters
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x20 - 0x2F: space and punctuation
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x30 - 0x3F: '0'-'7', '8'-'9', then punctuation
  23,23,23,23,23,23,23,23,22,22, 0, 0, 0, 0, 0, 0,
  // 0x40 - 0x5F: '@', 'A'-'F', 'G'-'Z', punctuation, '_'
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0,16,
  // 0x60 - 0x7F: '`', 'a'-'f', 'g'-'z', then punctuation and DEL
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0, 0,
  // 0x80 - 0xFF: non-ASCII values carry no properties
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
22
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Bit flags describing the classes a character can belong to.
/// The entries of ptable are combinations of these flags.
enum CProperty
{
  Octal      = 0x01, /// '0'-'7'
  Digit      = 0x02, /// '0'-'9'
  Hex        = 0x04, /// '0'-'9', 'a'-'f', 'A'-'F'
  Alpha      = 0x08, /// 'a'-'z', 'A'-'Z'
  Identifier = 0x10  /// letters, digits and '_'
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
31
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if c has the Octal flag set in ptable, otherwise 0.
int isoctal(char c)
{
  return ptable[c] & CProperty.Octal;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if c has the Digit flag set in ptable, otherwise 0.
int isdigit(char c)
{
  return ptable[c] & CProperty.Digit;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if c has the Hex flag set in ptable, otherwise 0.
int ishexad(char c)
{
  return ptable[c] & CProperty.Hex;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if c has the Alpha flag set in ptable, otherwise 0.
int isalpha(char c)
{
  return ptable[c] & CProperty.Alpha;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if c has the Alpha or the Digit flag set in ptable,
/// otherwise 0.
int isalnum(char c)
{
  return ptable[c] & (CProperty.Alpha | CProperty.Digit);
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a non-zero value if c has the Identifier flag set in ptable,
/// otherwise 0.
int isident(char c)
{
  return ptable[c] & CProperty.Identifier;
}
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
38 /+
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
39 static this()
8ba2570de175 Initial import.
aziz
parents:
diff changeset
40 {
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
41 // Initialize character properties table.
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
42 for (int i; i < ptable.length; ++i)
8ba2570de175 Initial import.
aziz
parents:
diff changeset
43 {
8ba2570de175 Initial import.
aziz
parents:
diff changeset
44 if ('0' <= i && i <= '7')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
45 ptable[i] |= CProperty.Octal;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
46 if ('0' <= i && i <= '9')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
47 ptable[i] |= CProperty.Digit;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
48 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
49 ptable[i] |= CProperty.Hex;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
50 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
51 ptable[i] |= CProperty.Alpha;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
52 if (isalnum(i) || i == '_')
1
f3cd3bfde4ba - Corrected some errors to make the file compile.
aziz
parents: 0
diff changeset
53 ptable[i] |= CProperty.Identifier;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
54 }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
55 // Print a formatted array literal.
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
56 char[] array = "[\n";
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
57 for (int i; i < ptable.length; ++i)
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
58 {
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
59 char c = ptable[i];
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
60 array ~= std.string.format("%2d,", c, ((i+1) % 32) ? "":"\n");
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
61 }
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
62 array.length = array.length - 2; // remove ",\n"
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
63 array ~= "\n]";
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
64 writefln(array);
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
65 }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
66 +/
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
67
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Scans source text and produces tokens. At this stage only identifiers,
/// nesting comments and block comments are recognised; every other character
/// is skipped.
class Lexer
{
  Token token; /// The token filled in by the most recent call to nextToken().
  char[] text; /// Zero-terminated copy of the text being scanned.
  char* p;     /// Current scan position inside text.
  char* end;   /// Points one past the last element of text.

  /// Stores a zero-terminated copy of text and places the scan position at
  /// its first character.
  this(char[] text)
  {
    // Concatenation always allocates a new array. Growing the caller's slice
    // through its .length property may instead resize it in place and
    // overwrite memory that other slices of the same block still refer to.
    this.text = text ~ '\0';

    this.p = this.text.ptr;
    this.end = this.p + this.text.length;
  }

  /// Scans the next token starting at the current position and stores its
  /// type, start and end in t.
  ///
  /// Characters that do not begin a recognised token are skipped. When the
  /// zero terminator is reached, t.type is set to TOK.EOF and the scan
  /// position is left on the terminator, so further calls report EOF again.
  ///
  /// Throws: Error if a comment is still open when the terminator is reached.
  public void scan(out Token t)
  {
    assert(p < end);

    char c = *p;

    while (1)
    {
      t.start = p;

      if (c == 0)
      {
        t.type = TOK.EOF;
        t.end = p+1;
        return;
      }

      // An identifier starts with an identifier character that is not a digit.
      if (isident(c) && !isdigit(c))
      {
        do
        { c = *++p; }
        while (isident(c));
        t.type = TOK.Identifier;
        t.end = p;
        return;
      }

      if (c == '/')
      {
        c = *++p;
        if (c == '+')
        {
          // Nesting comment: keep track of how many openers are unclosed.
          uint level = 1;
          while (1)
          {
            c = *++p;
            if (c == 0)
              throw new Error("unterminated /+ +/ comment.");
            else if (c == '/' && p[1] == '+')
            {
              // Consume the '+' as well, so that it cannot be paired with a
              // following '/' and mistaken for a closer.
              ++p;
              ++level;
            }
            else if (c == '+' && p[1] == '/')
            {
              ++p; // p is now on the '/' of the closer.
              if (--level == 0)
                break;
            }
          }
          // p is on the final '/' of the comment; step past it. (Advancing by
          // two here would skip the character that follows the comment.)
          ++p;
          t.type = TOK.Comment;
          t.end = p;
          return;
        }
        else if (c == '*')
        {
          do
          {
            c = *++p;
            if (c == 0)
              throw new Error("unterminated /* */ comment.");
          } while (c != '*' || p[1] != '/');
          // p is on the '*' of the closer; step past both of its characters.
          p += 2;
          t.type = TOK.Comment;
          t.end = p;
          return;
        }

        // The '/' did not open a comment. p already points at the character
        // after it and c holds that character, so examine it in the next
        // iteration without advancing again; otherwise that character would
        // be skipped, or the terminator would be stepped over.
        continue;
      }

      // Unrecognised character: skip it.
      c = *++p;
    }
  }

  /// Scans the next token into this.token and returns its type.
  public TOK nextToken()
  {
    scan(this.token);
    return this.token.type;
  }

  /// Scans the remaining text and returns all tokens found, with the EOF
  /// token as the last element.
  Token[] getTokens()
  {
    Token[] tokens;
    while (nextToken() != TOK.EOF)
      tokens ~= this.token;
    // The loop stops on the EOF token without storing it; append it here.
    tokens ~= this.token;
    return tokens;
  }
}