annotate trunk/src/Lexer.d @ 17:9bd0bac79479

- Removed Whitespace from enum list. - Added code to count current number of lines.
author aziz
date Sat, 23 Jun 2007 20:12:03 +0000
parents 476e8e55c1d4
children c48d2125f1e2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
1 /++
8ba2570de175 Initial import.
aziz
parents:
diff changeset
2 Author: Aziz Köksal
8ba2570de175 Initial import.
aziz
parents:
diff changeset
3 License: GPL2
8ba2570de175 Initial import.
aziz
parents:
diff changeset
4 +/
8ba2570de175 Initial import.
aziz
parents:
diff changeset
5 module Lexer;
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
6 import Token;
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
7 import std.stdio;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
8 import std.utf;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
9 import std.uni;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
10
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// ASCII character properties table.
///
/// The entry at index c is a bit set of CProperty flags describing the
/// character with code c. Codes 128 .. 255 carry no properties.
/// The values correspond to what the static constructor guarded by
/// version(gen_ptable) computes and prints.
static const int[256] ptable = [
   0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0, // 0x00: \t \v \f
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
  32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20: space
   7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0, // 0x30: 0-7, 8-9
   0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0x40: A-F, G-O
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0,16, // 0x50: P-Z, _
   0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0x60: a-f, g-o
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, // 0x70: p-z
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // 0xF0
];
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
30
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Bit flags for the character classes stored in ptable.
/// Flags may be combined; e.g. '0' is Octal, Digit and Hex at once.
enum CProperty
{
  Octal      = 1 << 0, /// '0' .. '7'
  Digit      = 1 << 1, /// '0' .. '9'
  Hex        = 1 << 2, /// Digits, 'a' .. 'f' and 'A' .. 'F'
  Alpha      = 1 << 3, /// 'a' .. 'z' and 'A' .. 'Z'
  Underscore = 1 << 4, /// '_'
  Whitespace = 1 << 5  /// ' ', '\t', '\v' and '\f'
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
40
13
e5211758b63c - Added isidbeg() function.
aziz
parents: 12
diff changeset
private alias CProperty CP;

// Character classification helpers. Each one looks the character up in
// ptable and returns the masked flag bits: non-zero when the character has
// the property, 0 otherwise. The result is a bit mask, not necessarily 1.

/// Non-zero if c is an octal digit.
int isoctal(char c)
{
  return ptable[c] & CP.Octal;
}

/// Non-zero if c is a decimal digit.
int isdigit(char c)
{
  return ptable[c] & CP.Digit;
}

/// Non-zero if c is a hexadecimal digit.
int ishexad(char c)
{
  return ptable[c] & CP.Hex;
}

/// Non-zero if c is an ASCII letter.
int isalpha(char c)
{
  return ptable[c] & CP.Alpha;
}

/// Non-zero if c is an ASCII letter or a decimal digit.
int isalnum(char c)
{
  return ptable[c] & (CP.Digit | CP.Alpha);
}

/// Non-zero if c may begin an identifier (ASCII letter or underscore).
int isidbeg(char c)
{
  return ptable[c] & (CP.Underscore | CP.Alpha);
}

/// Non-zero if c may continue an identifier (ASCII letter, digit or underscore).
int isident(char c)
{
  return ptable[c] & (CP.Underscore | CP.Alpha | CP.Digit);
}

/// Non-zero if c is a space, horizontal tab, vertical tab or form feed.
int isspace(char c)
{
  return ptable[c] & CP.Whitespace;
}
13
e5211758b63c - Added isidbeg() function.
aziz
parents: 12
diff changeset
50
e5211758b63c - Added isidbeg() function.
aziz
parents: 12
diff changeset
version(gen_ptable)
{
  import std.string; // format() is used below.

  /// Recomputes the character properties table and prints it as an array
  /// literal that can be pasted into the initializer of ptable.
  ///
  /// The values are built in a local array: ptable itself is declared
  /// const and is not written to.
  static this()
  {
    // Compute the properties of every character code.
    int[256] table; // Elements start out as 0.
    for (int i; i < table.length; ++i)
    {
      if ('0' <= i && i <= '7')
        table[i] |= CP.Octal;
      if ('0' <= i && i <= '9')
        table[i] |= CP.Digit;
      // The Digit flag of this entry has just been set above.
      if ((table[i] & CP.Digit) || ('a' <= i && i <= 'f') || ('A' <= i && i <= 'F'))
        table[i] |= CP.Hex;
      if (('a' <= i && i <= 'z') || ('A' <= i && i <= 'Z'))
        table[i] |= CP.Alpha;
      if (i == '_')
        table[i] |= CP.Underscore;
      if (i == ' ' || i == '\t' || i == '\v' || i == '\f')
        table[i] |= CP.Whitespace;
    }

    // Print a formatted array literal, 16 entries per line.
    char[] array = "[\n";
    for (int i; i < table.length; ++i)
    {
      int c = table[i];
      array ~= std.string.format("%2d,%s", c, ((i+1) % 16) ? "" : "\n");
    }
    // The text ends in ",\n"; overwrite that with the closing bracket.
    array[$-2..$] = "\n]";
    writefln("%s", array);
  }
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
81
11
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
/// UTF-8 encoding (3 bytes) of U+2028 LINE SEPARATOR.
const char[3] LS = "\u2028";
/// UTF-8 encoding (3 bytes) of U+2029 PARAGRAPH SEPARATOR.
/// Differs from LS only in its last byte.
const char[3] PS = "\u2029";
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
84
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Splits source text into tokens.
///
/// The lexer works on a private copy of the text that is terminated by a
/// 0 byte. All scanning loops rely on that sentinel to detect the end of
/// the input.
class Lexer
{
  Token token; /// The token most recently produced by nextToken().
  char[] text; /// Zero-terminated copy of the source text.
  char* p;     /// Current scanning position inside text.
  char* end;   /// One past the last byte (the sentinel) of text.

  /// Current line number, starting at 1. Only line breaks met between
  /// tokens in scan() are counted; those inside comments and literals
  /// are not.
  uint loc = 1;

  /// Creates a lexer for text.
  this(char[] text)
  {
    // Concatenation always allocates a new array, so adding the sentinel
    // can never write into memory that belongs to the caller's array.
    this.text = text ~ '\0';

    this.p = this.text.ptr;
    this.end = this.p + this.text.length;
  }

  /// Scans the next token at the current position and stores it in t.
  ///
  /// Characters that do not start a recognized token (white space and
  /// operators that are not handled yet) are skipped. Once the sentinel
  /// has been consumed (TOK.EOF returned) scan() must not be called again.
  ///
  /// Throws: Error on an unterminated comment, string literal or character
  /// literal. NOTE(review): std.utf.decode presumably raises its own error
  /// on malformed UTF-8 input - confirm.
  public void scan(out Token t)
  {
    assert(p < end);

    uint c = *p;

    while (true)
    {
      t.start = p;

      if (c == 0)
      {
        ++p;
        t.type = TOK.EOF;
        t.end = p;
        return;
      }

      if (c == '\n')
      {
        c = *++p;
        ++loc;
        continue;
      }
      else if (c == '\r')
      {
        c = *++p;
        // For "\r\n" the line is counted when the '\n' is seen.
        if (c != '\n')
          ++loc;
        continue;
      }

      if (isidbeg(c))
        return scanIdentifier(t);

      if (isdigit(c))
        return scanNumber(t);

      if (c == '/')
      {
        c = *++p;
        switch (c)
        {
        case '=':
          ++p;
          t.type = TOK.DivisionAssign;
          t.end = p;
          return;
        case '+':
        {
          // Nesting comment; p is on the '+' of the opening "/+".
          uint level = 1;
          while (level != 0)
          {
            c = *++p;
            if (c == 0)
              throw new Error("unterminated /+ +/ comment.");
            else if (c == '/' && p[1] == '+')
            {
              ++p;
              ++level;
            }
            else if (c == '+' && p[1] == '/')
            {
              ++p;
              --level;
            }
          }
          // p is on the '/' of the final "+/": one step moves just past
          // the comment.
          ++p;
          t.type = TOK.Comment;
          t.end = p;
          return;
        }
        case '*':
          // Block comment; p is on the '*' of the opening "/*".
          while (true)
          {
            c = *++p;
            if (c == 0)
              throw new Error("unterminated /* */ comment.");
            if (c == '*' && p[1] == '/')
              break;
          }
          // p is on the '*' of the closing "*/".
          p += 2;
          t.type = TOK.Comment;
          t.end = p;
          return;
        case '/':
          // Line comment; ends before '\n', the sentinel, or a Unicode
          // line/paragraph separator. The terminator is not consumed.
          while (true)
          {
            c = *++p;
            if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
              break;
            if (c == '\n' || c == 0)
              break;
          }
          t.type = TOK.Comment;
          t.end = p;
          return;
        default:
          // A plain '/' is not tokenized yet. Skip it like every other
          // unhandled operator; c already holds the character after it.
          continue;
        }
      }

      if (c == '"')
      {
        while (true)
        {
          c = *++p;
          if (c == 0)
            throw new Error("unterminated string literal.");
          if (c == '"')
            break;
          if (c == '\\')
          {
            // Skip the escaped character, but never the sentinel.
            ++p;
            if (*p == 0)
              throw new Error("unterminated string literal.");
          }
        }
        ++p;
        t.type = TOK.String;
        t.end = p;
        return;
      }

      if (c == '\'')
      {
        while (true)
        {
          c = *++p;
          if (c == 0)
            throw new Error("unterminated character literal.");
          if (c == '\'')
            break;
          if (c == '\\')
          {
            // Skip the escaped character, but never the sentinel.
            ++p;
            if (*p == 0)
              throw new Error("unterminated character literal.");
          }
        }
        ++p;
        t.type = TOK.Character;
        t.end = p;
        return;
      }

      // A non-ASCII character starts an identifier if it is alphabetic.
      // decodeUTF() leaves p on the last byte of the character either way.
      if (c & 128)
      {
        if (isUniAlpha(decodeUTF()))
          return scanIdentifier(t);
      }

      // Nothing recognized: skip this character.
      c = *++p;
    }
  }

  /// Scans the rest of an identifier and completes t.
  ///
  /// On entry p must be on the last byte of the identifier's first
  /// character. On exit p is on the first byte after the identifier.
  private void scanIdentifier(ref Token t)
  {
    while (true)
    {
      uint c = *++p;
      if (isident(c))
        continue;
      if (c & 128)
      {
        // decodeUTF() advances p, so remember where the character began
        // in case it turns out not to belong to the identifier.
        char* mark = p;
        if (isUniAlpha(decodeUTF()))
          continue;
        p = mark;
      }
      break;
    }
    t.type = TOK.Identifier;
    t.end = p;
  }

  /// Scans a run of decimal digits and completes t as a number token.
  /// On entry p must be on the first digit.
  void scanNumber(ref Token t)
  {
    ++p;
    while (isdigit(*p))
      ++p;
    t.type = TOK.Number;
    t.end = p;
  }

  /// Decodes the UTF-8 sequence that starts at p.
  ///
  /// p is left on the last byte of the sequence, so that a following
  /// "++p" moves to the next character.
  /// Returns: the decoded code point.
  uint decodeUTF()
  {
    assert(*p & 128);
    size_t idx;
    uint d;
    d = std.utf.decode(p[0 .. end-p], idx);
    p += idx - 1;
    return d;
  }

  /// Scans the next token into this.token and returns its type.
  public TOK nextToken()
  {
    scan(this.token);
    return this.token.type;
  }

  /// Scans the remaining input and returns all tokens, ending with the
  /// EOF token.
  Token[] getTokens()
  {
    Token[] tokens;
    while (true)
    {
      TOK type = nextToken();
      tokens ~= this.token;
      if (type == TOK.EOF)
        break;
    }
    return tokens;
  }
}