annotate trunk/src/Lexer.d @ 28:3a9daccf7d96

- Added table for identifiers to Lexer. - Added keywords table. - Added keywords to TOK.
author aziz
date Sun, 24 Jun 2007 17:19:03 +0000
parents 43b6bf56f0e9
children ef83eea26bbd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
1 /++
8ba2570de175 Initial import.
aziz
parents:
diff changeset
2 Author: Aziz Köksal
8ba2570de175 Initial import.
aziz
parents:
diff changeset
3 License: GPL2
8ba2570de175 Initial import.
aziz
parents:
diff changeset
4 +/
8ba2570de175 Initial import.
aziz
parents:
diff changeset
5 module Lexer;
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
6 import Token;
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
7 import Keywords;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
8 import Identifier;
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
9 import std.stdio;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
10 import std.utf;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
11 import std.uni;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
12
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Character properties table: one bit mask of CProperty flags per byte value.
/// Only the ASCII range carries flags; every entry from 0x80 upwards is zero.
static const int[256] ptable = [
  // 0x00-0x0F: '\t' (0x09), '\v' (0x0B) and '\f' (0x0C) are whitespace.
   0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0,
  // 0x10-0x1F
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x20-0x2F: the space character is whitespace.
  32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x30-0x3F: '0'-'7' are octal+digit+hex, '8' and '9' are digit+hex.
   7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0,
  // 0x40-0x4F: 'A'-'F' are hex+alpha, 'G'-'O' are alpha.
   0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  // 0x50-0x5F: 'P'-'Z' are alpha, '_' has the underscore flag.
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0,16,
  // 0x60-0x6F: 'a'-'f' are hex+alpha, 'g'-'o' are alpha.
   0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  // 0x70-0x7F: 'p'-'z' are alpha.
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0,
  // 0x80-0xFF: no properties.
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
32
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Bit flags for the character classes recorded in ptable.
/// Each member occupies its own bit so that several can be combined in one entry.
enum CProperty
{
  Octal      = 0x01, /// set for '0'-'7'
  Digit      = 0x02, /// set for '0'-'9'
  Hex        = 0x04, /// set for decimal digits, 'a'-'f' and 'A'-'F'
  Alpha      = 0x08, /// set for 'a'-'z' and 'A'-'Z'
  Underscore = 0x10, /// set for '_'
  Whitespace = 0x20  /// set for ' ', '\t', '\v' and '\f'
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
42
13
e5211758b63c - Added isidbeg() function.
aziz
parents: 12
diff changeset
private alias CProperty CP;

// Character classification helpers. Each one looks up the table entry of c and
// returns the selected flag bits unchanged, so the result is non-zero (not
// necessarily 1) when c belongs to the class and zero otherwise.

/// Non-zero if c is an octal digit ('0'-'7').
int isoctal(char c)
{
  int props = ptable[c];
  return props & CP.Octal;
}

/// Non-zero if c is a decimal digit ('0'-'9').
int isdigit(char c)
{
  int props = ptable[c];
  return props & CP.Digit;
}

/// Non-zero if c is a hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F').
int ishexad(char c)
{
  int props = ptable[c];
  return props & CP.Hex;
}

/// Non-zero if c is an ASCII letter.
int isalpha(char c)
{
  int props = ptable[c];
  return props & CP.Alpha;
}

/// Non-zero if c is an ASCII letter or a decimal digit.
int isalnum(char c)
{
  int mask = CP.Alpha | CP.Digit;
  return ptable[c] & mask;
}

/// Non-zero if c may start an ASCII identifier (letter or underscore).
int isidbeg(char c)
{
  int mask = CP.Alpha | CP.Underscore;
  return ptable[c] & mask;
}

/// Non-zero if c may continue an ASCII identifier (letter, underscore or digit).
int isident(char c)
{
  int mask = CP.Alpha | CP.Underscore | CP.Digit;
  return ptable[c] & mask;
}

/// Non-zero if c is a space, tab, vertical tab or form feed.
/// Line terminators ('\n', '\r') carry no whitespace flag in the table.
int isspace(char c)
{
  int props = ptable[c];
  return props & CP.Whitespace;
}
13
e5211758b63c - Added isidbeg() function.
aziz
parents: 12
diff changeset
52
e5211758b63c - Added isidbeg() function.
aziz
parents: 12
diff changeset
/++
  Table generator, compiled only when the version identifier gen_ptable is set.
  It recomputes the property flags for every index of ptable and then prints
  the table as a formatted array literal with 16 values per line.
+/
version(gen_ptable)
static this()
{
  // Pass 1: derive the flags for each character code.
  for (int ch; ch < ptable.length; ++ch)
  {
    int flags = 0;

    if ('0' <= ch && ch <= '7')
      flags |= CP.Octal;
    if ('0' <= ch && ch <= '9')
      flags |= CP.Digit;
    // Hex digits are the decimal digits plus a-f and A-F.
    if ((flags & CP.Digit) || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F')
      flags |= CP.Hex;
    if ('a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z')
      flags |= CP.Alpha;

    switch (ch)
    {
    case '_':
      flags |= CP.Underscore;
      break;
    case ' ', '\t', '\v', '\f':
      flags |= CP.Whitespace;
      break;
    default:
      break;
    }

    ptable[ch] = flags;
  }

  // Pass 2: print a formatted array literal.
  char[] literal = "[\n";
  for (int idx; idx < ptable.length; ++idx)
  {
    int value = ptable[idx];
    // Start a new output line after every 16th value.
    char[] lineEnd = ((idx + 1) % 16) ? "" : "\n";
    literal ~= std.string.format("%2d,", value, lineEnd);
  }
  // The text now ends in ",\n"; overwrite that with the closing bracket.
  literal[$-2..$] = "\n]";
  writefln(literal);
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
83
11
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
/// UTF-8 encoding (three code units) of U+2028, the Unicode line separator.
const char[3] LS = "\u2028";
/// UTF-8 encoding (three code units) of U+2029, the Unicode paragraph separator.
const char[3] PS = "\u2029";

/// The same two separators as single UTF-32 code points.
const dchar LSd = '\u2028';
const dchar PSd = '\u2029';
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
89
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
/// Index into table of error messages.
/// The members must stay in the same order as the entries of Messages.
enum MID
{
  UnterminatedCharacterLiteral,
  EmptyCharacterLiteral
}

/// Error message texts, one per MID member, looked up as Messages[MID.xxx].
/// Every entry needs its own trailing comma: adjacent string literals without
/// a separating comma are concatenated into a single array element.
string[] Messages = [
  "unterminated character literal.",
  "empty character literal."
];
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
101
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
/++
  Record of a single reported problem: which message it refers to, which
  category it belongs to and the location value supplied by the reporter.
+/
class Problem
{
  /// Category of a problem.
  /// NOTE(review): presumably the compiler phase that detected it - confirm.
  enum Type
  {
    Lexer,
    Parser,
    Semantic
  }

  MID id;    /// message identifier
  Type type; /// category of this problem
  uint loc;  /// location; presumably a line number like Lexer.loc - confirm

  /// Stores the three arguments unchanged in the fields of the same name.
  this(Type type, MID id, uint loc)
  {
    this.type = type;
    this.id   = id;
    this.loc  = loc;
  }
}
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
121
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
122 class Lexer
8ba2570de175 Initial import.
aziz
parents:
diff changeset
123 {
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
124 Token token;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
125 char[] text;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
126 char* p;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
127 char* end;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
128
17
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
129 uint loc = 1; /// line of code
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
130
18
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
131 Problem[] errors;
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
132
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
133 Identifier[string] idtable;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
134
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
/++
  Creates a lexer for the given source text.

  The lexer works on a private copy of text that has a terminating 0 byte
  appended. p is set to the first character and end to one past the
  terminator. The keyword table is loaded last.

  Params:
    text = the source text to scan; it is not modified.
+/
this(char[] text)
{
  // Concatenation always allocates a new array. Growing the caller's slice
  // with ".length = length + 1" may resize in place and overwrite whatever
  // the caller keeps directly behind the slice with the 0 terminator.
  this.text = text ~ '\0';

  this.p = this.text.ptr;
  this.end = this.p + this.text.length;

  loadKeywords();
}
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
146
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
147 public void scan(out Token t)
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
148 {
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
149 assert(p < end);
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
150
10
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
151 uint c = *p;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
152
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
153 while(1)
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
154 {
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
155 t.start = p;
16
476e8e55c1d4 - Added Whitespace to the character properties table.
aziz
parents: 15
diff changeset
156
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
157 if (c == 0)
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
158 {
17
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
159 ++p;
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
160 t.type = TOK.EOF;
17
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
161 t.end = p;
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
162 return;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
163 }
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
164
17
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
165 if (c == '\n')
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
166 {
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
167 c = *++p;
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
168 ++loc;
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
169 continue;
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
170 }
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
171 else if (c == '\r')
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
172 {
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
173 c = *++p;
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
174 if (c != '\n')
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
175 ++loc;
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
176 continue;
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
177 }
9bd0bac79479 - Removed Whitespace from enum list.
aziz
parents: 16
diff changeset
178
13
e5211758b63c - Added isidbeg() function.
aziz
parents: 12
diff changeset
179 if (isidbeg(c))
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
180 {
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
181 Lidentifier:
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
182 do
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
183 { c = *++p; }
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
184 while (isident(c) || c & 128 && isUniAlpha(decodeUTF()))
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
185
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
186 t.end = p;
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
187
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
188 string str = t.span;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
189 Identifier* id = str in idtable;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
190
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
191 if (!id)
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
192 {
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
193 idtable[str] = Identifier.Identifier(TOK.Identifier, str);
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
194 id = str in idtable;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
195 }
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
196 assert(id);
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
197 t.type = id.type;
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
198 return;
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
199 }
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
200
15
c70c028e47dd - Started implementation of lexing numbers.
aziz
parents: 14
diff changeset
201 if (isdigit(c))
c70c028e47dd - Started implementation of lexing numbers.
aziz
parents: 14
diff changeset
202 return scanNumber(t);
c70c028e47dd - Started implementation of lexing numbers.
aziz
parents: 14
diff changeset
203
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
204 if (c == '/')
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
205 {
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
206 c = *++p;
14
cdf788d8bdaf - Parsing /= now.
aziz
parents: 13
diff changeset
207 switch(c)
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
208 {
18
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
209 case '=':
14
cdf788d8bdaf - Parsing /= now.
aziz
parents: 13
diff changeset
210 ++p;
cdf788d8bdaf - Parsing /= now.
aziz
parents: 13
diff changeset
211 t.type = TOK.DivisionAssign;
cdf788d8bdaf - Parsing /= now.
aziz
parents: 13
diff changeset
212 t.end = p;
cdf788d8bdaf - Parsing /= now.
aziz
parents: 13
diff changeset
213 return;
18
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
214 case '+':
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
215 uint level = 1;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
216 do
7
07e45c06a024 - Parsing nested comments correctly now.
aziz
parents: 5
diff changeset
217 {
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
218 c = *++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
219 if (c == 0)
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
220 throw new Error("unterminated /+ +/ comment.");
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
221 else if (c == '/' && p[1] == '+')
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
222 {
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
223 ++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
224 ++level;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
225 }
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
226 else if (c == '+' && p[1] == '/')
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
227 {
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
228 ++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
229 if (--level == 0)
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
230 break;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
231 }
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
232 } while (1)
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
233 p += 2;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
234 t.type = TOK.Comment;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
235 t.end = p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
236 return;
18
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
237 case '*':
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
238 do
7
07e45c06a024 - Parsing nested comments correctly now.
aziz
parents: 5
diff changeset
239 {
8
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
240 c = *++p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
241 if (c == 0)
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
242 throw new Error("unterminated /* */ comment.");
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
243 } while (c != '*' || p[1] != '/')
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
244 p += 2;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
245 t.type = TOK.Comment;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
246 t.end = p;
d4ba94a5a282 - Parsing /* */ comments now.
aziz
parents: 7
diff changeset
247 return;
18
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
248 case '/':
10
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
249 do
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
250 {
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
251 c = *++p;
11
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
252 if (c == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]))
dffcdaa7c47a - Added Unicode line and paragraph separators.
aziz
parents: 10
diff changeset
253 break;
10
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
254 } while (c != '\n' && c != 0)
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
255 t.type = TOK.Comment;
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
256 t.end = p;
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
257 return;
3ee65d6e39c9 - Parsing // comments now.
aziz
parents: 9
diff changeset
258 }
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
259 }
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
260
9
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
261 if (c == '"')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
262 {
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
263 do {
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
264 c = *++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
265 if (c == 0)
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
266 throw new Error("unterminated string literal.");
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
267 if (c == '\\')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
268 ++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
269 } while (c != '"')
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
270 ++p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
271 t.type = TOK.String;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
272 t.end = p;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
273 return;
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
274 }
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
275
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 8
diff changeset
276 if (c == '\'')
18
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
277 return scanCharacterLiteral(t);
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
278
20
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
279 switch(c)
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
280 {
22
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
281 case '.':
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
282 if (p[1] == '.')
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
283 {
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
284 ++p;
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
285 if (p[1] == '.') {
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
286 ++p;
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
287 t.type = TOK.Ellipses;
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
288 }
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
289 else
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
290 t.type = TOK.Slice;
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
291 }
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
292 else
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
293 t.type = TOK.Dot;
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
294 goto Lcommon;
23
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
295 case '|':
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
296 c = *++p;
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
297 if (c == '=')
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
298 t.type = TOK.OrAssign;
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
299 else if (c == '|')
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
300 t.type = TOK.OrLogical;
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
301 else {
23
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
302 t.type = TOK.OrBinary;
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
303 goto Lcommon2;
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
304 }
23
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
305 goto Lcommon;
24
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
306 case '&':
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
307 c = *++p;
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
308 if (c == '=')
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
309 t.type = TOK.AndAssign;
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
310 else if (c == '&')
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
311 t.type = TOK.AndLogical;
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
312 else {
24
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
313 t.type = TOK.AndBinary;
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
314 goto Lcommon2;
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
315 }
24
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
316 goto Lcommon;
25
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
317 case '+':
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
318 c = *++p;
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
319 if (c == '=')
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
320 t.type = TOK.PlusAssign;
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
321 else if (c == '+')
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
322 t.type = TOK.PlusPlus;
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
323 else {
25
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
324 t.type = TOK.Plus;
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
325 goto Lcommon2;
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
326 }
25
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
327 goto Lcommon;
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
328 case '-':
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
329 c = *++p;
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
330 if (c == '=')
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
331 t.type = TOK.MinusAssign;
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
332 else if (c == '-')
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
333 t.type = TOK.MinusMinus;
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
334 else {
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
335 t.type = TOK.Minus;
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
336 goto Lcommon2;
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
337 }
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
338 goto Lcommon;
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
339 case '=':
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
340 if (p[1] == '=') {
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
341 ++p;
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
342 t.type = TOK.Equal;
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
343 }
25
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
344 else
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
345 t.type = TOK.Assign;
25
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
346 goto Lcommon;
27
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
347 case '~':
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
348 if (p[1] == '=') {
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
349 ++p;
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
350 t.type = TOK.CatAssign;
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
351 }
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
352 else
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
353 t.type = TOK.Tilde;
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
354 goto Lcommon;
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
355 // Single character tokens
20
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
356 case '(':
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
357 t.type = TOK.LParen;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
358 goto Lcommon;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
359 case ')':
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
360 t.type = TOK.RParen;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
361 goto Lcommon;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
362 case '[':
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
363 t.type = TOK.LBracket;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
364 goto Lcommon;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
365 case ']':
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
366 t.type = TOK.RBracket;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
367 goto Lcommon;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
368 case '{':
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
369 t.type = TOK.LBrace;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
370 goto Lcommon;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
371 case '}':
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
372 t.type = TOK.RBrace;
21
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
373 goto Lcommon;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
374 case ':':
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
375 t.type = TOK.Colon;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
376 goto Lcommon;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
377 case ';':
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
378 t.type = TOK.Semicolon;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
379 goto Lcommon;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
380 case '?':
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
381 t.type = TOK.Question;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
382 goto Lcommon;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
383 case ',':
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
384 t.type = TOK.Comma;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
385 goto Lcommon;
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
386 case '$':
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
387 t.type = TOK.Dollar;
20
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
388 Lcommon:
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
389 ++p;
26
c3d7373db241 - Added code for parsing Assign and Equal tokens.
aziz
parents: 25
diff changeset
390 Lcommon2:
20
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
391 t.end = p;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
392 return;
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
393 default:
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
394 }
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 19
diff changeset
395
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
396 if (c & 128 && isUniAlpha(decodeUTF()))
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
397 goto Lidentifier;
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
398 c = *++p;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
399 }
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
400 }
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
401
18
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
/++
  Scans a character literal. On entry p must point at the opening
  single quote.

  Always produces a token: t.type is set to TOK.Character and t.end to
  the position where scanning stopped. t.chr receives the character for
  plain (unescaped) literals. Escape sequences are not decoded yet; the
  character following the backslash is merely skipped.

  Reports MID.EmptyCharacterLiteral for '' and
  MID.UnterminatedCharacterLiteral when the literal is cut off by a
  newline, a Unicode line/paragraph separator, an end-of-file marker or
  a missing closing quote.
+/
void scanCharacterLiteral(ref Token t)
{
  assert(*p == '\'');
  MID id = MID.UnterminatedCharacterLiteral;
  uint c = *++p;
  switch (c)
  {
  case '\\':
    // Skip the escaped character, unless the source ends right here.
    c = *++p;
    if (c == 0 || c == 26)
      goto Lerr;
    break;
  case 0, 26, '\n', '\r': // 26 == 0x1A (Ctrl+Z), the second end-of-file marker.
    goto Lerr;
  case '\'':
    id = MID.EmptyCharacterLiteral;
    goto Lerr;
  default:
    if (c & 128)
    {
      // Multi-byte UTF-8 sequence; decodeUTF() leaves p on its last byte.
      c = decodeUTF();
      if (c == LSd || c == PSd)
        goto Lerr;
    }
    // Store the value for ASCII characters as well, not only for
    // decoded Unicode characters.
    t.chr = c;
  }

  ++p;
  if (*p != '\'')
  Lerr:
    error(id);
  // Step over the closing quote (or the offending character), but never
  // over an end-of-file marker: p must not leave the source text.
  if (*p != 0 && *p != 26)
    ++p;
  t.type = TOK.Character;
  t.end = p;
}
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
435
15
c70c028e47dd - Started implementation of lexing numbers.
aziz
parents: 14
diff changeset
/// Scans a run of decimal digits. The character p points at on entry is
/// consumed unconditionally; the token ends at the first following
/// character for which isdigit() is false.
void scanNumber(ref Token t)
{
  ++p; // Step over the character that started the number.
  while (isdigit(*p))
    ++p;
  t.end = p;
  t.type = TOK.Number;
}
c70c028e47dd - Started implementation of lexing numbers.
aziz
parents: 14
diff changeset
442
12
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
/++
  Decodes the UTF-8 sequence starting at p using std.utf.decode.

  p must point at a byte with the high bit set. Afterwards p is advanced
  by one less than the index reported by std.utf.decode, so callers
  still have to increment p once to move past the sequence.

  Returns: the decoded code point.
+/
uint decodeUTF()
{
  assert(*p & 128);
  size_t consumed = 0;
  uint codepoint = std.utf.decode(p[0 .. end-p], consumed);
  p += consumed - 1;
  return codepoint;
}
0989206cf73c - Added code to decode Unicode characters in identifiers.
aziz
parents: 11
diff changeset
452
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
/// Enters every element of keywords into idtable, keyed by the
/// element's str member.
void loadKeywords()
{
  foreach (keyword; keywords)
  {
    idtable[keyword.str] = keyword;
  }
}
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
458
18
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
/// Appends a new lexer Problem, built from the message id and the
/// current loc, to the errors list.
void error(MID id)
{
  auto problem = new Problem(Problem.Type.Lexer, id, loc);
  errors ~= problem;
}
c48d2125f1e2 - Moved code for scanning character literals to separate function.
aziz
parents: 17
diff changeset
463
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
/// Scans the next token into this.token.
/// Returns: the type of the token that was just scanned.
public TOK nextToken()
{
  scan(token);
  return token.type;
}
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
469
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
/// Scans tokens until TOK.EOF is reached.
/// Returns: all scanned tokens in order; the last element is the EOF token.
Token[] getTokens()
{
  Token[] result;
  for (;;)
  {
    TOK type = nextToken();
    result ~= token;
    if (type == TOK.EOF)
      break;
  }
  return result;
}
5
79b4e8848794 - Started writing XML generator.
aziz
parents: 4
diff changeset
478 }