Mercurial > projects > dang
annotate lexer/Lexer.d @ 36:ce17bea8e9bd new_gen
Switch statements support
Can only switch on IntegerLit's but multiple values per case and the
default are supported.
An error is emitted if a value is used multiple times or if theres is more
than one default block
author | Anders Halager <halager@gmail.com> |
---|---|
date | Sun, 20 Apr 2008 22:39:07 +0200 |
parents | 41d23f2762c3 |
children | f977aa28eb32 |
rev | line source |
---|---|
1 | 1 module lexer.Lexer; |
2 | |
3 import misc.Error, | |
4 misc.DataSource; | |
5 | |
6 import lexer.Token, | |
7 lexer.Keyword; | |
8 | |
9 import tango.io.Stdout; | |
10 | |
11 class Lexer | |
12 { | |
13 public: | |
14 this (DataSource source) | |
15 { | |
16 this.source = source; | |
17 this.position = 0; | |
18 } | |
19 | |
20 Token next () | |
21 { | |
22 switch (getNextChar) | |
23 { | |
24 case CharType.EOF: | |
25 Location l; | |
26 return Token (Tok.EOF, l, 0); | |
27 | |
28 case CharType.Whitespace: | |
29 position += 1; | |
30 return this.next; | |
31 | |
32 case CharType.Symbol: | |
33 return lexSymbol; | |
34 | |
35 case CharType.Letter: | |
36 return lexLetter; | |
37 | |
38 case CharType.Number: | |
39 return lexNumber; | |
40 } | |
41 } | |
42 | |
43 Token peek ( int skip = 0) | |
44 { | |
45 int oldPosition = this.position; | |
46 while(skip-- > 0) | |
47 this.next; | |
48 Token t = this.next; | |
49 this.position = oldPosition; | |
50 return t; | |
51 } | |
52 | |
53 public Error[] getErrors() | |
54 { | |
55 return this.errors; | |
56 } | |
57 private: | |
58 | |
59 Token lexNumber () | |
60 { | |
61 int i = 0; | |
62 while(getNextChar(++i) == CharType.Number) | |
63 {} | |
64 | |
65 position += i; | |
66 | |
67 return Token(Tok.Integer, Location(position - i, this.source), i); | |
68 } | |
69 | |
70 Token lexSymbol () | |
71 { | |
72 switch(source.data[position++]) | |
73 { | |
74 case '(': | |
75 return Token(Tok.OpenParentheses, Location(position - 1, this.source), 1); | |
76 case ')': | |
77 return Token(Tok.CloseParentheses, Location(position - 1, this.source), 1); | |
78 case '{': | |
79 return Token(Tok.OpenBrace, Location(position - 1, this.source), 1); | |
80 case '}': | |
81 return Token(Tok.CloseBrace, Location(position - 1, this.source), 1); | |
82 case ';': | |
83 return Token(Tok.Seperator, Location(position - 1, this.source), 1); | |
36
ce17bea8e9bd
Switch statements support
Anders Halager <halager@gmail.com>
parents:
29
diff
changeset
|
84 case ':': |
ce17bea8e9bd
Switch statements support
Anders Halager <halager@gmail.com>
parents:
29
diff
changeset
|
85 return Token(Tok.Colon, Location(position - 1, this.source), 1); |
28
69464d465284
Now supporting structs - both read and write. Still a few errors though, so watch out.
Anders Johnsen <skabet@gmail.com>
parents:
15
diff
changeset
|
86 case '.': |
69464d465284
Now supporting structs - both read and write. Still a few errors though, so watch out.
Anders Johnsen <skabet@gmail.com>
parents:
15
diff
changeset
|
87 return Token(Tok.Dot, Location(position - 1, this.source), 1); |
1 | 88 case ',': |
89 return Token(Tok.Comma, Location(position - 1, this.source), 1); | |
90 case '=': | |
6 | 91 if(source.data[position] == '=') |
8
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
92 return Token(Tok.Eq, Location(position++ - 1, this.source), 2); |
1 | 93 return Token(Tok.Assign, Location(position - 1, this.source), 1); |
8
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
94 case '!': |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
95 if(source.data[position] == '=') |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
96 return Token(Tok.Ne, Location(position++ - 1, this.source), 2); |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
97 return Token(Tok.Not, Location(position - 1, this.source), 1); |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
98 case '<': |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
99 if(source.data[position] == '=') |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
100 return Token(Tok.Le, Location(position++ - 1, this.source), 2); |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
101 return Token(Tok.Lt, Location(position - 1, this.source), 1); |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
102 case '>': |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
103 if(source.data[position] == '=') |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
104 return Token(Tok.Ge, Location(position++ - 1, this.source), 2); |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
105 return Token(Tok.Gt, Location(position - 1, this.source), 1); |
1 | 106 case '+': |
107 return Token(Tok.Add, Location(position - 1, this.source), 1); | |
108 case '-': | |
109 return Token(Tok.Sub, Location(position - 1, this.source), 1); | |
110 case '*': | |
111 return Token(Tok.Mul, Location(position - 1, this.source), 1); | |
112 case '/': | |
113 switch(source.data[position]) | |
114 { | |
115 case '/': | |
116 while(getNextChar != CharType.EOF) | |
117 { | |
118 if(source.data[position++] == '\n') | |
119 return this.next; | |
120 } | |
121 return Token(Tok.EOF, Location(position, this.source), 0); | |
122 | |
123 case '*': | |
4
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
124 position += 2; |
1 | 125 while(getNextChar != CharType.EOF) |
126 { | |
127 ++position; | |
128 if(source.data[position-2] == '*') | |
129 if(source.data[position-1] == '/') | |
130 return this.next; | |
131 } | |
29
41d23f2762c3
Merge, and updated Error class
Anders Halager <halager@gmail.com>
parents:
28
diff
changeset
|
132 throw error(__LINE__, "Unexpected end of file. Unclosed comment block"); |
1 | 133 |
134 case '+': | |
4
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
135 position += 2; |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
136 int nesting = 1; |
1 | 137 while(getNextChar != CharType.EOF) |
138 { | |
139 ++position; | |
140 if(source.data[position-2] == '+') | |
141 if(source.data[position-1] == '/') | |
4
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
142 { |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
143 position++; |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
144 nesting--; |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
145 } |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
146 |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
147 if(source.data[position-2] == '/') |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
148 if(source.data[position-1] == '+') |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
149 { |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
150 nesting++; |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
151 position++; |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
152 } |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
153 |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
154 if(nesting == 0) |
c09464468e1d
Updated lexer with comment handling to run Comments test at succes rate
johnsen@johnsen-desktop
parents:
2
diff
changeset
|
155 return this.next; |
1 | 156 } |
29
41d23f2762c3
Merge, and updated Error class
Anders Halager <halager@gmail.com>
parents:
28
diff
changeset
|
157 throw error(__LINE__, "Unexpected end of file. Unclosed comment block"); |
1 | 158 |
159 default: | |
160 return Token(Tok.Div, Location(position - 1, this.source), 1); | |
161 } | |
162 } | |
163 } | |
164 | |
165 Token lexLetter () | |
166 { | |
167 int i = 0; | |
168 bool hasNumber = false; | |
169 while (getNextChar(++i) == CharType.Letter || | |
170 getNextChar(i) == CharType.Number) | |
171 { | |
172 if (getNextChar(i) == CharType.Number) | |
173 { | |
174 hasNumber = true; | |
175 } | |
176 } | |
177 | |
178 Token t = Token(Tok.Identifier, Location(position, source), i); | |
179 | |
180 if (!hasNumber) | |
181 { | |
182 char[] str = source.data[position .. position + i]; | |
183 if(str in keywords) | |
184 t.type = keywords[str]; | |
185 } | |
186 | |
187 position += i; | |
188 | |
189 return t; | |
190 } | |
191 | |
192 CharType getNextChar(int offset = 0) | |
193 { | |
194 if (position + offset >= this.source.data.length) | |
195 return CharType.EOF; | |
196 | |
197 char current = source.data[position + offset]; | |
198 | |
199 if (current >= 'A' && current <= 'Z' || | |
200 current >= 'a' && current <= 'z' || current > 127) | |
201 return CharType.Letter; | |
202 | |
203 if (current >= '0' && current <= '9') | |
204 return CharType.Number; | |
205 | |
206 switch(current) | |
207 { | |
208 case ' ': | |
209 case '\n': | |
210 return CharType.Whitespace; | |
211 | |
212 case '(': | |
213 case ')': | |
214 case '{': | |
215 case '}': | |
216 case ';': | |
36
ce17bea8e9bd
Switch statements support
Anders Halager <halager@gmail.com>
parents:
29
diff
changeset
|
217 case ':': |
28
69464d465284
Now supporting structs - both read and write. Still a few errors though, so watch out.
Anders Johnsen <skabet@gmail.com>
parents:
15
diff
changeset
|
218 case '.': |
1 | 219 case ',': |
220 case '=': | |
8
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
221 case '!': |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
222 case '<': |
2e1069ee21af
Added Ne, Lt, Le, Gt, Ge and Not in lexer
johnsen@johnsen-desktop
parents:
6
diff
changeset
|
223 case '>': |
1 | 224 case '+': |
225 case '-': | |
226 case '*': | |
227 case '/': | |
228 return CharType.Symbol; | |
229 | |
6 | 230 default: |
29
41d23f2762c3
Merge, and updated Error class
Anders Halager <halager@gmail.com>
parents:
28
diff
changeset
|
231 throw error(__LINE__, "Read invalid symbol: '%0'").arg(current); |
1 | 232 } |
233 | |
234 } | |
235 | |
29
41d23f2762c3
Merge, and updated Error class
Anders Halager <halager@gmail.com>
parents:
28
diff
changeset
|
236 Error error(uint line, char[] msg) |
41d23f2762c3
Merge, and updated Error class
Anders Halager <halager@gmail.com>
parents:
28
diff
changeset
|
237 { |
41d23f2762c3
Merge, and updated Error class
Anders Halager <halager@gmail.com>
parents:
28
diff
changeset
|
238 return (new Error(msg)).loc(Location(position, source)); |
41d23f2762c3
Merge, and updated Error class
Anders Halager <halager@gmail.com>
parents:
28
diff
changeset
|
239 } |
41d23f2762c3
Merge, and updated Error class
Anders Halager <halager@gmail.com>
parents:
28
diff
changeset
|
240 |
1 | 241 DataSource source; |
242 int position; | |
243 Error[] errors; | |
244 } | |
245 | |
246 enum CharType : ubyte | |
247 { | |
248 Letter, | |
249 Number, | |
250 Symbol, | |
251 Whitespace, | |
252 | |
253 EOF | |
254 } |