comparison trunk/src/dil/Lexer.d @ 490:47be6bfe39cd

Refactored code and added new modules. Added ProtectionDeclaration and changed Parser to make use of it. Moved class Location to its own module. Moved some Lexer functions to new module LexerFuncs. Moved Lexer.getLocation() to struct Token. Added methods semantic() and error() to class Expression. Added method error() to class Scope.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Tue, 04 Dec 2007 23:31:20 +0100
parents a7291d3ee9d7
children 9a7ca8c56e59
comparison
equal deleted inserted replaced
489:a7291d3ee9d7 490:47be6bfe39cd
16 import tango.stdc.string : strlen; 16 import tango.stdc.string : strlen;
17 import std.utf; 17 import std.utf;
18 import std.uni; 18 import std.uni;
19 import common; 19 import common;
20 20
21 const char[3] LS = \u2028; /// Line separator. 21 public import dil.LexerFuncs;
22 const char[3] PS = \u2029; /// Paragraph separator.
23 const dchar LSd = 0x2028;
24 const dchar PSd = 0x2029;
25 static assert(LS[0] == PS[0] && LS[1] == PS[1]);
26 22
27 /// U+FFFD = �. Used to replace invalid Unicode characters. 23 /// U+FFFD = �. Used to replace invalid Unicode characters.
28 const dchar REPLACEMENT_CHAR = '\uFFFD'; 24 const dchar REPLACEMENT_CHAR = '\uFFFD';
29
30 const uint _Z_ = 26; /// Control+Z
31 25
32 class Lexer 26 class Lexer
33 { 27 {
34 Token* head; /// The head of the doubly linked token list. 28 Token* head; /// The head of the doubly linked token list.
35 Token* tail; /// The tail of the linked list. Set in scan(). 29 Token* tail; /// The tail of the linked list. Set in scan().
204 { 198 {
205 scanNext(this.token); 199 scanNext(this.token);
206 return this.token.type; 200 return this.token.type;
207 } 201 }
208 202
209 /// Returns true if d is a Unicode line or paragraph separator.
210 static bool isUnicodeNewlineChar(dchar d)
211 {
212 return d == LSd || d == PSd;
213 }
214
215 /// Returns true if p points to a line or paragraph separator.
216 static bool isUnicodeNewline(char* p)
217 {
218 return *p == LS[0] && p[1] == LS[1] && (p[2] == LS[2] || p[2] == PS[2]);
219 }
220
221 /++
222 Returns true if p points to the start of a Newline.
223 Newline: \n | \r | \r\n | LS | PS
224 +/
225 static bool isNewline(char* p)
226 {
227 return *p == '\n' || *p == '\r' || isUnicodeNewline(p);
228 }
229
230 /// Returns true if p points to the last character of a Newline. 203 /// Returns true if p points to the last character of a Newline.
231 bool isNewlineEnd(char* p) 204 bool isNewlineEnd(char* p)
232 { 205 {
233 if (*p == '\n' || *p == '\r') 206 if (*p == '\n' || *p == '\r')
234 return true; 207 return true;
235 if (*p == LS[2] || *p == PS[2]) 208 if (*p == LS[2] || *p == PS[2])
236 if ((p-2) >= text.ptr) 209 if ((p-2) >= text.ptr)
237 if (p[-1] == LS[1] && p[-2] == LS[0]) 210 if (p[-1] == LS[1] && p[-2] == LS[0])
238 return true; 211 return true;
239 return false; 212 return false;
240 }
241
242 /++
243 Returns true if p points to the first character of an EndOfLine.
244 EndOfLine: Newline | 0 | _Z_
245 +/
246 static bool isEndOfLine(char* p)
247 {
248 return isNewline(p) || *p == 0 || *p == _Z_;
249 }
250
251 /++
252 Scans a Newline and sets p one character past it.
253 Returns '\n' if scanned or 0 otherwise.
254 +/
255 static dchar scanNewline(ref char* p)
256 {
257 switch (*p)
258 {
259 case '\r':
260 if (p[1] == '\n')
261 ++p;
262 case '\n':
263 ++p;
264 return '\n';
265 default:
266 if (isUnicodeNewline(p))
267 {
268 ++p; ++p; ++p;
269 return '\n';
270 }
271 }
272 return 0;
273 }
274
275 /// Returns a Location for the given token.
276 static Location getLocation(Token* token)
277 {
278 auto search_t = token.prev;
279 // Find previous newline token.
280 while (search_t.type != TOK.Newline)
281 search_t = search_t.prev;
282 auto filePath = search_t.filePath;
283 auto lineNum = search_t.lineNum - search_t.lineNum_hline;
284 auto lineBegin = search_t.end;
285 // Determine actual line begin and line number.
286 while (1)
287 {
288 search_t = search_t.next;
289 if (search_t == token)
290 break;
291 // Multiline tokens must be rescanned for newlines.
292 if (search_t.isMultiline)
293 {
294 auto p = search_t.start, end = search_t.end;
295 while (p != end)
296 {
297 if (Lexer.scanNewline(p) == '\n')
298 {
299 lineBegin = p;
300 ++lineNum;
301 }
302 else
303 ++p;
304 }
305 }
306 }
307 return new Location(filePath, lineNum, lineBegin, token.start);
308 } 213 }
309 214
310 /++ 215 /++
311 This is the old scan method. 216 This is the old scan method.
312 TODO: profile old and new to see which one is faster. 217 TODO: profile old and new to see which one is faster.