Mercurial > projects > dil
annotate trunk/src/cmd/Generate.d @ 390:4d36eea1bbc9
Refactored Lexer.scan().
Illegal characters are not ignored anymore. They are reported as errors.
Added a new member 'ws' to Token. When a token is scanned the lexer sets
ws to the leading whitespace or leaves it at null when no whitespace was found.
Added Illegal to enum TOK and IllegalCharacter to enum MID.
Added localized messages for MID.IllegalCharacter.
Adapted code of cmd.Generate to make use of Token.ws.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Wed, 12 Sep 2007 21:03:41 +0200 |
parents | c4bfceab7246 |
children | 33b566df6af4 |
rev | line source |
---|---|
363 | 1 /++ |
2 Author: Aziz Köksal | |
3 License: GPL3 | |
4 +/ | |
5 module cmd.Generate; | |
6 import dil.SyntaxTree; | |
7 import dil.Token; | |
8 import dil.Parser, dil.Lexer; | |
9 import dil.File; | |
10 import std.stdio; | |
11 | |
/// Flags selecting what kind of document execute() generates.
enum DocOption
{
  Tokens,        // Default (value 0): emit the plain token stream.
  Syntax = 1<<1, // Emit the syntax tree as nested markup.
  HTML   = 1<<2, // Output format is HTML.
  XML    = 1<<3  // Output format is XML.
}
19 | |
/// Entry point of the generate command.
/// Emits either the syntax tree or the raw token stream of fileName,
/// depending on whether DocOption.Syntax is set in options.
void execute(string fileName, DocOption options)
{
  if (options & DocOption.Syntax)
  {
    syntaxToDoc(fileName, options);
    return;
  }
  tokensToDoc(fileName, options);
}
27 | |
/++
  Escapes the markup-significant characters '<', '>' and '&'
  with their XML/HTML entities.
  Params: text = raw source text.
  Returns: a copy of text that is safe to embed in XML/HTML
           character data.
+/
char[] xml_escape(char[] text)
{
  char[] result;
  foreach (c; text)
    switch (c)
    {
    // NOTE: the replacement strings must be the entities themselves;
    // appending the raw character would make this function a no-op.
    case '<': result ~= "&lt;";  break;
    case '>': result ~= "&gt;";  break;
    case '&': result ~= "&amp;"; break;
    default:  result ~= c;
    }
  return result;
}
41 | |
/// Returns a shortened class name for node n: the package and module
/// prefixes of the classinfo name are removed, and the category suffix
/// ("Declaration", "Statement", "Expression", "Type") is cut off.
/// Results are memoized per NodeKind in a static table.
char[] getShortClassName(Node n)
{
  static char[][] cache;
  if (cache is null)
    cache = new char[][NodeKind.max+1];

  auto shortName = cache[n.kind];
  if (shortName !is null)
    return shortName; // Cache hit.

  alias std.string.find find;
  shortName = n.classinfo.name;
  shortName = shortName[find(shortName, ".")+1 .. $]; // Strip package name.
  shortName = shortName[find(shortName, ".")+1 .. $]; // Strip module name.

  char[] suffix;
  switch (n.category)
  {
    alias NodeCategory NC;
  case NC.Declaration: suffix = "Declaration"; break;
  case NC.Statement:
    if (n.kind == NodeKind.Statements)
      return shortName; // Keep "Statements" as-is (and uncached.)
    suffix = "Statement";
    break;
  case NC.Expression:  suffix = "Expression"; break;
  case NC.Type:        suffix = "Type"; break;
  case NC.Other:       return shortName; // Nothing to strip (uncached.)
  default:
  }
  // Cut off the common suffix, if present.
  auto pos = find(shortName, suffix);
  if (pos != -1)
    shortName = shortName[0 .. pos];
  // Memoize for the next lookup.
  cache[n.kind] = shortName;
  return shortName;
}
78 | |
/// Indices into the html_tags and xml_tags format-string tables.
enum DocPart
{
  // Document structure:
  Head,
  CompBegin,
  CompEnd,
  Error,
  SyntaxBegin,
  SyntaxEnd,
  SrcBegin,
  SrcEnd,
  Tail,
  // Tokens:
  Identifier,
  Comment,
  StringLiteral,
  CharLiteral,
  Operator,
  LorG,
  LessEqual,
  GreaterEqual,
  AndLogical,
  OrLogical,
  NotEqual,
  Not,
  Number,
  Bracket,
  SpecialToken,
  Shebang,
  Keyword,
  HLineBegin,
  HLineEnd,
  Filespec,
}
112 | |
/// Format strings for HTML output, indexed by DocPart.
/// Literal '<', '>' and '&' that should appear in the rendered text
/// must be written as entities, otherwise the generated HTML is malformed.
auto html_tags = [
  // Head
  `<html>`\n
  `<head>`\n
  `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">`\n
  `<link href="dil_html.css" rel="stylesheet" type="text/css">`\n
  `</head>`\n
  `<body>`[],
  // CompBegin
  `<div class="compilerinfo">`,
  // CompEnd
  `</div>`,
  // Error
  `<p class="error %s">%s(%d)%s: %s</p>`,
  // SyntaxBegin
  `<span class="%s %s">`,
  // SyntaxEnd
  `</span>`,
  // SrcBegin
  `<pre class="sourcecode">`,
  // SrcEnd
  `</pre>`,
  // Tail
  `</html>`,
  // Identifier
  `<span class="i">%s</span>`,
  // Comment
  `<span class="c%s">%s</span>`,
  // StringLiteral
  `<span class="sl">%s</span>`,
  // CharLiteral
  `<span class="cl">%s</span>`,
  // Operator
  `<span class="op">%s</span>`,
  // LorG  ("<>" must be escaped for HTML.)
  `<span class="oplg">&lt;&gt;</span>`,
  // LessEqual
  `<span class="ople">&lt;=</span>`,
  // GreaterEqual
  `<span class="opge">&gt;=</span>`,
  // AndLogical
  `<span class="opaa">&amp;&amp;</span>`,
  // OrLogical
  `<span class="opoo">||</span>`,
  // NotEqual
  `<span class="opne">!=</span>`,
  // Not
  `<span class="opn">!</span>`,
  // Number
  `<span class="n">%s</span>`,
  // Bracket
  `<span class="br">%s</span>`,
  // SpecialToken
  `<span class="st">%s</span>`,
  // Shebang
  `<span class="shebang">%s</span>`,
  // Keyword
  `<span class="k">%s</span>`,
  // HLineBegin
  `<span class="hl">`,
  // HLineEnd
  "</span>",
  // Filespec
  `<span class="fs">%s</span>`,
];
178 | |
/// Format strings for XML output, indexed by DocPart.
/// Literal '<', '>' and '&' in element content must be written as
/// entities, otherwise the generated XML is not well-formed.
auto xml_tags = [
  // Head
  `<?xml version="1.0"?>`\n
  `<?xml-stylesheet href="dil_xml.css" type="text/css"?>`\n
  `<root>`[],
  // CompBegin
  `<compilerinfo>`,
  // CompEnd
  `</compilerinfo>`,
  // Error
  `<error t="%s">%s(%d)%s: %s</error>`,
  // SyntaxBegin
  `<%s t="%s">`,
  // SyntaxEnd
  `</%s>`,
  // SrcBegin
  `<sourcecode>`,
  // SrcEnd
  `</sourcecode>`,
  // Tail
  `</root>`,
  // Identifier
  "<i>%s</i>",
  // Comment
  `<c t="%s">%s</c>`,
  // StringLiteral
  "<sl>%s</sl>",
  // CharLiteral
  "<cl>%s</cl>",
  // Operator
  "<op>%s</op>",
  // LorG  ("<>" must be escaped for XML.)
  `<op t="lg">&lt;&gt;</op>`,
  // LessEqual
  `<op t="le">&lt;=</op>`,
  // GreaterEqual
  `<op t="ge">&gt;=</op>`,
  // AndLogical
  `<op t="aa">&amp;&amp;</op>`,
  // OrLogical
  `<op t="oo">||</op>`,
  // NotEqual
  `<op t="ne">!=</op>`,
  // Not
  `<op t="n">!</op>`,
  // Number
  "<n>%s</n>",
  // Bracket
  "<br>%s</br>",
  // SpecialToken
  "<st>%s</st>",
  // Shebang
  "<shebang>%s</shebang>",
  // Keyword
  "<k>%s</k>",
  // HLineBegin
  "<hl>",
  // HLineEnd
  "</hl>",
  // Filespec
  "<fs>%s</fs>",
];

// Both tables must supply exactly one format string per DocPart member.
static assert(html_tags.length == DocPart.max+1);
static assert(xml_tags.length == DocPart.max+1);
244 | |
/// Parses fileName and writes the source to stdout, wrapping every
/// token in markup for the syntax node(s) that begin or end at it.
/// Compiler errors are emitted first, inside a compilerinfo section.
void syntaxToDoc(string fileName, DocOption options)
{
  auto tags = (options & DocOption.HTML) ? html_tags : xml_tags;
  auto sourceText = loadFile(fileName);
  auto parser = new Parser(sourceText, fileName);
  auto root = parser.start();
  auto lx = parser.lx;

  auto tok = lx.head;

  writefln(tags[DocPart.Head]);
  // Emit lexer ("L") and parser ("P") diagnostics first.
  if (lx.errors.length || parser.errors.length)
  {
    writefln(tags[DocPart.CompBegin]);
    foreach (error; lx.errors)
      writefln(tags[DocPart.Error], "L", lx.fileName, error.loc, "L", xml_escape(error.getMsg));
    foreach (error; parser.errors)
      writefln(tags[DocPart.Error], "P", lx.fileName, error.loc, "P", xml_escape(error.getMsg));
    writefln(tags[DocPart.CompEnd]);
  }
  writef(tags[DocPart.SrcBegin]);

  // For each token, the nodes whose text begins/ends at that token.
  Node[][Token*] beginNodes, endNodes;

  // Recursively records every node's begin and end token.
  void populateAAs(Node[] nodes)
  {
    foreach (node; nodes)
    {
      assert(node !is null);
      auto begin = node.begin;
      if (begin)
      {
        auto end = node.end;
        assert(end);
        beginNodes[begin] ~= node;
        endNodes[end] ~= node;
      }
      if (node.children.length)
        populateAAs(node.children);
    }
  }
  populateAAs(root.children);

  // Maps a node category to its one-letter tag/class name.
  char[] getTag(NodeCategory nc)
  {
    char[] tag;
    switch (nc)
    {
      alias NodeCategory NC;
    case NC.Declaration: tag = "d"; break;
    case NC.Statement:   tag = "s"; break;
    case NC.Expression:  tag = "e"; break;
    case NC.Type:        tag = "t"; break;
    case NC.Other:       tag = "o"; break;
    default:
    }
    return tag;
  }

  // Walk the token list, opening and closing node markup as we go.
  while (tok.type != TOK.EOF)
  {
    tok = tok.next;

    if (auto nodes = tok in beginNodes)
      foreach (node; *nodes)
        writef(tags[DocPart.SyntaxBegin], getTag(node.category), getShortClassName(node));

    printToken(tok, tags);

    // Close in reverse order so tags nest properly.
    if (auto nodes = tok in endNodes)
      foreach_reverse (node; *nodes)
        if (options & DocOption.HTML)
          writef(tags[DocPart.SyntaxEnd]); // "</span>" takes no argument.
        else
          writef(tags[DocPart.SyntaxEnd], getTag(node.category));
  }
  writef(tags[DocPart.SrcEnd], tags[DocPart.Tail]);
}
337 | |
/// Lexes fileName and writes the highlighted token stream to stdout.
/// Lexer errors are emitted first, inside a compilerinfo section.
void tokensToDoc(string fileName, DocOption options)
{
  auto tags = (options & DocOption.HTML) ? html_tags : xml_tags;
  auto sourceText = loadFile(fileName);
  auto lx = new Lexer(sourceText, fileName);

  auto tok = lx.getTokens();

  writefln(tags[DocPart.Head]);

  // Emit lexer ("L") diagnostics first.
  if (lx.errors.length)
  {
    writefln(tags[DocPart.CompBegin]);
    foreach (error; lx.errors)
      writefln(tags[DocPart.Error], "L", lx.fileName, error.loc, "L", xml_escape(error.getMsg));
    writefln(tags[DocPart.CompEnd]);
  }
  writef(tags[DocPart.SrcBegin]);

  // Walk the token list and print each token with its markup.
  while (tok.type != TOK.EOF)
  {
    tok = tok.next;
    printToken(tok, tags);
  }
  writef(\n, tags[DocPart.SrcEnd], \n, tags[DocPart.Tail]);
}
367 | |
/// Writes a single token to stdout, wrapped in the markup matching its
/// type. tags is either html_tags or xml_tags (indexed by DocPart).
/// Leading whitespace recorded by the lexer (token.ws) is printed first.
void printToken(Token* token, string[] tags)
{
  alias DocPart DP;
  string text = xml_escape(token.srcText);

  // Print the whitespace preceding the token, if the lexer captured any.
  if (token.ws)
    writef(token.ws[0 .. token.start - token.ws]);

  switch (token.type)
  {
  case TOK.Identifier:
    writef(tags[DP.Identifier], text);
    break;
  case TOK.Comment:
    // The second source character distinguishes the comment kind:
    // '/' = line, '*' = block, '+' = nested.
    string kind;
    switch (token.start[1])
    {
    case '/': kind = "l"; break;
    case '*': kind = "b"; break;
    case '+': kind = "n"; break;
    default:
      assert(0);
    }
    writef(tags[DP.Comment], kind, text);
    break;
  case TOK.String:
    writef(tags[DP.StringLiteral], text);
    break;
  case TOK.CharLiteral, TOK.WCharLiteral, TOK.DCharLiteral:
    writef(tags[DP.CharLiteral], text);
    break;
  case TOK.Assign, TOK.Equal,
       TOK.Less, TOK.Greater,
       TOK.LShiftAssign, TOK.LShift,
       TOK.RShiftAssign, TOK.RShift,
       TOK.URShiftAssign, TOK.URShift,
       TOK.OrAssign, TOK.OrBinary,
       TOK.AndAssign, TOK.AndBinary,
       TOK.PlusAssign, TOK.PlusPlus, TOK.Plus,
       TOK.MinusAssign, TOK.MinusMinus, TOK.Minus,
       TOK.DivAssign, TOK.Div,
       TOK.MulAssign, TOK.Mul,
       TOK.ModAssign, TOK.Mod,
       TOK.XorAssign, TOK.Xor,
       TOK.CatAssign,
       TOK.Tilde,
       TOK.Unordered,
       TOK.UorE,
       TOK.UorG,
       TOK.UorGorE,
       TOK.UorL,
       TOK.UorLorE,
       TOK.LorEorG:
    // Generic operators: the escaped source text is the display text.
    writef(tags[DP.Operator], text);
    break;
  // Operators with a dedicated, pre-escaped format string:
  case TOK.LorG:
    writef(tags[DP.LorG]);
    break;
  case TOK.LessEqual:
    writef(tags[DP.LessEqual]);
    break;
  case TOK.GreaterEqual:
    writef(tags[DP.GreaterEqual]);
    break;
  case TOK.AndLogical:
    writef(tags[DP.AndLogical]);
    break;
  case TOK.OrLogical:
    writef(tags[DP.OrLogical]);
    break;
  case TOK.NotEqual:
    writef(tags[DP.NotEqual]);
    break;
  case TOK.Not:
    // Check if this is part of a template instantiation.
    // TODO: comments aren't skipped.
    if (token.prev.type == TOK.Identifier && token.next.type == TOK.LParen)
      goto default;
    writef(tags[DP.Not]);
    break;
  case TOK.Int32, TOK.Int64, TOK.Uint32, TOK.Uint64,
       TOK.Float32, TOK.Float64, TOK.Float80,
       TOK.Imaginary32, TOK.Imaginary64, TOK.Imaginary80:
    writef(tags[DP.Number], text);
    break;
  case TOK.LParen, TOK.RParen, TOK.LBracket,
       TOK.RBracket, TOK.LBrace, TOK.RBrace:
    writef(tags[DP.Bracket], text);
    break;
  case TOK.Shebang:
    writef(tags[DP.Shebang], text);
    break;
  case TOK.HashLine:
    // Prints the raw characters between two pointers.
    void printWS(char* start, char* end)
    {
      if (start != end)
        writef(start[0 .. end - start]);
    }
    writef(tags[DP.HLineBegin]);
    auto lineNum = token.line_num;
    if (lineNum is null)
    {
      // Malformed #line token: print it verbatim and close the tag.
      writef(token.srcText);
      writef(tags[DP.HLineEnd]);
      break;
    }
    // Print "#line" plus the whitespace up to the number token.
    auto p = token.start;
    printWS(p, lineNum.start);
    printToken(lineNum, tags); // Recurse for the number token.
    if (token.line_filespec)
    {
      auto filespec = token.line_filespec;
      // Whitespace between the number and the filespec.
      printWS(lineNum.end, filespec.start);
      writef(tags[DP.Filespec], xml_escape(filespec.srcText));
      p = filespec.end;
    }
    else
      p = lineNum.end;
    // Print the remaining whitespace of the #line token.
    printWS(p, token.end);
    writef(tags[DP.HLineEnd]);
    break;
  default:
    if (token.isKeyword())
      writef(tags[DP.Keyword], text);
    else if (token.isSpecialToken)
      writef(tags[DP.SpecialToken], text);
    else
      writef("%s", text);
  }
}