annotate trunk/src/Lexer.d @ 7:07e45c06a024

- Parsing nested comments correctly now.
author aziz
date Fri, 22 Jun 2007 20:25:02 +0000
parents 79b4e8848794
children d4ba94a5a282
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
1 /++
8ba2570de175 Initial import.
aziz
parents:
diff changeset
2 Author: Aziz Köksal
8ba2570de175 Initial import.
aziz
parents:
diff changeset
3 License: GPL2
8ba2570de175 Initial import.
aziz
parents:
diff changeset
4 +/
8ba2570de175 Initial import.
aziz
parents:
diff changeset
5 module Lexer;
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 2
diff changeset
6 import Token;
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
7 import std.stdio;
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
8 import std.utf;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
9 import std.uni;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
10
8ba2570de175 Initial import.
aziz
parents:
diff changeset
11 /// ASCII character properties table.
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
static const int ptable[256] = [
  // Each entry is a bit set of CProperty flags for the character with that code:
  //   23 = Octal|Digit|Hex|Identifier, 22 = Digit|Hex|Identifier,
  //   28 = Hex|Alpha|Identifier, 24 = Alpha|Identifier, 16 = Identifier.
  // 0x00 - 0x0F
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x10 - 0x1F
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x20 - 0x2F
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x30 - 0x3F: '0'-'7', '8'-'9', then six non-identifier characters
  23,23,23,23,23,23,23,23,22,22, 0, 0, 0, 0, 0, 0,
  // 0x40 - 0x4F: '@', 'A'-'F', 'G'-'O'
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,
  // 0x50 - 0x5F: 'P'-'Z', four non-identifier characters, '_'
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0,16,
  // 0x60 - 0x6F: '`', 'a'-'f', 'g'-'o'
   0,28,28,28,28,28,28,24,24,24,24,24,24,24,24,24,
  // 0x70 - 0x7F: 'p'-'z', then five non-identifier characters
  24,24,24,24,24,24,24,24,24,24,24, 0, 0, 0, 0, 0,
  // 0x80 - 0xFF: no properties
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
22
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Bit flags describing the properties of a character; combined in ptable entries.
enum CProperty
{
  Octal      = 1<<0, /// Set for '0'..'7'.
  Digit      = 1<<1, /// Set for '0'..'9'.
  Hex        = 1<<2, /// Set for '0'..'9', 'a'..'f' and 'A'..'F'.
  Alpha      = 1<<3, /// Set for 'a'..'z' and 'A'..'Z'.
  Identifier = 1<<4  /// Set for letters, digits and '_'.
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
31
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a nonzero value if c has the Octal property in ptable ('0'..'7'), else 0.
int isoctal(char c)
{
  int props = ptable[c];
  return props & CProperty.Octal;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a nonzero value if c has the Digit property in ptable ('0'..'9'), else 0.
int isdigit(char c)
{
  int props = ptable[c];
  return props & CProperty.Digit;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a nonzero value if c has the Hex property in ptable
/// ('0'..'9', 'a'..'f', 'A'..'F'), else 0.
int ishexad(char c)
{
  int props = ptable[c];
  return props & CProperty.Hex;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a nonzero value if c has the Alpha property in ptable
/// ('a'..'z', 'A'..'Z'), else 0.
int isalpha(char c)
{
  int props = ptable[c];
  return props & CProperty.Alpha;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a nonzero value if c has the Alpha or the Digit property in ptable, else 0.
int isalnum(char c)
{
  int mask = CProperty.Alpha | CProperty.Digit;
  return ptable[c] & mask;
}
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Returns a nonzero value if c has the Identifier property in ptable
/// (letters, digits and '_'), else 0.
int isident(char c)
{
  int props = ptable[c];
  return props & CProperty.Identifier;
}
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
38 /+
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
39 static this()
8ba2570de175 Initial import.
aziz
parents:
diff changeset
40 {
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
41 // Initialize character properties table.
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
42 for (int i; i < ptable.length; ++i)
8ba2570de175 Initial import.
aziz
parents:
diff changeset
43 {
8ba2570de175 Initial import.
aziz
parents:
diff changeset
44 if ('0' <= i && i <= '7')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
45 ptable[i] |= CProperty.Octal;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
46 if ('0' <= i && i <= '9')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
47 ptable[i] |= CProperty.Digit;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
48 if (isdigit(i) || 'a' <= i && i <= 'f' || 'A' <= i && i <= 'F')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
49 ptable[i] |= CProperty.Hex;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
50 if ('a' <= i && i <= 'z' || 'A' <= i && i <= 'Z')
8ba2570de175 Initial import.
aziz
parents:
diff changeset
51 ptable[i] |= CProperty.Alpha;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
52 if (isalnum(i) || i == '_')
1
f3cd3bfde4ba - Corrected some errors to make the file compile.
aziz
parents: 0
diff changeset
53 ptable[i] |= CProperty.Identifier;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
54 }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
55 // Print a formatted array literal.
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
56 char[] array = "[\n";
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
57 for (int i; i < ptable.length; ++i)
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
58 {
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
59 char c = ptable[i];
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
60 array ~= std.string.format("%2d,", c, ((i+1) % 32) ? "":"\n");
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
61 }
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
62 array.length = array.length - 2; // remove ",\n"
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
63 array ~= "\n]";
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
64 writefln(array);
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
65 }
2
81c6cc33f5c8 - Initializing ptable with a precomputed array literal.
aziz
parents: 1
diff changeset
66 +/
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
67
8ba2570de175 Initial import.
aziz
parents:
diff changeset
/// Splits a source text into tokens. So far identifiers, nested comments
/// and the end of the text are recognized; all other characters are skipped.
class Lexer
{
  Token token; /// The token produced by the last call to nextToken().
  char[] text; /// The source text with a 0 character appended as sentinel.
  char* p;     /// Points to the next character to be scanned.
  char* end;   /// Points one past the 0 sentinel of text.

  /// Stores text, appends a 0 sentinel to it and sets p to its first character.
  this(char[] text)
  {
    this.text = text;
    this.text.length = this.text.length + 1;
    this.text[$-1] = 0;

    this.p = this.text.ptr;
    this.end = this.p + this.text.length;
  }

  /// Scans the next token starting at p and stores its type and extent in t.
  ///
  /// Characters that start neither an identifier nor a nested comment are
  /// skipped. When the 0 sentinel is reached, t is an EOF token and p is not
  /// advanced, so subsequent calls keep returning EOF.
  ///
  /// Throws: Error if a nested comment is not closed before the 0 sentinel.
  public void scan(out Token t)
  {
    assert(p < end);

    char c = *p;

    while (1)
    {
      t.start = p;

      if (c == 0)
      {
        t.type = TOK.EOF;
        t.end = p+1;
        return;
      }

      // An identifier starts with an identifier character that is not a digit.
      if (isident(c) && !isdigit(c))
      {
        // Consume the first character and every identifier character after it.
        c = *++p;
        while (isident(c))
          c = *++p;
        t.type = TOK.Identifier;
        t.end = p;
        return;
      }

      // Nested comment: opened by "/+", closed by the matching "+/".
      if (c == '/' && p[1] == '+')
      {
        uint level = 1; // Number of comments currently open.
        ++p;            // p is now on the '+' of the opening delimiter.
        while (1)
        {
          c = *++p;
          if (c == 0)
            throw new Error("unterminated /+/+ +/+/ comment.");
          else if (c == '/' && p[1] == '+')
          {
            ++p; // Skip the '+' so it can't be taken as the start of "+/".
            ++level;
          }
          else if (c == '+' && p[1] == '/')
          {
            ++p; // p is now on the '/' of the closing delimiter.
            if (--level == 0)
              break;
          }
        }
        // p is on the final '/'; advance by exactly one so that the token ends
        // right after the comment. Advancing by two would swallow the next
        // character and could move p past the 0 sentinel.
        ++p;
        t.type = TOK.Comment;
        t.end = p;
        return;
      }

      // Not the start of a known token: skip the character.
      c = *++p;
    }
  }

  /// Scans the next token into this.token and returns its type.
  public TOK nextToken()
  {
    scan(this.token);
    return this.token.type;
  }

  /// Scans the remaining text and returns all tokens, including the final EOF token.
  Token[] getTokens()
  {
    Token[] tokens;
    while (nextToken() != TOK.EOF)
      tokens ~= this.token;
    tokens ~= this.token; // Append the EOF token as well.
    return tokens;
  }
}