comparison dmd/lexer.h @ 1:c53b6e3fe49a trunk

[svn r5] Initial commit. Most things are very rough.
author lindquist
date Sat, 01 Sep 2007 21:43:27 +0200
parents
children 788401029ecf
comparison
equal deleted inserted replaced
0:a9e71648e74d 1:c53b6e3fe49a
1
2 // Compiler implementation of the D programming language
3 // Copyright (c) 1999-2007 by Digital Mars
4 // All Rights Reserved
5 // written by Walter Bright
6 // http://www.digitalmars.com
7 // License for redistribution is by either the Artistic License
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
9 // See the included readme.txt for details.
10
11 #ifndef DMD_LEXER_H
12 #define DMD_LEXER_H
13
14 #ifdef __DMC__
15 #pragma once
16 #endif /* __DMC__ */
17
18 #include "root.h"
19 #include "mars.h"
20
// Forward declarations: this header only needs these as incomplete types
// (Identifier and Module are used through pointers; StringTable is named in a
// static member declaration of Lexer).
struct StringTable;
struct Identifier;
struct Module;
24
/* Tokens:
        (       )
        [       ]
        {       }
        <       >       <=      >=      ==      !=      ===     !==
        <<      >>      <<=     >>=     >>>     >>>=
        +       -       +=      -=
        *       /       %       *=      /=      %=
        &       |       ^       &=      |=      ^=
        =       !       ~
        ++      --
        .       ->      :       ,
        ?       &&      ||
 */
39
// Token kinds.
//
// No enumerator has an explicit value, so they are numbered consecutively
// starting at TOKreserved == 0.  Inserting, removing or reordering an entry
// therefore renumbers everything after it.  TOKMAX is not a token: it is the
// number of enumerators that precede it (it is used as the length of
// Token::tochars[]).  Append new tokens immediately before TOKMAX.
enum TOK
{
    TOKreserved,

    // Other
    TOKlparen,          TOKrparen,
    TOKlbracket,        TOKrbracket,
    TOKlcurly,          TOKrcurly,
    TOKcolon,           TOKneg,
    TOKsemicolon,       TOKdotdotdot,
    TOKeof,             TOKcast,
    TOKnull,            TOKassert,
    TOKtrue,            TOKfalse,
    TOKarray,           TOKcall,
    TOKaddress,         TOKtypedot,
    TOKtype,            TOKthrow,
    TOKnew,             TOKdelete,
    TOKstar,            TOKsymoff,
    TOKvar,             TOKdotvar,
    TOKdotti,           TOKdotexp,
    TOKdottype,         TOKslice,
    TOKarraylength,     TOKversion,
    TOKmodule,          TOKdollar,
    TOKtemplate,        TOKdottd,
    TOKdeclaration,     TOKtypeof,
    TOKpragma,          TOKdsymbol,
    TOKtypeid,          TOKuadd,
    TOKremove,
    TOKnewanonclass,    TOKcomment,
    TOKarrayliteral,    TOKassocarrayliteral,
    TOKstructliteral,

    // Operators
    TOKlt,              TOKgt,
    TOKle,              TOKge,
    TOKequal,           TOKnotequal,
    TOKidentity,        TOKnotidentity,
    TOKindex,           TOKis,
    TOKtobool,

    // 62 (value of TOKunord)
    // NCEG floating point compares, in the same order as the enumerators:
    // !<>=     <>    <>=    !>     !>=   !<     !<=   !<>
    TOKunord,TOKlg,TOKleg,TOKule,TOKul,TOKuge,TOKug,TOKue,

    TOKshl,             TOKshr,
    TOKshlass,          TOKshrass,
    TOKushr,            TOKushrass,
    TOKcat,             TOKcatass,      // ~ ~=
    TOKadd,             TOKmin,         TOKaddass,      TOKminass,
    TOKmul,             TOKdiv,         TOKmod,
    TOKmulass,          TOKdivass,      TOKmodass,
    TOKand,             TOKor,          TOKxor,
    TOKandass,          TOKorass,       TOKxorass,
    TOKassign,          TOKnot,         TOKtilde,
    TOKplusplus,        TOKminusminus,  TOKconstruct,
    TOKdot,             TOKarrow,       TOKcomma,
    TOKquestion,        TOKandand,      TOKoror,

    // 106 (value of TOKint32v)
    // Numeric literals
    TOKint32v,          TOKuns32v,
    TOKint64v,          TOKuns64v,
    TOKfloat32v,        TOKfloat64v,    TOKfloat80v,
    TOKimaginary32v,    TOKimaginary64v, TOKimaginary80v,

    // Char constants
    TOKcharv,           TOKwcharv,      TOKdcharv,

    // Leaf operators
    TOKidentifier,      TOKstring,
    TOKthis,            TOKsuper,
    TOKhalt,            TOKtuple,

    // Basic types
    TOKvoid,
    TOKint8,            TOKuns8,
    TOKint16,           TOKuns16,
    TOKint32,           TOKuns32,
    TOKint64,           TOKuns64,
    TOKfloat32,         TOKfloat64,     TOKfloat80,
    TOKimaginary32,     TOKimaginary64, TOKimaginary80,
    TOKcomplex32,       TOKcomplex64,   TOKcomplex80,
    TOKchar,            TOKwchar,       TOKdchar,       TOKbit, TOKbool,
    TOKcent,            TOKucent,

    // Aggregates
    TOKstruct,  TOKclass,       TOKinterface,   TOKunion,       TOKenum,        TOKimport,
    TOKtypedef, TOKalias,       TOKoverride,    TOKdelegate,    TOKfunction,
    TOKmixin,

    TOKalign,   TOKextern,      TOKprivate,     TOKprotected,   TOKpublic,      TOKexport,
    TOKstatic,  /*TOKvirtual,*/ TOKfinal,       TOKconst,       TOKabstract,    TOKvolatile,
    TOKdebug,   TOKdeprecated,  TOKin,          TOKout,         TOKinout,       TOKlazy,
    TOKauto,    TOKpackage,

    // Statements
    TOKif,      TOKelse,        TOKwhile,       TOKfor,         TOKdo,          TOKswitch,
    TOKcase,    TOKdefault,     TOKbreak,       TOKcontinue,    TOKwith,
    TOKsynchronized, TOKreturn, TOKgoto,        TOKtry,         TOKcatch,       TOKfinally,
    TOKasm,     TOKforeach,     TOKforeach_reverse,
    TOKscope,
    TOKon_scope_exit, TOKon_scope_failure, TOKon_scope_success,

    // Contracts
    TOKbody,    TOKinvariant,

    // Testing
    TOKunittest,

    // Added after 1.0
    TOKref,
    TOKmacro,

    TOKMAX
};
156
// Expands to a run of `case` labels, one per basic-type token, for use inside
// a switch on a TOK value.
//
// The last label (`case TOKvoid`) deliberately has no trailing colon: the use
// site supplies it, so the macro is written like an ordinary label:
//
//     switch (tok)
//     {
//         CASE_BASIC_TYPES:
//             ...
//     }
//
// TOKcent and TOKucent are not part of this list.
#define CASE_BASIC_TYPES \
        case TOKwchar: case TOKdchar: \
        case TOKbit: case TOKbool: case TOKchar: \
        case TOKint8: case TOKuns8: \
        case TOKint16: case TOKuns16: \
        case TOKint32: case TOKuns32: \
        case TOKint64: case TOKuns64: \
        case TOKfloat32: case TOKfloat64: case TOKfloat80: \
        case TOKimaginary32: case TOKimaginary64: case TOKimaginary80: \
        case TOKcomplex32: case TOKcomplex64: case TOKcomplex80: \
        case TOKvoid
168
// Expands to one `case` per basic-type token; each case assigns the matching
// Type::t... object to `t` and then jumps to the label LabelX.
//
// The expansion ends with the bare identifier `LabelX` (no colon), so the use
// site both supplies the colon and thereby defines the jump target:
//
//     switch (tok)
//     {
//         CASE_BASIC_TYPES_X(t):
//             ... code shared by every basic type, with t already set ...
//     }
//
// Consequences visible from the expansion:
//   - `t` is pasted textually into every case, so it must be an assignable
//     expression.
//   - Because LabelX is an ordinary goto label, the macro can be expanded at
//     most once per function.
//   - TOKcent and TOKucent are not handled here.
#define CASE_BASIC_TYPES_X(t) \
        case TOKvoid:        t = Type::tvoid;        goto LabelX; \
        case TOKint8:        t = Type::tint8;        goto LabelX; \
        case TOKuns8:        t = Type::tuns8;        goto LabelX; \
        case TOKint16:       t = Type::tint16;       goto LabelX; \
        case TOKuns16:       t = Type::tuns16;       goto LabelX; \
        case TOKint32:       t = Type::tint32;       goto LabelX; \
        case TOKuns32:       t = Type::tuns32;       goto LabelX; \
        case TOKint64:       t = Type::tint64;       goto LabelX; \
        case TOKuns64:       t = Type::tuns64;       goto LabelX; \
        case TOKfloat32:     t = Type::tfloat32;     goto LabelX; \
        case TOKfloat64:     t = Type::tfloat64;     goto LabelX; \
        case TOKfloat80:     t = Type::tfloat80;     goto LabelX; \
        case TOKimaginary32: t = Type::timaginary32; goto LabelX; \
        case TOKimaginary64: t = Type::timaginary64; goto LabelX; \
        case TOKimaginary80: t = Type::timaginary80; goto LabelX; \
        case TOKcomplex32:   t = Type::tcomplex32;   goto LabelX; \
        case TOKcomplex64:   t = Type::tcomplex64;   goto LabelX; \
        case TOKcomplex80:   t = Type::tcomplex80;   goto LabelX; \
        case TOKbit:         t = Type::tbit;         goto LabelX; \
        case TOKbool:        t = Type::tbool;        goto LabelX; \
        case TOKchar:        t = Type::tchar;        goto LabelX; \
        case TOKwchar:       t = Type::twchar;       goto LabelX; \
        case TOKdchar:       t = Type::tdchar;       goto LabelX; \
        LabelX
194
195 struct Token
196 {
197 Token *next;
198 unsigned char *ptr; // pointer to first character of this token within buffer
199 enum TOK value;
200 unsigned char *blockComment; // doc comment string prior to this token
201 unsigned char *lineComment; // doc comment for previous token
202 union
203 {
204 // Integers
205 d_int32 int32value;
206 d_uns32 uns32value;
207 d_int64 int64value;
208 d_uns64 uns64value;
209
210 // Floats
211 #ifdef IN_GCC
212 // real_t float80value; // can't use this in a union!
213 #else
214 d_float80 float80value;
215 #endif
216
217 struct
218 { unsigned char *ustring; // UTF8 string
219 unsigned len;
220 unsigned char postfix; // 'c', 'w', 'd'
221 };
222
223 Identifier *ident;
224 };
225 #ifdef IN_GCC
226 real_t float80value; // can't use this in a union!
227 #endif
228
229 static char *tochars[TOKMAX];
230 static void *operator new(size_t sz);
231
232 int isKeyword();
233 void print();
234 char *toChars();
235 static char *toChars(enum TOK);
236 };
237
238 struct Lexer
239 {
240 static StringTable stringtable;
241 static OutBuffer stringbuffer;
242 static Token *freelist;
243
244 Loc loc; // for error messages
245
246 unsigned char *base; // pointer to start of buffer
247 unsigned char *end; // past end of buffer
248 unsigned char *p; // current character
249 Token token;
250 Module *mod;
251 int doDocComment; // collect doc comment information
252 int anyToken; // !=0 means seen at least one token
253 int commentToken; // !=0 means comments are TOKcomment's
254
255 Lexer(Module *mod,
256 unsigned char *base, unsigned begoffset, unsigned endoffset,
257 int doDocComment, int commentToken);
258
259 static void initKeywords();
260 static Identifier *idPool(const char *s);
261
262 TOK nextToken();
263 void scan(Token *t);
264 Token *peek(Token *t);
265 Token *peekPastParen(Token *t);
266 unsigned escapeSequence();
267 TOK wysiwygStringConstant(Token *t, int tc);
268 TOK hexStringConstant(Token *t);
269 TOK escapeStringConstant(Token *t, int wide);
270 TOK charConstant(Token *t, int wide);
271 void stringPostfix(Token *t);
272 unsigned wchar(unsigned u);
273 TOK number(Token *t);
274 TOK inreal(Token *t);
275 void error(const char *format, ...);
276 void error(Loc loc, const char *format, ...);
277 void pragma();
278 unsigned decodeUTF();
279 void getDocComment(Token *t, unsigned lineComment);
280
281 static int isValidIdentifier(char *p);
282 static unsigned char *combineComments(unsigned char *c1, unsigned char *c2);
283 };
284
285 #endif /* DMD_LEXER_H */