annotate trunk/src/dil/Token.d @ 383:6a5fc22cae34

Implemented scanner for new string literals and applied some fixes. Added methods scanDelimitedStringLiteral() and scanTokenStringLiteral(). They are practically fully implemented. Error messages have to be added yet. Partially rewrote scanShebang() as it was buggy. Now it decodes Unicode characters and the code is simpler to read. Fixed scanRawStringLiteral(): Unicode characters weren't decoded. Added some safeguards to decodeUTF8().
author Aziz Köksal <aziz.koeksal@gmail.com>
date Tue, 11 Sep 2007 18:15:02 +0200
parents eb08126cca56
children 4d36eea1bbc9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
1 /++
8ba2570de175 Initial import.
aziz
parents:
diff changeset
2 Author: Aziz Köksal
249
32d354584b28 - Upgraded license notices to GPL3.
aziz
parents: 239
diff changeset
3 License: GPL3
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
4 +/
326
4a7359b88c11 - Added package 'dil' to module declarations.
aziz
parents: 325
diff changeset
5 module dil.Token;
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
6 import std.c.stdlib : malloc, free;
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
7 import std.outofmemory;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
8
8ba2570de175 Initial import.
aziz
parents:
diff changeset
9 struct Position
8ba2570de175 Initial import.
aziz
parents:
diff changeset
10 {
8ba2570de175 Initial import.
aziz
parents:
diff changeset
11 size_t loc;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
12 size_t col;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
13 }
8ba2570de175 Initial import.
aziz
parents:
diff changeset
14
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
15 enum TOK : ushort
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 0
diff changeset
16 {
95
0eb4c8a5b32b - Added TOK.Invalid.
aziz
parents: 84
diff changeset
17 Invalid,
0eb4c8a5b32b - Added TOK.Invalid.
aziz
parents: 84
diff changeset
18
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
19 /// Flag for whitespace tokens that must be ignored in the parsing phase.
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
20 Whitespace = 0x8000,
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
21 Comment = 1 | Whitespace,
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
22 Shebang = 2 | Whitespace,
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
23 HashLine = 3 | Whitespace,
323
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
24 Filespec = 4 | Whitespace,
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
25
323
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
26 Identifier = 5,
9
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 4
diff changeset
27 String,
82
fc645fb2fe72 - scanEscapeSequences() doesn't return 0xFFFF as an error value anymore, because it is a valid codepoint usable by the user.
aziz
parents: 71
diff changeset
28 CharLiteral, WCharLiteral, DCharLiteral,
58
50bb7fc9db44 - The types of integers are recognized now.
aziz
parents: 56
diff changeset
29
343
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
30 // Special tokens
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
31 FILE,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
32 LINE,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
33 DATE,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
34 TIME,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
35 TIMESTAMP,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
36 VENDOR,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
37 VERSION,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
38
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
39 // Number literals
58
50bb7fc9db44 - The types of integers are recognized now.
aziz
parents: 56
diff changeset
40 Int32, Int64, Uint32, Uint64,
97
1a83e5070a84 - Added code for parsing IntNumber- and RealNumberExpressions.
aziz
parents: 95
diff changeset
41 // Floating point number scanner relies on this order. (FloatXY + 3 == ImaginaryXY)
62
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
42 Float32, Float64, Float80,
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
43 Imaginary32, Imaginary64, Imaginary80,
58
50bb7fc9db44 - The types of integers are recognized now.
aziz
parents: 56
diff changeset
44
36
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
45
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
46 // Brackets
20
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
47 LParen,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
48 RParen,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
49 LBracket,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
50 RBracket,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
51 LBrace,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
52 RBrace,
21
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
53
22
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
54 Dot, Slice, Ellipses,
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
55
131
ce636f3981cc - Removed TOK.Number.
aziz
parents: 107
diff changeset
56 // Floating point number operators
35
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
57 Unordered,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
58 UorE,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
59 UorG,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
60 UorGorE,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
61 UorL,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
62 UorLorE,
36
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
63 LorEorG,
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
64 LorG,
35
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
65
36
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
66 // Normal operators
35
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
67 Assign, Equal, NotEqual, Not,
38
640c45aaaaee - Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents: 36
diff changeset
68 LessEqual, Less,
640c45aaaaee - Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents: 36
diff changeset
69 GreaterEqual, Greater,
36
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
70 LShiftAssign, LShift,
38
640c45aaaaee - Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents: 36
diff changeset
71 RShiftAssign,RShift,
640c45aaaaee - Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents: 36
diff changeset
72 URShiftAssign, URShift,
23
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
73 OrAssign, OrLogical, OrBinary,
24
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
74 AndAssign, AndLogical, AndBinary,
25
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
75 PlusAssign, PlusPlus, Plus,
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
76 MinusAssign, MinusMinus, Minus,
32
d7011daa4740 - Added missing commas after the items in the messages table.
aziz
parents: 31
diff changeset
77 DivAssign, Div,
29
ef83eea26bbd - Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents: 28
diff changeset
78 MulAssign, Mul,
ef83eea26bbd - Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents: 28
diff changeset
79 ModAssign, Mod,
ef83eea26bbd - Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents: 28
diff changeset
80 XorAssign, Xor,
27
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
81 CatAssign, Catenate,
39
69b940398d7b - Added unittest to test correct parsing of operator tokens.
aziz
parents: 38
diff changeset
82 Tilde,
71
b3777cca323c - Added Identity and NotIdentity tokens.
aziz
parents: 65
diff changeset
83 Identity, NotIdentity,
23
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
84
21
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
85 Colon,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
86 Semicolon,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
87 Question,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
88 Comma,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
89 Dollar,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
90
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
91 /* Keywords:
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
92 NB.: Token.isKeyword() depends on this list being contiguous.
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
93 */
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
94 Abstract,Alias,Align,Asm,Assert,Auto,Body,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
95 Bool,Break,Byte,Case,Cast,Catch,Cdouble,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
96 Cent,Cfloat,Char,Class,Const,Continue,Creal,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
97 Dchar,Debug,Default,Delegate,Delete,Deprecated,Do,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
98 Double,Else,Enum,Export,Extern,False,Final,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
99 Finally,Float,For,Foreach,Foreach_reverse,Function,Goto,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
100 Idouble,If,Ifloat,Import,In,Inout,Int,
269
a416e09c08ea - Implemented D 2.0 additions.
aziz
parents: 249
diff changeset
101 Interface,Invariant,Ireal,Is,Lazy,Long,Macro/+D2.0+/,
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
102 Mixin,Module,New,Null,Out,Override,Package,
269
a416e09c08ea - Implemented D 2.0 additions.
aziz
parents: 249
diff changeset
103 Pragma,Private,Protected,Public,Real,Ref/+D2.0+/,Return,
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
104 Scope,Short,Static,Struct,Super,Switch,Synchronized,
269
a416e09c08ea - Implemented D 2.0 additions.
aziz
parents: 249
diff changeset
105 Template,This,Throw,Traits/+D2.0+/,True,Try,Typedef,Typeid,
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
106 Typeof,Ubyte,Ucent,Uint,Ulong,Union,Unittest,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
107 Ushort,Version,Void,Volatile,Wchar,While,With,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
108
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
109 HEAD, // start of linked list
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
110 EOF,
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
111 MAX
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 0
diff changeset
112 }
4bbce78bfb1e - Added TOK enum.
aziz
parents: 0
diff changeset
113
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
114 alias TOK.Abstract KeywordsBegin;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
115 alias TOK.With KeywordsEnd;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
116
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
117 struct Token
8ba2570de175 Initial import.
aziz
parents:
diff changeset
118 {
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 0
diff changeset
119 TOK type;
84
ac8d961d10d1 - Added code for parsing This-,Super-,Null-,Bool-,Dollar-,CharLiteral- and StringLiteralExpression.
aziz
parents: 82
diff changeset
120 // Position pos;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
121
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
122 Token* next, prev;
131
ce636f3981cc - Removed TOK.Number.
aziz
parents: 107
diff changeset
123
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
124 char* start;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
125 char* end;
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
126
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
127 union
8ba2570de175 Initial import.
aziz
parents:
diff changeset
128 {
31
94f09f4e988e - Added struct for strings to Token with 'pf' = postfix.
aziz
parents: 30
diff changeset
129 struct
94f09f4e988e - Added struct for strings to Token with 'pf' = postfix.
aziz
parents: 30
diff changeset
130 {
323
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
131 Token* line_num; // #line number
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
132 Token* line_filespec; // #line number filespec
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
133 }
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
134 struct
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
135 {
31
94f09f4e988e - Added struct for strings to Token with 'pf' = postfix.
aziz
parents: 30
diff changeset
136 string str;
383
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
137 char pf; /// Postfix 'c', 'w' or 'd'
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
138 version(D2)
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
139 Token* tok_str; /// Points to the contents of a token string stored as a
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
140 /// doubly linked list. The last token is always '}' or
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
141 /// EOF in case end of source text is "q{" EOF.
31
94f09f4e988e - Added struct for strings to Token with 'pf' = postfix.
aziz
parents: 30
diff changeset
142 }
62
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
143 dchar dchar_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
144 long long_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
145 ulong ulong_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
146 int int_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
147 uint uint_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
148 float float_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
149 double double_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
150 real real_;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
151 }
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
152
107
722c05bbd5eb - Implemented parseEnumDeclaration() and added class EnumDeclaration.
aziz
parents: 97
diff changeset
153 alias srcText identifier;
722c05bbd5eb - Implemented parseEnumDeclaration() and added class EnumDeclaration.
aziz
parents: 97
diff changeset
154
65
6c21ae79fbb3 - Renamed function Token.span to Token.srcText.
aziz
parents: 62
diff changeset
155 string srcText()
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
156 {
30
426767b94635 - Added code for parsing the '#line' special token.
aziz
parents: 29
diff changeset
157 assert(start && end);
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
158 return start[0 .. end - start];
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
159 }
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
160
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
161 /// Find next non-whitespace token. Returns 'this' token if the next token is TOK.EOF or null.
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
162 Token* nextNWS()
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
163 out(token)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
164 {
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
165 assert(token !is null);
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
166 }
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
167 body
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
168 {
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
169 auto token = next;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
170 while (token !is null && token.isWhitespace)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
171 token = token.next;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
172 if (token is null || token.type == TOK.EOF)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
173 return this;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
174 return token;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
175 }
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
176
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
177 /// Find previous non-whitespace token. Returns 'this' token if the previous token is TOK.HEAD or null.
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
178 Token* prevNWS()
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
179 out(token)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
180 {
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
181 assert(token !is null);
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
182 }
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
183 body
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
184 {
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
185 auto token = prev;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
186 while (token !is null && token.isWhitespace)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
187 token = token.prev;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
188 if (token is null || token.type == TOK.HEAD)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
189 return this;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
190 return token;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
191 }
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
192
210
b7bde6583d3e - Made toString() static.
aziz
parents: 208
diff changeset
193 static string toString(TOK tok)
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
194 {
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
195 return tokToString[tok];
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
196 }
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
197
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
198 bool isKeyword()
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
199 {
163
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
200 return KeywordsBegin <= type && type <= KeywordsEnd;
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
201 }
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
202
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
203 bool isWhitespace()
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
204 {
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
205 return !!(type & TOK.Whitespace);
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
206 }
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
207
343
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
208 bool isSpecialToken()
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
209 {
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
210 return *start == '_' && type != TOK.Identifier;
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
211 }
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
212
163
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
213 int opEquals(TOK type2)
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
214 {
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
215 return type == type2;
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
216 }
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
217
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
218 new(size_t size)
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
219 {
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
220 void* p = malloc(size);
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
221 if (p is null)
372
eb08126cca56 - Applied patch by Jari-Matti Mäkelä. Should compile with tangobos now.
aziz
parents: 359
diff changeset
222 version(Tango)
eb08126cca56 - Applied patch by Jari-Matti Mäkelä. Should compile with tangobos now.
aziz
parents: 359
diff changeset
223 throw new OutOfMemoryException(__FILE__, __LINE__);
eb08126cca56 - Applied patch by Jari-Matti Mäkelä. Should compile with tangobos now.
aziz
parents: 359
diff changeset
224 else
eb08126cca56 - Applied patch by Jari-Matti Mäkelä. Should compile with tangobos now.
aziz
parents: 359
diff changeset
225 throw new OutOfMemoryException();
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
226 *cast(Token*)p = Token.init;
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
227 return p;
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
228 }
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
229
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
230 delete(void* p)
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
231 {
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
232 free(p);
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
233 }
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
234 }
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
235
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
236 const string[] tokToString = [
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
237 "Invalid",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
238
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
239 "Comment",
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
240 "#! /shebang/",
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
241 "#line",
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
242 `"filespec"`,
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
243
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
244 "Identifier",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
245 "String",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
246 "CharLiteral", "WCharLiteral", "DCharLiteral",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
247
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
248 "__FILE__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
249 "__LINE__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
250 "__DATE__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
251 "__TIME__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
252 "__TIMESTAMP__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
253 "__VENDOR__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
254 "__VERSION__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
255
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
256 "Int32", "Int64", "Uint32", "Uint64",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
257 "Float32", "Float64", "Float80",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
258 "Imaginary32", "Imaginary64", "Imaginary80",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
259
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
260 "(",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
261 ")",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
262 "[",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
263 "]",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
264 "{",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
265 "}",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
266
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
267 ".", "..", "...",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
268
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
269 "Unordered",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
270 "UorE",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
271 "UorG",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
272 "UorGorE",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
273 "UorL",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
274 "UorLorE",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
275 "LorEorG",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
276 "LorG",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
277
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
278 "=", "==", "!=", "!",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
279 "<=", "<",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
280 ">=", ">",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
281 "<<=", "<<",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
282 ">>=",">>",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
283 ">>>=", ">>>",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
284 "|=", "||", "|",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
285 "&=", "&&", "&",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
286 "+=", "++", "+",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
287 "-=", "--", "-",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
288 "/=", "/",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
289 "*=", "*",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
290 "%=", "%",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
291 "^=", "^",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
292 "~=", "~",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
293 "~",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
294 "is", "!is",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
295
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
296 ":",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
297 ";",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
298 "?",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
299 ",",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
300 "$",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
301
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
302 "abstract","alias","align","asm","assert","auto","body",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
303 "bool","break","byte","case","cast","catch","cdouble",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
304 "cent","cfloat","char","class","const","continue","creal",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
305 "dchar","debug","default","delegate","delete","deprecated","do",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
306 "double","else","enum","export","extern","false","final",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
307 "finally","float","for","foreach","foreach_reverse","function","goto",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
308 "idouble","if","ifloat","import","in","inout","int",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
309 "interface","invariant","ireal","is","lazy","long","macro",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
310 "mixin","module","new","null","out","override","package",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
311 "pragma","private","protected","public","real","ref","return",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
312 "scope","short","static","struct","super","switch","synchronized",
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
313 "template","this","throw","__traits","true","try","typedef","typeid",
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
314 "typeof","ubyte","ucent","uint","ulong","union","unittest",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
315 "ushort","version","void","volatile","wchar","while","with",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
316
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
317 "HEAD",
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
318 "EOF"
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
319 ];
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
320 static assert(tokToString.length == TOK.MAX);