annotate trunk/src/dil/Token.d @ 390:4d36eea1bbc9

Refactored Lexer.scan(). Illegal characters are not ignored anymore. They are reported as errors. Added a new member 'ws' to Token. When a token is scanned the lexer sets ws to the leading whitespace or leaves it at null when no whitespace was found. Added Illegal to enum TOK and IllegalCharacter to enum MID. Added localized messages for MID.IllegalCharacter. Adapted code of cmd.Generate to make use of Token.ws.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Wed, 12 Sep 2007 21:03:41 +0200
parents 6a5fc22cae34
children 33b566df6af4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
1 /++
8ba2570de175 Initial import.
aziz
parents:
diff changeset
2 Author: Aziz Köksal
249
32d354584b28 - Upgraded license notices to GPL3.
aziz
parents: 239
diff changeset
3 License: GPL3
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
4 +/
326
4a7359b88c11 - Added package 'dil' to module declarations.
aziz
parents: 325
diff changeset
5 module dil.Token;
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
6 import std.c.stdlib : malloc, free;
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
7 import std.outofmemory;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
8
8ba2570de175 Initial import.
aziz
parents:
diff changeset
9 struct Position
8ba2570de175 Initial import.
aziz
parents:
diff changeset
10 {
8ba2570de175 Initial import.
aziz
parents:
diff changeset
11 size_t loc;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
12 size_t col;
8ba2570de175 Initial import.
aziz
parents:
diff changeset
13 }
8ba2570de175 Initial import.
aziz
parents:
diff changeset
14
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
15 enum TOK : ushort
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 0
diff changeset
16 {
95
0eb4c8a5b32b - Added TOK.Invalid.
aziz
parents: 84
diff changeset
17 Invalid,
0eb4c8a5b32b - Added TOK.Invalid.
aziz
parents: 84
diff changeset
18
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
19 /// Flag for whitespace tokens that must be ignored in the parsing phase.
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
20 Whitespace = 0x8000,
390
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
21 Illegal = 1 | Whitespace,
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
22 Comment = 2 | Whitespace,
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
23 Shebang = 3 | Whitespace,
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
24 HashLine = 4 | Whitespace,
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
25 Filespec = 5 | Whitespace,
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
26
390
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
27 Identifier = 6,
9
5d6968cc751e - Parsing string and character literals now (rudimentary implementation.)
aziz
parents: 4
diff changeset
28 String,
82
fc645fb2fe72 - scanEscapeSequences() doesn't return 0xFFFF as an error value anymore, because it is a valid codepoint usable by the user.
aziz
parents: 71
diff changeset
29 CharLiteral, WCharLiteral, DCharLiteral,
58
50bb7fc9db44 - The types of integers are recognized now.
aziz
parents: 56
diff changeset
30
343
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
31 // Special tokens
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
32 FILE,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
33 LINE,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
34 DATE,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
35 TIME,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
36 TIMESTAMP,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
37 VENDOR,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
38 VERSION,
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
39
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
40 // Number literals
58
50bb7fc9db44 - The types of integers are recognized now.
aziz
parents: 56
diff changeset
41 Int32, Int64, Uint32, Uint64,
97
1a83e5070a84 - Added code for parsing IntNumber- and RealNumberExpressions.
aziz
parents: 95
diff changeset
42 // Floating point number scanner relies on this order. (FloatXY + 3 == ImaginaryXY)
62
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
43 Float32, Float64, Float80,
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
44 Imaginary32, Imaginary64, Imaginary80,
58
50bb7fc9db44 - The types of integers are recognized now.
aziz
parents: 56
diff changeset
45
36
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
46
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
47 // Brackets
20
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
48 LParen,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
49 RParen,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
50 LBracket,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
51 RBracket,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
52 LBrace,
d6adfbd7c513 - Added code for parsing braces.
aziz
parents: 18
diff changeset
53 RBrace,
21
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
54
22
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
55 Dot, Slice, Ellipses,
b05fff8e2ce4 - Added code for parsing Dot, Slice and Ellipses tokens.
aziz
parents: 21
diff changeset
56
131
ce636f3981cc - Removed TOK.Number.
aziz
parents: 107
diff changeset
57 // Floating point number operators
35
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
58 Unordered,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
59 UorE,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
60 UorG,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
61 UorGorE,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
62 UorL,
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
63 UorLorE,
36
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
64 LorEorG,
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
65 LorG,
35
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
66
36
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
67 // Normal operators
35
c470b9356e35 - Added code for parsing Unordered, UorE, UorG, UorGorE, UorL, UorLorE, NotEqual and Not tokens.
aziz
parents: 32
diff changeset
68 Assign, Equal, NotEqual, Not,
38
640c45aaaaee - Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents: 36
diff changeset
69 LessEqual, Less,
640c45aaaaee - Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents: 36
diff changeset
70 GreaterEqual, Greater,
36
3c7210a722f7 - Added code for parsing LorEorG, LorG, LessEqual, LessThan, LShiftAssign and LShift tokens.
aziz
parents: 35
diff changeset
71 LShiftAssign, LShift,
38
640c45aaaaee - Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents: 36
diff changeset
72 RShiftAssign,RShift,
640c45aaaaee - Added code for parsing GreaterEqual, Greater, RShiftAssign, RShift, URShiftAssign and URShift tokens.
aziz
parents: 36
diff changeset
73 URShiftAssign, URShift,
23
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
74 OrAssign, OrLogical, OrBinary,
24
903f91163f23 - Added code for parsing AndAssign, AndLogical and AndBinary tokens.
aziz
parents: 23
diff changeset
75 AndAssign, AndLogical, AndBinary,
25
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
76 PlusAssign, PlusPlus, Plus,
9c866208b3f6 - Added code for parsing PlusAssign, PlusPlus, Plus, MinusAssign, MinusMinus, Minus tokens.
aziz
parents: 24
diff changeset
77 MinusAssign, MinusMinus, Minus,
32
d7011daa4740 - Added missing commas after the items in the messages table.
aziz
parents: 31
diff changeset
78 DivAssign, Div,
29
ef83eea26bbd - Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents: 28
diff changeset
79 MulAssign, Mul,
ef83eea26bbd - Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents: 28
diff changeset
80 ModAssign, Mod,
ef83eea26bbd - Added code for parsing MulAssign, Mul, ModAssign, Mod, XorAssign and Xor tokens.
aziz
parents: 28
diff changeset
81 XorAssign, Xor,
27
43b6bf56f0e9 - Added code for parsing CatAssign and Tilde tokens.
aziz
parents: 26
diff changeset
82 CatAssign, Catenate,
39
69b940398d7b - Added unittest to test correct parsing of operator tokens.
aziz
parents: 38
diff changeset
83 Tilde,
71
b3777cca323c - Added Identity and NotIdentity tokens.
aziz
parents: 65
diff changeset
84 Identity, NotIdentity,
23
1a7903701a3d - Added code for parsing OrAssign, OrLogical and OrBinary tokens.
aziz
parents: 22
diff changeset
85
21
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
86 Colon,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
87 Semicolon,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
88 Question,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
89 Comma,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
90 Dollar,
c785c122e4e6 - Added code for parsing Colon, Semicolon, Question, Comma and Dollar.
aziz
parents: 20
diff changeset
91
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
92 /* Keywords:
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
93 NB.: Token.isKeyword() depends on this list being contiguous.
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
94 */
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
95 Abstract,Alias,Align,Asm,Assert,Auto,Body,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
96 Bool,Break,Byte,Case,Cast,Catch,Cdouble,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
97 Cent,Cfloat,Char,Class,Const,Continue,Creal,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
98 Dchar,Debug,Default,Delegate,Delete,Deprecated,Do,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
99 Double,Else,Enum,Export,Extern,False,Final,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
100 Finally,Float,For,Foreach,Foreach_reverse,Function,Goto,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
101 Idouble,If,Ifloat,Import,In,Inout,Int,
269
a416e09c08ea - Implemented D 2.0 additions.
aziz
parents: 249
diff changeset
102 Interface,Invariant,Ireal,Is,Lazy,Long,Macro/+D2.0+/,
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
103 Mixin,Module,New,Null,Out,Override,Package,
269
a416e09c08ea - Implemented D 2.0 additions.
aziz
parents: 249
diff changeset
104 Pragma,Private,Protected,Public,Real,Ref/+D2.0+/,Return,
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
105 Scope,Short,Static,Struct,Super,Switch,Synchronized,
269
a416e09c08ea - Implemented D 2.0 additions.
aziz
parents: 249
diff changeset
106 Template,This,Throw,Traits/+D2.0+/,True,Try,Typedef,Typeid,
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
107 Typeof,Ubyte,Ucent,Uint,Ulong,Union,Unittest,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
108 Ushort,Version,Void,Volatile,Wchar,While,With,
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
109
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
110 HEAD, // start of linked list
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
111 EOF,
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
112 MAX
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 0
diff changeset
113 }
4bbce78bfb1e - Added TOK enum.
aziz
parents: 0
diff changeset
114
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
115 alias TOK.Abstract KeywordsBegin;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
116 alias TOK.With KeywordsEnd;
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
117
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
118 struct Token
8ba2570de175 Initial import.
aziz
parents:
diff changeset
119 {
3
4bbce78bfb1e - Added TOK enum.
aziz
parents: 0
diff changeset
120 TOK type;
84
ac8d961d10d1 - Added code for parsing This-,Super-,Null-,Bool-,Dollar-,CharLiteral- and StringLiteralExpression.
aziz
parents: 82
diff changeset
121 // Position pos;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
122
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
123 Token* next, prev;
131
ce636f3981cc - Removed TOK.Number.
aziz
parents: 107
diff changeset
124
390
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
125 char* ws; /// Start of whitespace characters before token. Null if no WS.
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
126 char* start; /// Start of token in source text.
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
127 char* end; /// Points one past the end of token in source text.
4
92df59b1ec4a - Started implementation of scan().
aziz
parents: 3
diff changeset
128
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
129 union
8ba2570de175 Initial import.
aziz
parents:
diff changeset
130 {
31
94f09f4e988e - Added struct for strings to Token with 'pf' = postfix.
aziz
parents: 30
diff changeset
131 struct
94f09f4e988e - Added struct for strings to Token with 'pf' = postfix.
aziz
parents: 30
diff changeset
132 {
323
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
133 Token* line_num; // #line number
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
134 Token* line_filespec; // #line number filespec
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
135 }
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
136 struct
6259fb93e3dd - Rewrote scanSpecialToken().
aziz
parents: 314
diff changeset
137 {
31
94f09f4e988e - Added struct for strings to Token with 'pf' = postfix.
aziz
parents: 30
diff changeset
138 string str;
383
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
139 char pf; /// Postfix 'c', 'w' or 'd'
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
140 version(D2)
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
141 Token* tok_str; /// Points to the contents of a token string stored as a
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
142 /// doubly linked list. The last token is always '}' or
6a5fc22cae34 Implemented scanner for new string literals and applied some fixes.
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 372
diff changeset
143 /// EOF in case end of source text is "q{" EOF.
31
94f09f4e988e - Added struct for strings to Token with 'pf' = postfix.
aziz
parents: 30
diff changeset
144 }
62
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
145 dchar dchar_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
146 long long_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
147 ulong ulong_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
148 int int_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
149 uint uint_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
150 float float_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
151 double double_;
96af5653acef - Fixed loop of hex number scanner. Moved checks under the switch block.
aziz
parents: 58
diff changeset
152 real real_;
0
8ba2570de175 Initial import.
aziz
parents:
diff changeset
153 }
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
154
107
722c05bbd5eb - Implemented parseEnumDeclaration() and added class EnumDeclaration.
aziz
parents: 97
diff changeset
155 alias srcText identifier;
722c05bbd5eb - Implemented parseEnumDeclaration() and added class EnumDeclaration.
aziz
parents: 97
diff changeset
156
65
6c21ae79fbb3 - Renamed function Token.span to Token.srcText.
aziz
parents: 62
diff changeset
157 string srcText()
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
158 {
30
426767b94635 - Added code for parsing the '#line' special token.
aziz
parents: 29
diff changeset
159 assert(start && end);
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
160 return start[0 .. end - start];
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
161 }
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
162
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
163 /// Find next non-whitespace token. Returns 'this' token if the next token is TOK.EOF or null.
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
164 Token* nextNWS()
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
165 out(token)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
166 {
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
167 assert(token !is null);
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
168 }
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
169 body
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
170 {
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
171 auto token = next;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
172 while (token !is null && token.isWhitespace)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
173 token = token.next;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
174 if (token is null || token.type == TOK.EOF)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
175 return this;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
176 return token;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
177 }
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
178
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
179 /// Find previous non-whitespace token. Returns 'this' token if the previous token is TOK.HEAD or null.
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
180 Token* prevNWS()
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
181 out(token)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
182 {
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
183 assert(token !is null);
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
184 }
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
185 body
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
186 {
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
187 auto token = prev;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
188 while (token !is null && token.isWhitespace)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
189 token = token.prev;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
190 if (token is null || token.type == TOK.HEAD)
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
191 return this;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
192 return token;
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
193 }
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
194
210
b7bde6583d3e - Made toString() static.
aziz
parents: 208
diff changeset
195 static string toString(TOK tok)
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
196 {
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
197 return tokToString[tok];
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
198 }
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
199
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
200 bool isKeyword()
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
201 {
163
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
202 return KeywordsBegin <= type && type <= KeywordsEnd;
28
3a9daccf7d96 - Added table for identifiers to Lexer.
aziz
parents: 27
diff changeset
203 }
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
204
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
205 bool isWhitespace()
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
206 {
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
207 return !!(type & TOK.Whitespace);
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
208 }
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
209
343
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
210 bool isSpecialToken()
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
211 {
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
212 return *start == '_' && type != TOK.Identifier;
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
213 }
95f1b6e43214 - Removed TOK.Special and added an own entry for each special token.
aziz
parents: 326
diff changeset
214
163
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
215 int opEquals(TOK type2)
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
216 {
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
217 return type == type2;
f27a98bb17c7 - Fix: when parsing Declarator fails, type and ident is set to null.
aziz
parents: 131
diff changeset
218 }
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
219
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
220 new(size_t size)
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
221 {
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
222 void* p = malloc(size);
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
223 if (p is null)
372
eb08126cca56 - Applied patch by Jari-Matti Mäkelä. Should compile with tangobos now.
aziz
parents: 359
diff changeset
224 version(Tango)
eb08126cca56 - Applied patch by Jari-Matti Mäkelä. Should compile with tangobos now.
aziz
parents: 359
diff changeset
225 throw new OutOfMemoryException(__FILE__, __LINE__);
eb08126cca56 - Applied patch by Jari-Matti Mäkelä. Should compile with tangobos now.
aziz
parents: 359
diff changeset
226 else
eb08126cca56 - Applied patch by Jari-Matti Mäkelä. Should compile with tangobos now.
aziz
parents: 359
diff changeset
227 throw new OutOfMemoryException();
239
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
228 *cast(Token*)p = Token.init;
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
229 return p;
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
230 }
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
231
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
232 delete(void* p)
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
233 {
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
234 free(p);
7911f6a92e6e - Added 'new' and 'delete' declarations to Token and uncommented next and prev members. Added HEAD to TOK.
aziz
parents: 210
diff changeset
235 }
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
236 }
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
237
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
238 const string[] tokToString = [
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
239 "Invalid",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
240
390
4d36eea1bbc9 Refactored Lexer.scan().
Aziz Köksal <aziz.koeksal@gmail.com>
parents: 383
diff changeset
241 "Illegal",
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
242 "Comment",
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
243 "#! /shebang/",
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
244 "#line",
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
245 `"filespec"`,
314
ebd21bbf296e - Added Whitespace, Sheband and Hashline to enum TOK. TOK.Whitespace is a flag and tokens that are considered whitespace are flagged as such.
aziz
parents: 312
diff changeset
246
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
247 "Identifier",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
248 "String",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
249 "CharLiteral", "WCharLiteral", "DCharLiteral",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
250
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
251 "__FILE__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
252 "__LINE__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
253 "__DATE__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
254 "__TIME__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
255 "__TIMESTAMP__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
256 "__VENDOR__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
257 "__VERSION__",
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
258
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
259 "Int32", "Int64", "Uint32", "Uint64",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
260 "Float32", "Float64", "Float80",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
261 "Imaginary32", "Imaginary64", "Imaginary80",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
262
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
263 "(",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
264 ")",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
265 "[",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
266 "]",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
267 "{",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
268 "}",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
269
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
270 ".", "..", "...",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
271
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
272 "Unordered",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
273 "UorE",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
274 "UorG",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
275 "UorGorE",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
276 "UorL",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
277 "UorLorE",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
278 "LorEorG",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
279 "LorG",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
280
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
281 "=", "==", "!=", "!",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
282 "<=", "<",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
283 ">=", ">",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
284 "<<=", "<<",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
285 ">>=",">>",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
286 ">>>=", ">>>",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
287 "|=", "||", "|",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
288 "&=", "&&", "&",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
289 "+=", "++", "+",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
290 "-=", "--", "-",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
291 "/=", "/",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
292 "*=", "*",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
293 "%=", "%",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
294 "^=", "^",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
295 "~=", "~",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
296 "~",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
297 "is", "!is",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
298
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
299 ":",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
300 ";",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
301 "?",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
302 ",",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
303 "$",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
304
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
305 "abstract","alias","align","asm","assert","auto","body",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
306 "bool","break","byte","case","cast","catch","cdouble",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
307 "cent","cfloat","char","class","const","continue","creal",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
308 "dchar","debug","default","delegate","delete","deprecated","do",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
309 "double","else","enum","export","extern","false","final",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
310 "finally","float","for","foreach","foreach_reverse","function","goto",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
311 "idouble","if","ifloat","import","in","inout","int",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
312 "interface","invariant","ireal","is","lazy","long","macro",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
313 "mixin","module","new","null","out","override","package",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
314 "pragma","private","protected","public","real","ref","return",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
315 "scope","short","static","struct","super","switch","synchronized",
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
316 "template","this","throw","__traits","true","try","typedef","typeid",
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
317 "typeof","ubyte","ucent","uint","ulong","union","unittest",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
318 "ushort","version","void","volatile","wchar","while","with",
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
319
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
320 "HEAD",
208
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
321 "EOF"
0a9bccf74046 - Added string table and toString() method to Token.
aziz
parents: 163
diff changeset
322 ];
359
511c14950cac - Added messages MissingLinkageType and UnrecognizedLinkageType.
aziz
parents: 343
diff changeset
323 static assert(tokToString.length == TOK.MAX);