Mercurial > projects > dil
comparison src/dil/lexer/Funcs.d @ 806:bcb74c9b895c
Moved out files in the trunk folder to the root.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sun, 09 Mar 2008 00:12:19 +0100 |
parents | trunk/src/dil/lexer/Funcs.d@3b34f6a95a27 |
children |
comparison
equal
deleted
inserted
replaced
805:a3fab8b74a7d | 806:bcb74c9b895c |
---|---|
1 /++ | |
2 Author: Aziz Köksal | |
3 License: GPL3 | |
4 +/ | |
5 module dil.lexer.Funcs; | |
6 | |
// The separators are written as quoted string literals; the bare
// escape-sequence form (`= \u2028;`) is only accepted by old D1 compilers.
const char[3] LS = "\u2028"; /// Unicode line separator (U+2028) as three UTF-8 code units.
const dchar LSd = 0x2028; /// ditto
const char[3] PS = "\u2029"; /// Unicode paragraph separator (U+2029) as three UTF-8 code units.
const dchar PSd = 0x2029; /// ditto
// isUnicodeNewline() compares the first two code units against LS only,
// which is valid as long as LS and PS share them.
static assert(LS[0] == PS[0] && LS[1] == PS[1]);

const dchar _Z_ = 26; /// Control+Z.
14 | |
/// Tests a decoded character against the Unicode newline characters.
/// Returns: true if d is LSd (line separator) or PSd (paragraph separator).
bool isUnicodeNewlineChar(dchar d)
{
  switch (d)
  {
  case LSd, PSd:
    return true;
  default:
    return false;
  }
}
20 | |
/// Tests whether the UTF-8 sequence at p encodes LS or PS.
/// p[1] and p[2] are only read when the preceding code units matched.
/// Returns: true if p points to a line or paragraph separator.
bool isUnicodeNewline(char* p)
{
  // LS and PS have the same first two code units.
  if (p[0] != LS[0] || p[1] != LS[1])
    return false;
  // Only the third code unit tells the two separators apart.
  char last = p[2];
  return last == LS[2] || last == PS[2];
}
26 | |
/// Tests whether a Newline begins at p.
/// Newline: \n | \r | \r\n | LS | PS
/// Returns: true if p points to the start of a Newline.
bool isNewline(char* p)
{
  switch (*p)
  {
  case '\n', '\r':
    return true;
  default:
    // Not an ASCII newline; it may still be a UTF-8 encoded LS or PS.
    return isUnicodeNewline(p);
  }
}
33 | |
/// Tests a decoded character: '\n', '\r', LSd and PSd count as Newline characters.
/// Returns: true if c is a Newline character.
bool isNewline(dchar c)
{
  if (c == '\n' || c == '\r')
    return true;
  return isUnicodeNewlineChar(c);
}
39 | |
/// Returns: true if c is an EOF character.
/// EOF: 0 | _Z_
bool isEOF(dchar c)
{
  switch (c)
  {
  case 0, _Z_:
    return true;
  default:
    return false;
  }
}
46 | |
/// Tests whether an EndOfLine begins at p.
/// EndOfLine: Newline | EOF
/// Returns: true if p points to the first character of an EndOfLine.
bool isEndOfLine(char* p)
{
  if (isNewline(p))
    return true;
  return isEOF(*p);
}
53 | |
/// Scans a Newline and sets p one character past it.
///
/// "\r\n" is consumed as a single Newline (p advances by 2), a lone '\r' or
/// '\n' advances p by 1, and LS or PS advance p by their 3 UTF-8 code units.
/// p is left unchanged when it does not point to a Newline.
///
/// Returns: '\n' if found or 0 otherwise.
dchar scanNewline(ref char* p)
{
  if (*p == '\r')
  {
    // Swallow the '\n' of a CR LF pair together with the '\r'.
    p += (p[1] == '\n') ? 2 : 1;
    return '\n';
  }
  if (*p == '\n')
  {
    ++p;
    return '\n';
  }
  if (isUnicodeNewline(p))
  {
    p += 3; // Skip the three code units of LS or PS.
    return '\n';
  }
  return 0;
}
75 | |
/// ASCII character properties table, indexed by byte value.
///
/// The low byte of an entry holds CProperty flags. The second byte holds the
/// value of the escape sequence named by that character (e.g. the entry for
/// 'n' carries 10 and the entry for '"' carries 34), or 0 if there is none.
/// Entries 128-255 are all 0.
///
/// The values correspond to what the version(gen_ptable) static constructor
/// computes; regenerate the literal with it rather than editing by hand.
static const int[256] ptable = [
 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32, 0, 0x2200, 0, 0, 0, 0, 0x2700, 0, 0, 0, 0, 0, 0, 0, 0,
 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0x3f00,
 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8,
 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0x5c00, 0, 0,16,
 0, 0x70c, 0x80c,12,12,12, 0xc0c, 8, 8, 8, 8, 8, 8, 8, 0xa08, 8,
 8, 8, 0xd08, 8, 0x908, 8, 0xb08, 8, 8, 8, 8, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];
95 | |
/// Character property flags.
/// Every member occupies its own bit, so several flags can be OR-ed into one mask.
enum CProperty
{
  Octal      = 0x01, /// 0-7
  Digit      = 0x02, /// 0-9
  Hex        = 0x04, /// 0-9a-fA-F
  Alpha      = 0x08, /// a-zA-Z
  Underscore = 0x10, /// _
  Whitespace = 0x20  /// ' ' \t \v \f
}
106 | |
/// Bit mask selecting the escape value, i.e. the second byte of a ptable entry.
const uint EVMask = 0xFF << 8;

/// Short name for CProperty, used by the character predicates that follow.
private alias CProperty CP;
/// Tests whether c is an octal digit (0-7).
/// Returns: the CP.Octal bit (non-zero) if it is, 0 otherwise.
int isoctal(char c)
{
  return CP.Octal & ptable[c];
}
/// Tests whether c is a decimal digit (0-9).
/// Returns: the CP.Digit bit (non-zero) if it is, 0 otherwise.
int isdigit(char c)
{
  return CP.Digit & ptable[c];
}
/// Tests whether c is a hexadecimal digit (0-9a-fA-F).
/// Returns: the CP.Hex bit (non-zero) if it is, 0 otherwise.
int ishexad(char c)
{
  return CP.Hex & ptable[c];
}
/// Tests whether c is an ASCII letter (a-zA-Z).
/// Returns: the CP.Alpha bit (non-zero) if it is, 0 otherwise.
int isalpha(char c)
{
  return CP.Alpha & ptable[c];
}
/// Tests whether c is an ASCII letter or a decimal digit.
/// Returns: the matching flag bits (non-zero) if it is, 0 otherwise.
int isalnum(char c)
{
  int mask = CP.Alpha | CP.Digit;
  return ptable[c] & mask;
}
/// Tests whether c can begin a D identifier: a letter or '_' (only ASCII.)
/// Returns: the matching flag bits (non-zero) if it can, 0 otherwise.
int isidbeg(char c)
{
  int mask = CP.Alpha | CP.Underscore;
  return ptable[c] & mask;
}
/// Tests whether c is a D identifier character: a letter, '_' or a digit (only ASCII.)
/// Returns: the matching flag bits (non-zero) if it is, 0 otherwise.
int isident(char c)
{
  int mask = CP.Alpha | CP.Underscore | CP.Digit;
  return ptable[c] & mask;
}
/// Tests whether c carries the Whitespace flag (' ' \t \v \f).
/// Returns: the CP.Whitespace bit (non-zero) if it does, 0 otherwise.
int isspace(char c)
{
  return CP.Whitespace & ptable[c];
}
/// Looks up the escape value stored for c in ptable.
/// Returns: the escape value for c.
int char2ev(char c)
{
  int entry = ptable[c];
  // Shifting out the low byte drops the property flags and leaves the escape value.
  return entry >> 8;
}
/// Tests whether c lies in the ASCII range 0-127.
/// Returns: 1 if c is an ASCII character, 0 otherwise.
int isascii(uint c)
{
  return c <= 0x7F;
}
130 | |
/// Table generator, compiled only when the version identifier gen_ptable is set.
/// Recomputes every ptable entry and prints the table as an array literal,
/// 16 entries per line.
// NOTE(review): Format and Stdout are not imported in the visible source;
// presumably they come from Tango - confirm before enabling this version.
// NOTE(review): ptable is declared const but is written to here; confirm the
// compiler in use accepts that.
version(gen_ptable)
static this()
{
  alias ptable tbl;
  assert(tbl.length == 256);
  // Step 1: compute the property flags of every byte value.
  for (int ch; ch < tbl.length; ++ch)
  {
    int flags = 0;
    if ('0' <= ch && ch <= '7')
      flags |= CP.Octal;
    if ('0' <= ch && ch <= '9')
      flags |= CP.Digit | CP.Hex;
    if (('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
      flags |= CP.Hex;
    if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
      flags |= CP.Alpha;
    if (ch == '_')
      flags |= CP.Underscore;
    if (ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f')
      flags |= CP.Whitespace;
    tbl[ch] = flags; // Overwrites whatever the entry held before.
  }
  // Step 2: store the escape sequence values in the second byte.
  assert(CProperty.max <= ubyte.max, "character property flags and escape value byte overlap.");
  // These characters escape to themselves.
  tbl['\''] |= '\'' << 8;
  tbl['"']  |= '"' << 8;
  tbl['?']  |= '?' << 8;
  tbl['\\'] |= '\\' << 8;
  // These letters name control characters.
  tbl['a'] |= '\a' << 8;
  tbl['b'] |= '\b' << 8;
  tbl['f'] |= '\f' << 8;
  tbl['n'] |= '\n' << 8;
  tbl['r'] |= '\r' << 8;
  tbl['t'] |= '\t' << 8;
  tbl['v'] |= '\v' << 8;
  // Step 3: print a formatted array literal.
  char[] text = "[\n";
  foreach (idx, value; ptable)
  {
    // Entries above 255 carry an escape value and are printed in hex.
    char[] fmt = value > 255 ? " 0x{0:x}," : "{0,2},";
    text ~= Format(fmt, value);
    if ((idx + 1) % 16 == 0)
      text ~= "\n";
  }
  // Replace the trailing ",\n" with the closing bracket.
  text[$-2..$] = "\n]";
  Stdout(text).newline;
}