comparison src/dil/lexer/Funcs.d @ 806:bcb74c9b895c

Moved out files in the trunk folder to the root.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sun, 09 Mar 2008 00:12:19 +0100
parents trunk/src/dil/lexer/Funcs.d@3b34f6a95a27
children
comparison
equal deleted inserted replaced
805:a3fab8b74a7d 806:bcb74c9b895c
1 /++
2 Author: Aziz Köksal
3 License: GPL3
4 +/
5 module dil.lexer.Funcs;
6
/// UTF-8 encoding (three code units) of U+2028, the Unicode line separator.
const char[3] LS = "\u2028";
/// UTF-8 encoding (three code units) of U+2029, the Unicode paragraph separator.
const char[3] PS = "\u2029";
/// The Unicode line separator as a single code point.
const dchar LSd = 0x2028;
/// The Unicode paragraph separator as a single code point.
const dchar PSd = 0x2029;
// Both encodings start with the same two code units, so only the third
// unit has to be inspected to tell LS and PS apart.
static assert(LS[0] == PS[0] && LS[1] == PS[1]);
12
/// Control+Z (code 26, the ASCII substitute character).
const dchar _Z_ = 0x1A;
14
/// Tests a decoded character against the two Unicode separators.
/// Returns: true if d is the line separator (LSd) or the
/// paragraph separator (PSd).
bool isUnicodeNewlineChar(dchar d)
{
  if (d == LSd)
    return true;
  return d == PSd;
}
20
/// Tests whether the UTF-8 text at p starts with LS or PS.
/// Code units are read left to right; p[1] and p[2] are only read when
/// the preceding units already matched.
/// Returns: true if p points to a line or paragraph separator.
bool isUnicodeNewline(char* p)
{
  // LS and PS share their first two code units.
  if (p[0] != LS[0] || p[1] != LS[1])
    return false;
  // Only the last code unit differs between the two separators.
  return p[2] == LS[2] || p[2] == PS[2];
}
26
/// Tests whether a Newline begins at p.
/// Newline: \n | \r | \r\n | LS | PS
/// Returns: true if p points to the start of a Newline.
bool isNewline(char* p)
{
  // The single-byte newlines are checked first.
  if (*p == '\n' || *p == '\r')
    return true;
  return isUnicodeNewline(p);
}
33
/// Tests a decoded character: '\n', '\r', or a Unicode line/paragraph
/// separator.
/// Returns: true if c is a Newline character.
bool isNewline(dchar c)
{
  switch (c)
  {
  case '\n', '\r':
    return true;
  default:
    return isUnicodeNewlineChar(c);
  }
}
39
/// Tests whether a character marks the end of the source text.
/// EOF: 0 | _Z_
/// Returns: true if c is an EOF character.
bool isEOF(dchar c)
{
  switch (c)
  {
  case 0, _Z_:
    return true;
  default:
    return false;
  }
}
46
/// Tests whether an EndOfLine begins at p.
/// EndOfLine: Newline | EOF
/// Returns: true if p points to the first character of an EndOfLine.
bool isEndOfLine(char* p)
{
  if (isNewline(p))
    return true;
  return isEOF(*p);
}
53
/// Scans a Newline and sets p one character past it.
/// "\r\n" is consumed as a single Newline; LS and PS consume three
/// code units. p is left untouched when no Newline is found.
/// Returns: '\n' if found or 0 otherwise.
dchar scanNewline(ref char* p)
{
  if (*p == '\r')
  {
    // Skip the '\r' and, if present, the '\n' that follows it.
    p += (p[1] == '\n') ? 2 : 1;
    return '\n';
  }
  if (*p == '\n')
  {
    ++p;
    return '\n';
  }
  if (isUnicodeNewline(p))
  {
    p += 3; // Length of the UTF-8 encoding of LS and PS.
    return '\n';
  }
  return 0;
}
75
/// Character properties table, indexed by an 8-bit character code.
/// The low byte of an entry holds CProperty flags. The second byte holds
/// the value of the escape sequence named by that character (e.g. 'n' -> 10).
/// All entries for the codes 128-255 are zero.
static const int ptable[256] = [
  // 0x00-0x0F: '\t', '\v' and '\f' are whitespace.
       0,      0,      0,      0,      0,      0,      0,      0,      0,     32,      0,     32,     32,      0,      0,      0,
  // 0x10-0x1F
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
  // 0x20-0x2F: ' ' is whitespace; '"' and '\'' carry escape values.
      32,      0, 0x2200,      0,      0,      0,      0, 0x2700,      0,      0,      0,      0,      0,      0,      0,      0,
  // 0x30-0x3F: the digits '0'-'9'; '?' carries an escape value.
       7,      7,      7,      7,      7,      7,      7,      7,      6,      6,      0,      0,      0,      0,      0, 0x3f00,
  // 0x40-0x4F: '@', then 'A'-'O' ('A'-'F' are also hex digits).
       0,     12,     12,     12,     12,     12,     12,      8,      8,      8,      8,      8,      8,      8,      8,      8,
  // 0x50-0x5F: 'P'-'Z'; '\\' carries an escape value; '_' is the underscore.
       8,      8,      8,      8,      8,      8,      8,      8,      8,      8,      8,      0, 0x5c00,      0,      0,     16,
  // 0x60-0x6F: '`', then 'a'-'o'; 'a', 'b', 'f' and 'n' carry escape values.
       0,  0x70c,  0x80c,     12,     12,     12,  0xc0c,      8,      8,      8,      8,      8,      8,      8,  0xa08,      8,
  // 0x70-0x7F: 'p'-'z'; 'r', 't' and 'v' carry escape values.
       8,      8,  0xd08,      8,  0x908,      8,  0xb08,      8,      8,      8,      8,      0,      0,      0,      0,      0,
  // 0x80-0xFF: no properties.
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
       0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0
];
95
/// Character property flags. Each member occupies its own bit, so the
/// flags of one character can be OR-ed together.
enum CProperty
{
  Octal      = 1<<0, /// 0-7
  Digit      = 1<<1, /// 0-9
  Hex        = 1<<2, /// 0-9a-fA-F
  Alpha      = 1<<3, /// a-zA-Z
  Underscore = 1<<4, /// _
  Whitespace = 1<<5  /// ' ' \t \v \f
}
106
// Bit mask selecting the escape value, i.e. the second byte of a table entry.
const uint EVMask = 0xFF << 8;

// Module-private shorthand for the property flags enumeration.
private alias CProperty CP;
/// Returns: non-zero if c is an octal digit (0-7), zero otherwise.
int isoctal(char c)
{
  int props = ptable[c];
  return props & CP.Octal;
}
/// Returns: non-zero if c is a decimal digit (0-9), zero otherwise.
int isdigit(char c)
{
  int props = ptable[c];
  return props & CP.Digit;
}
/// Returns: non-zero if c is a hexadecimal digit (0-9a-fA-F), zero otherwise.
int ishexad(char c)
{
  int props = ptable[c];
  return props & CP.Hex;
}
/// Returns: non-zero if c is an ASCII letter (a-zA-Z), zero otherwise.
int isalpha(char c)
{
  int props = ptable[c];
  return props & CP.Alpha;
}
/// Returns: non-zero if c is an ASCII letter or a decimal digit, zero otherwise.
int isalnum(char c)
{
  int wanted = CP.Alpha | CP.Digit;
  return ptable[c] & wanted;
}
/// Returns: non-zero if c can begin a D identifier, i.e. it is an ASCII
/// letter or an underscore (non-ASCII characters are not considered.)
int isidbeg(char c)
{
  int wanted = CP.Alpha | CP.Underscore;
  return ptable[c] & wanted;
}
/// Returns: non-zero if c is a D identifier character, i.e. it is an ASCII
/// letter, an underscore or a decimal digit (non-ASCII characters are not
/// considered.)
int isident(char c)
{
  int wanted = CP.Alpha | CP.Underscore | CP.Digit;
  return ptable[c] & wanted;
}
/// Returns: non-zero if c is a whitespace character (' ', \t, \v or \f),
/// zero otherwise.
int isspace(char c)
{
  int props = ptable[c];
  return props & CP.Whitespace;
}
/// Returns: the escape value stored for c in the second byte of its table
/// entry, e.g. 10 for 'n'.
int char2ev(char c)
{
  // No masking with EVMask is applied before the shift.
  int entry = ptable[c];
  return entry >> 8;
}
/// Returns: 1 if c lies in the ASCII range (0-127), 0 otherwise.
int isascii(uint c)
{
  return c <= 0x7F;
}
130
// Generator for the table literal above: recomputes every entry of ptable
// and prints the result as an array literal with 16 entries per line.
// Only compiled when the version identifier gen_ptable is set.
// NOTE(review): Format and Stdout are not defined in this module as shown;
// presumably they come from the Tango library -- verify the imports.
version(gen_ptable)
static this()
{
  alias ptable table;
  assert(table.length == 256);

  // Step 1: compute the property flags (low byte) of every character code.
  for (int code; code < table.length; ++code)
  {
    int flags = 0; // Every entry starts out empty.
    if ('0' <= code && code <= '9')
    {
      flags |= CP.Digit | CP.Hex;
      if (code <= '7')
        flags |= CP.Octal;
    }
    if ('a' <= code && code <= 'f' || 'A' <= code && code <= 'F')
      flags |= CP.Hex;
    if ('a' <= code && code <= 'z' || 'A' <= code && code <= 'Z')
      flags |= CP.Alpha;
    if (code == '_')
      flags |= CP.Underscore;
    if (code == ' ' || code == '\t' || code == '\v' || code == '\f')
      flags |= CP.Whitespace;
    table[code] = flags;
  }

  // Step 2: store the escape sequence values in the second byte.
  // This only works while all property flags fit into the first byte.
  assert(CProperty.max <= ubyte.max, "character property flags and escape value byte overlap.");
  table['a']  |= 7 << 8;  // \a
  table['b']  |= 8 << 8;  // \b
  table['t']  |= 9 << 8;  // \t
  table['n']  |= 10 << 8; // \n
  table['v']  |= 11 << 8; // \v
  table['f']  |= 12 << 8; // \f
  table['r']  |= 13 << 8; // \r
  table['"']  |= 34 << 8; // \"
  table['\''] |= 39 << 8; // \'
  table['?']  |= 63 << 8; // \?
  table['\\'] |= 92 << 8; // \\

  // Step 3: print a formatted array literal.
  char[] text = "[\n";
  foreach (idx, entry; ptable)
  {
    // Entries with an escape value are printed in hexadecimal.
    char[] fmt = entry > 255 ? " 0x{0:x}," : "{0,2},";
    text ~= Format(fmt, entry);
    if ((idx + 1) % 16 == 0)
      text ~= "\n"; // Line break after every 16th entry.
  }
  // Replace the trailing ",\n" with the closing bracket.
  text[$-2..$] = "\n]";
  Stdout(text).newline;
}