Mercurial > projects > dil
view trunk/src/dil/lexer/Funcs.d @ 715:b6c6baa41267
Moved character properties table to dil.lexer.Funcs.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Thu, 31 Jan 2008 15:58:10 +0100 |
parents | 9e811db780a6 |
children | 5e3ef1b2011c |
line wrap: on
line source
/++
  Author: Aziz Köksal
  License: GPL3
+/
module dil.lexer.Funcs;

const char[3] LS = \u2028; /// Line separator, stored as a sequence of three chars.
const char[3] PS = \u2029; /// Paragraph separator, stored as a sequence of three chars.
const dchar LSd = 0x2028; /// Line separator as a single dchar.
const dchar PSd = 0x2029; /// Paragraph separator as a single dchar.
// LS and PS only differ in their third char; isUnicodeNewline() depends on that.
static assert(LS[0] == PS[0] && LS[1] == PS[1]);

const uint _Z_ = 26; /// Control+Z

/// Returns true if d is the line separator or the paragraph separator.
bool isUnicodeNewlineChar(dchar d)
{
  if (d == LSd)
    return true;
  return d == PSd;
}

/// Returns true if the three chars starting at p form a line or
/// paragraph separator.
bool isUnicodeNewline(char* p)
{
  // Leave early so that p[1] and p[2] are only read once the chars
  // in front of them have matched.
  if (p[0] != LS[0] || p[1] != LS[1])
    return false;
  char third = p[2];
  return third == LS[2] || third == PS[2];
}

/++
  Returns true if p points to the start of a Newline.
  Newline: \n | \r | \r\n | LS | PS
+/
bool isNewline(char* p)
{
  char first = *p;
  if (first == '\n' || first == '\r')
    return true;
  return isUnicodeNewline(p);
}

/// Returns true if c is a Newline character.
bool isNewline(dchar c)
{
  switch (c)
  {
  case '\n', '\r':
    return true;
  default:
    return isUnicodeNewlineChar(c);
  }
}

/++
  Returns true if c is an EOF character.
  EOF: 0 | _Z_
+/
bool isEOF(dchar c)
{
  if (c == 0)
    return true;
  return c == _Z_;
}

/++
  Returns true if p points to the first character of an EndOfLine.
  EndOfLine: Newline | EOF
+/
bool isEndOfLine(char* p)
{
  if (isNewline(p))
    return true;
  return isEOF(*p);
}

/++
  Scans a Newline and moves p just past it.
  Returns '\n' if a Newline was scanned; otherwise returns 0 and
  leaves p untouched.
+/
dchar scanNewline(ref char* p)
{
  char first = *p;
  if (first == '\r')
  {
    // "\r\n" counts as one Newline, so skip both chars.
    p += (p[1] == '\n') ? 2 : 1;
    return '\n';
  }
  if (first == '\n')
  {
    ++p;
    return '\n';
  }
  if (isUnicodeNewline(p))
  {
    p += 3; // LS and PS are declared as char[3].
    return '\n';
  }
  return 0;
}

/// ASCII character properties table.
static const int ptable[256] = [
  // One entry per char value, 16 entries per row.
  // Low byte:    CProperty flags of the character.
  // Second byte: escape value of the character, see char2ev().
 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32, 0, 0x2200, 0, 0, 0, 0, 0x2700, 0, 0, 0, 0, 0, 0, 0, 0,
 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0x3f00,
 0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8,
 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0x5c00, 0, 0,16,
 0, 0x70c, 0x80c,12,12,12, 0xc0c, 8, 8, 8, 8, 8, 8, 8, 0xa08, 8,
 8, 8, 0xd08, 8, 0x908, 8, 0xb08, 8, 8, 8, 8, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];

/// Flags stored in the low byte of a ptable entry.
enum CProperty
{
  Octal      = 0x01,
  Digit      = 0x02,
  Hex        = 0x04,
  Alpha      = 0x08,
  Underscore = 0x10,
  Whitespace = 0x20
}

const uint EVMask = 0xFF00; // Bit mask for escape value

private alias CProperty CP;

/// Returns non-zero if c is flagged as an octal digit.
int isoctal(char c)
{
  int flags = ptable[c];
  return flags & CP.Octal;
}

/// Returns non-zero if c is flagged as a decimal digit.
int isdigit(char c)
{
  int flags = ptable[c];
  return flags & CP.Digit;
}

/// Returns non-zero if c is flagged as a hexadecimal digit.
int ishexad(char c)
{
  int flags = ptable[c];
  return flags & CP.Hex;
}

/// Returns non-zero if c is flagged as a letter.
int isalpha(char c)
{
  int flags = ptable[c];
  return flags & CP.Alpha;
}

/// Returns non-zero if c is flagged as a letter or a decimal digit.
int isalnum(char c)
{
  int flags = ptable[c];
  return flags & (CP.Alpha | CP.Digit);
}

/// Returns non-zero if c is flagged as a letter or an underscore.
int isidbeg(char c)
{
  int flags = ptable[c];
  return flags & (CP.Alpha | CP.Underscore);
}

/// Returns non-zero if c is flagged as a letter, an underscore or a
/// decimal digit.
int isident(char c)
{
  int flags = ptable[c];
  return flags & (CP.Alpha | CP.Underscore | CP.Digit);
}

/// Returns non-zero if c is flagged as whitespace.
int isspace(char c)
{
  int flags = ptable[c];
  return flags & CP.Whitespace;
}

/// Returns the escape value stored for c in ptable.
int char2ev(char c)
{
  int entry = ptable[c];
  return entry >> 8; /*(entry & EVMask) >> 8;*/
}

/// Returns non-zero if c is below 128.
int isascii(uint c)
{
  return c <= 0x7F;
}

// Only compiled with version gen_ptable: rebuilds ptable and prints it
// as an array literal.
// NOTE(review): Format and Stdout are not imported in the visible part of
// this module; confirm where they come from before enabling gen_ptable.
version(gen_ptable)
static this()
{
  alias ptable p;
  assert(p.length == 256);
  // Initialize character properties table.
  for (int ch; ch < p.length; ++ch)
  {
    bool decimal = '0' <= ch && ch <= '9';
    bool hexLetter = 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F';
    bool letter = 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z';
    bool blank = ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f';

    int flags = 0; // Start from a clean entry.
    if ('0' <= ch && ch <= '7')
      flags |= CP.Octal;
    if (decimal)
      flags |= CP.Digit | CP.Hex;
    if (hexLetter)
      flags |= CP.Hex;
    if (letter)
      flags |= CP.Alpha;
    if (ch == '_')
      flags |= CP.Underscore;
    if (blank)
      flags |= CP.Whitespace;
    p[ch] = flags;
  }
  // Store escape sequence values in second byte.
  assert(CProperty.max <= ubyte.max, "character property flags and escape value byte overlap.");
  // These four characters escape to themselves.
  p['\''] |= '\'' << 8;
  p['"']  |= '"' << 8;
  p['?']  |= '?' << 8;
  p['\\'] |= '\\' << 8;
  // Each letter stores the value of the escape sequence it names.
  p['a'] |= '\a' << 8;
  p['b'] |= '\b' << 8;
  p['f'] |= '\f' << 8;
  p['n'] |= '\n' << 8;
  p['r'] |= '\r' << 8;
  p['t'] |= '\t' << 8;
  p['v'] |= '\v' << 8;

  // Print a formatted array literal.
  char[] text = "[\n";
  foreach (idx, value; ptable)
  {
    // Entries that carry an escape value are printed in hex.
    char[] fmt = value > 255 ? " 0x{0:x}," : "{0,2},";
    text ~= Format(fmt, value);
    if ((idx + 1) % 16 == 0)
      text ~= "\n"; // 16 entries per row.
  }
  // Replace the final ",\n" with the closing bracket.
  text[$-2..$] = "\n]";
  Stdout(text).newline;
}