Mercurial > projects > dil
view src/dil/lexer/Funcs.d @ 806:bcb74c9b895c
Moved out files in the trunk folder to the root.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sun, 09 Mar 2008 00:12:19 +0100 |
parents | trunk/src/dil/lexer/Funcs.d@3b34f6a95a27 |
children |
line wrap: on
line source
/++
  Author: Aziz Köksal
  License: GPL3
+/
module dil.lexer.Funcs;

/// The Unicode line separator (U+2028) as three char code units.
const char[3] LS = \u2028;
/// The Unicode line separator as a single code point.
const dchar LSd = 0x2028;
/// The Unicode paragraph separator (U+2029) as three char code units.
const char[3] PS = \u2029;
/// The Unicode paragraph separator as a single code point.
const dchar PSd = 0x2029;

// isUnicodeNewline() compares the first two code units against LS only,
// which is valid because LS and PS share them.
static assert(LS[0] == PS[0] && LS[1] == PS[1]);

/// Control+Z.
const dchar _Z_ = 26;

/// Returns: true if d is the Unicode line separator or the Unicode
/// paragraph separator.
bool isUnicodeNewlineChar(dchar d)
{
  if (d == LSd)
    return true;
  return d == PSd;
}

/// Returns: true if p points to an encoded line or paragraph separator.
///
/// p[1] and p[2] are only read when the preceding code units matched.
bool isUnicodeNewline(char* p)
{
  // Common two code unit prefix of LS and PS.
  if (p[0] != LS[0])
    return false;
  if (p[1] != LS[1])
    return false;
  // Only the third code unit tells LS and PS apart.
  char last = p[2];
  return last == LS[2] || last == PS[2];
}

/// Returns: true if p points to the start of a Newline.
/// Newline: \n | \r | \r\n | LS | PS
bool isNewline(char* p)
{
  char first = *p;
  if (first == '\n' || first == '\r')
    return true;
  return isUnicodeNewline(p);
}

/// Returns: true if c is a Newline character (\n, \r, LS or PS.)
bool isNewline(dchar c)
{
  if (c == '\n' || c == '\r')
    return true;
  return isUnicodeNewlineChar(c);
}

/// Returns: true if c is an EOF character.
/// EOF: 0 | _Z_
bool isEOF(dchar c)
{
  if (c == 0)
    return true;
  return c == _Z_;
}

/// Returns: true if p points to the first character of an EndOfLine.
/// EndOfLine: Newline | EOF
bool isEndOfLine(char* p)
{
  if (isNewline(p))
    return true;
  return isEOF(*p);
}

/// Scans a Newline and sets p one character past it.
/// p is left untouched when it does not point to a Newline.
/// Returns: '\n' if found or 0 otherwise.
dchar scanNewline(ref char* p)
{
  if (*p == '\r')
  {
    // "\r\n" counts as a single Newline; a lone '\r' is one as well.
    if (p[1] == '\n')
      p += 2;
    else
      p += 1;
    return '\n';
  }
  if (*p == '\n')
  {
    p += 1;
    return '\n';
  }
  if (isUnicodeNewline(p))
  {
    p += 3; // LS and PS are three code units long.
    return '\n';
  }
  return 0;
}

/// ASCII character properties table.
/// The low byte of an entry holds CProperty flags, the second byte
/// holds the escape value of the character.
static const int[256] ptable = [
  // 0x00 - 0x0F: \t, \v and \f are whitespace.
   0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0,32,32, 0, 0, 0,
  // 0x10 - 0x1F
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x20 - 0x2F: ' ', '"' and '\''
  32, 0, 0x2200, 0, 0, 0, 0, 0x2700, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x30 - 0x3F: '0'-'9' and '?'
   7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 0, 0, 0, 0, 0, 0x3f00,
  // 0x40 - 0x4F: '@', 'A'-'O'
   0,12,12,12,12,12,12, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  // 0x50 - 0x5F: 'P'-'Z', '\\' and '_'
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0x5c00, 0, 0,16,
  // 0x60 - 0x6F: '`', 'a'-'o'
   0, 0x70c, 0x80c,12,12,12, 0xc0c, 8, 8, 8, 8, 8, 8, 8, 0xa08, 8,
  // 0x70 - 0x7F: 'p'-'z'
   8, 8, 0xd08, 8, 0x908, 8, 0xb08, 8, 8, 8, 8, 0, 0, 0, 0, 0,
  // 0x80 - 0xFF: non-ASCII code units have no properties.
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];

/// Enumeration of character property flags.
enum CProperty
{
  Octal      = 1 << 0, /// 0-7
  Digit      = 1 << 1, /// 0-9
  Hex        = 1 << 2, /// 0-9a-fA-F
  Alpha      = 1 << 3, /// a-zA-Z
  Underscore = 1 << 4, /// _
  Whitespace = 1 << 5  /// ' ' \t \v \f
}

const uint EVMask = 0xFF00; // Bit mask for escape value.

private alias CProperty CP;

/// Returns: non-zero (the Octal flag) if c is an octal digit, 0 otherwise.
int isoctal(char c)
{
  int props = ptable[c];
  return props & CP.Octal;
}

/// Returns: non-zero (the Digit flag) if c is a decimal digit, 0 otherwise.
int isdigit(char c)
{
  int props = ptable[c];
  return props & CP.Digit;
}

/// Returns: non-zero (the Hex flag) if c is a hexadecimal digit, 0 otherwise.
int ishexad(char c)
{
  int props = ptable[c];
  return props & CP.Hex;
}

/// Returns: non-zero (the Alpha flag) if c is a letter, 0 otherwise.
int isalpha(char c)
{
  int props = ptable[c];
  return props & CP.Alpha;
}

/// Returns: non-zero if c is a letter or a decimal digit, 0 otherwise.
int isalnum(char c)
{
  int wanted = CP.Alpha | CP.Digit;
  return ptable[c] & wanted;
}

/// Returns: non-zero if c can begin a D identifier (only ASCII), i.e. it is
/// a letter or an underscore; 0 otherwise.
int isidbeg(char c)
{
  int wanted = CP.Alpha | CP.Underscore;
  return ptable[c] & wanted;
}

/// Returns: non-zero if c is a D identifier character (only ASCII), i.e. it
/// is a letter, an underscore or a decimal digit; 0 otherwise.
int isident(char c)
{
  int wanted = CP.Alpha | CP.Underscore | CP.Digit;
  return ptable[c] & wanted;
}

/// Returns: true if c is a whitespace character.
int isspace(char c)
{
  int props = ptable[c];
  return props & CP.Whitespace;
}

/// Returns: the escape value for c, i.e. the second byte of its ptable
/// entry (0 if c has no escape value.)
int char2ev(char c)
{
  // Masking with EVMask first is not needed: no table entry has bits set
  // above the second byte.
  int entry = ptable[c];
  return entry >> 8;
}

/// Returns: 1 if c is an ASCII character (c < 128), 0 otherwise.
int isascii(uint c)
{
  return c <= 127;
}

// Generator for the ptable literal: fills the table at module construction
// and prints it as a formatted array literal.
// NOTE(review): Format and Stdout are not imported in the visible source;
// presumably they come from Tango - confirm before building with
// -version=gen_ptable.
version(gen_ptable)
static this()
{
  alias ptable p;
  assert(p.length == 256);

  // Initialize character properties table.
  for (int ch = 0; ch < p.length; ++ch)
  {
    bool isOct    = '0' <= ch && ch <= '7';
    bool isDec    = '0' <= ch && ch <= '9';
    bool isHexLet = 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F';
    bool isLetter = 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z';
    bool isBlank  = ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f';

    int flags = 0; // Every entry starts out without properties.
    if (isOct)
      flags |= CP.Octal;
    if (isDec)
      flags |= CP.Digit | CP.Hex; // Decimal digits are hex digits too.
    if (isHexLet)
      flags |= CP.Hex;
    if (isLetter)
      flags |= CP.Alpha;
    if (ch == '_')
      flags |= CP.Underscore;
    if (isBlank)
      flags |= CP.Whitespace;
    p[ch] = flags;
  }

  // Store escape sequence values in second byte.
  assert(CProperty.max <= ubyte.max, "character property flags and escape value byte overlap.");

  // Puts the escape value of c into the second byte of its entry.
  void setEscape(char c, int value)
  {
    p[c] |= value << 8;
  }
  setEscape('\'', 39);
  setEscape('"', 34);
  setEscape('?', 63);
  setEscape('\\', 92);
  setEscape('a', 7);
  setEscape('b', 8);
  setEscape('f', 12);
  setEscape('n', 10);
  setEscape('r', 13);
  setEscape('t', 9);
  setEscape('v', 11);

  // Print a formatted array literal.
  char[] text = "[\n";
  foreach (idx, entry; ptable)
  {
    // Entries carrying an escape value are printed in hex, all others as
    // decimals padded to a width of two.
    if (entry > 255)
      text ~= Format(" 0x{0:x},", entry);
    else
      text ~= Format("{0,2},", entry);
    // Break the line after every 16th entry.
    if ((idx + 1) % 16)
      text ~= "";
    else
      text ~= "\n";
  }
  // Replace the trailing ",\n" with the closing bracket.
  text[$-2..$] = "\n]";
  Stdout(text).newline;
}