# HG changeset patch # User Aziz Köksal # Date 1201891440 -3600 # Node ID ceaac6a24258baa815e419d5d35eab20387b1706 # Parent 8955296dd8079359b78173bd10d8c53aa89cfe3c Added isUnicodeAlpha() for DDocParser and MacroParser. diff -r 8955296dd807 -r ceaac6a24258 trunk/src/dil/Unicode.d --- a/trunk/src/dil/Unicode.d Fri Feb 01 16:40:35 2008 +0100 +++ b/trunk/src/dil/Unicode.d Fri Feb 01 19:44:00 2008 +0100 @@ -42,25 +42,51 @@ return (b & 0xC0) == 0xC0; // 11xx_xxxx } +/// Advances ref_p only if this is a valid Unicode alpha character. +bool isUnicodeAlpha(ref char* ref_p, char* end) +in { assert(ref_p && ref_p < end); } +body +{ + if (*ref_p < 0x80) + return false; + auto p = ref_p; + auto c = decode(p, end); + if (!isUniAlpha(c)) + return false; + ref_p = p; + return true; +} + +/// index is set one past the last trail byte of the valid UTF-8 sequence. dchar decode(char[] str, ref size_t index) -in { assert(str.length); } +in { assert(str.length && index < str.length); } out(c) { assert(isValidChar(c)); } body { char* p = str.ptr + index; char* end = str.ptr + str.length; - dchar c = *p; + dchar c = decode(p, end); + if (c != ERROR_CHAR) + index = p - str.ptr + 1; + return c; +} - if (!(p < end)) - return ERROR_CHAR; +/// ref_p is set to the last trail byte of the valid UTF-8 sequence. +dchar decode(ref char* ref_p, char* end) +in { assert(ref_p && ref_p < end); } +out(c) { assert(isValidChar(c)); } +body +{ + char* p = ref_p; + dchar c = *p; if (c < 0x80) { - ++index; + ref_p++; return c; } - ++p; // Move to second byte. + p++; // Move to second byte. 
if (!(p < end)) return ERROR_CHAR; @@ -82,18 +108,16 @@ return ERROR_CHAR; } - const char[] checkNextByte = "if (++p < end && !isTrailByte(*p))" + const char[] checkNextByte = "if (!(++p < end && isTrailByte(*p)))" " return ERROR_CHAR;"; const char[] appendSixBits = "c = (c << 6) | *p & 0b0011_1111;"; - auto next_index = index; // Decode if ((c & 0b1110_0000) == 0b1100_0000) { // 110xxxxx 10xxxxxx c &= 0b0001_1111; mixin(appendSixBits); - next_index += 2; } else if ((c & 0b1111_0000) == 0b1110_0000) { @@ -101,7 +125,6 @@ c &= 0b0000_1111; mixin(appendSixBits ~ checkNextByte ~ appendSixBits); - next_index += 3; } else if ((c & 0b1111_1000) == 0b1111_0000) { @@ -110,7 +133,6 @@ mixin(appendSixBits ~ checkNextByte ~ appendSixBits ~ checkNextByte ~ appendSixBits); - next_index += 4; } else // 5 and 6 byte UTF-8 sequences are not allowed yet. @@ -122,7 +144,7 @@ if (!isValidChar(c)) return ERROR_CHAR; - index = next_index; + ref_p = p; return c; } diff -r 8955296dd807 -r ceaac6a24258 trunk/src/dil/doc/Doc.d --- a/trunk/src/dil/doc/Doc.d Fri Feb 01 16:40:35 2008 +0100 +++ b/trunk/src/dil/doc/Doc.d Fri Feb 01 19:44:00 2008 +0100 @@ -11,22 +11,36 @@ class DDocComment { - string text; Section[] sections; Section summary; /// Optional summary section. Section description; /// Optional description section. - this(string text) + this(Section[] sections, Section summary, Section description) { - assert(text.length && text[$-1] == '\0'); - this.text = text; + this.sections = sections; + this.summary = summary; + this.description = description; } +} + +struct DDocParser +{ + char* p; + char* textEnd; + Section[] sections; + Section summary; /// Optional summary section. + Section description; /// Optional description section. /// Parses the DDoc text into sections. 
- void parseSections() + Section[] parse(string text) { - char* p = text.ptr; - char* textEnd = p + text.length; + if (!text.length) + return null; + if (text[$-1] != '\0') + text ~= '\0'; + p = text.ptr; + textEnd = p + text.length; + char* summaryBegin; char* idBegin, idEnd; char* nextIdBegin, nextIdEnd; @@ -34,16 +48,19 @@ skipWhitespace(p); summaryBegin = p; - if (findNextIdColon(p, idBegin, idEnd)) + if (findNextIdColon(idBegin, idEnd)) { // Check that this is not an explicit section. if (summaryBegin != idBegin) scanSummaryAndDescription(summaryBegin, idBegin); } else // There are no explicit sections. - return scanSummaryAndDescription(summaryBegin, textEnd); + { + scanSummaryAndDescription(summaryBegin, textEnd); + return null; + } assert(idBegin && idEnd); - while (findNextIdColon(p, nextIdBegin, nextIdEnd)) + while (findNextIdColon(nextIdBegin, nextIdEnd)) { sections ~= new Section(makeString(idBegin, idEnd), makeString(idEnd+1, nextIdBegin)); idBegin = nextIdBegin; @@ -51,14 +68,15 @@ } // Add last section. sections ~= new Section(makeString(idBegin, idEnd), makeString(idEnd+1, textEnd)); + return sections; } void scanSummaryAndDescription(char* p, char* end) { - assert(p != end && p < end); + assert(p < end); char* sectionBegin = p; // Search for the end of the first paragraph. - while (p != end && !(*p == '\n' && p[1] == '\n')) + while (p < end && !(*p == '\n' && p[1] == '\n')) p++; // The first paragraph is the summary. 
summary = new Section("", makeString(sectionBegin, p)); @@ -88,23 +106,23 @@ /// idBegin = set to the first character of the Identifier /// idEnd = set to the colon following the Identifier /// Returns: true if found - bool findNextIdColon(ref char* ref_p, ref char* ref_idBegin, ref char* ref_idEnd) + bool findNextIdColon(ref char* ref_idBegin, ref char* ref_idEnd) { - auto p = ref_p; + auto p = this.p; while (*p != '\0') { auto idBegin = p; assert(isascii(*p) || isLeadByte(*p)); - if (isidbeg(*p) || isUnicodeAlpha(p)) // IdStart + if (isidbeg(*p) || isUnicodeAlpha(p, textEnd)) // IdStart { do // IdChar* p++; - while (isident(*p) || isUnicodeAlpha(p)) + while (isident(*p) || isUnicodeAlpha(p, textEnd)) if (*p == ':') // : { ref_idBegin = idBegin; ref_idEnd = p; - ref_p = p; + this.p = p; return true; } } @@ -118,65 +136,6 @@ } return false; } - - /// This function assumes that there are no invalid - /// UTF-8 sequences in the string. - bool isUnicodeAlpha(ref char* ref_p) - { - char* p = ref_p; // Copy. - if (isascii(*p)) - return false; - - dchar d = *p; - p++; // Move to second byte. - // Error if second byte is not a trail byte. - assert(isTrailByte(*p)); - // Check for overlong sequences. 
- assert(delegate () { - switch (d) - { - case 0xE0, 0xF0, 0xF8, 0xFC: - if ((*p & d) == 0x80) - return false; - default: - if ((d & 0xFE) == 0xC0) // 1100000x - return false; - return true; - } - }() == true - ); - const char[] checkNextByte = "p++;" - "assert(isTrailByte(*p));"; - const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;"; - // Decode - if ((d & 0b1110_0000) == 0b1100_0000) - { - d &= 0b0001_1111; - mixin(appendSixBits); - } - else if ((d & 0b1111_0000) == 0b1110_0000) - { - d &= 0b0000_1111; - mixin(appendSixBits ~ - checkNextByte ~ appendSixBits); - } - else if ((d & 0b1111_1000) == 0b1111_0000) - { - d &= 0b0000_0111; - mixin(appendSixBits ~ - checkNextByte ~ appendSixBits ~ - checkNextByte ~ appendSixBits); - } - else - return false; - - assert(isTrailByte(*p) && isValidChar(d)); - if (!isUniAlpha(d)) - return false; - // Only advance pointer if this is a Unicode alpha character. - ref_p = p; - return true; - } } class Section diff -r 8955296dd807 -r ceaac6a24258 trunk/src/dil/doc/Macro.d --- a/trunk/src/dil/doc/Macro.d Fri Feb 01 16:40:35 2008 +0100 +++ b/trunk/src/dil/doc/Macro.d Fri Feb 01 19:44:00 2008 +0100 @@ -94,11 +94,11 @@ { skipWhitespace(p); auto idBegin = p; - if (isidbeg(*p) || isUnicodeAlpha(p)) // IdStart + if (isidbeg(*p) || isUnicodeAlpha(p, textEnd)) // IdStart { do // IdChar* p++; - while (isident(*p) || isUnicodeAlpha(p)) + while (isident(*p) || isUnicodeAlpha(p, textEnd)) auto idEnd = p; skipWhitespace(p); @@ -121,63 +121,22 @@ p++; p++; } - - bool isUnicodeAlpha(ref char* ref_p) - { - char* p = ref_p; // Copy. - if (isascii(*p)) - return false; - - dchar d = *p; - p++; // Move to second byte. - // Error if second byte is not a trail byte. - if (!isTrailByte(*p)) - return false; - // Check for overlong sequences. 
- switch (d) - { - case 0xE0, 0xF0, 0xF8, 0xFC: - if ((*p & d) == 0x80) - return false; - default: - if ((d & 0xFE) == 0xC0) // 1100000x - return false; - } - const char[] checkNextByte = "if (!isTrailByte(*++p))" - " return false;"; - const char[] appendSixBits = "d = (d << 6) | *p & 0b0011_1111;"; - // Decode - if ((d & 0b1110_0000) == 0b1100_0000) - { - d &= 0b0001_1111; - mixin(appendSixBits); - } - else if ((d & 0b1111_0000) == 0b1110_0000) - { - d &= 0b0000_1111; - mixin(appendSixBits ~ - checkNextByte ~ appendSixBits); - } - else if ((d & 0b1111_1000) == 0b1111_0000) - { - d &= 0b0000_0111; - mixin(appendSixBits ~ - checkNextByte ~ appendSixBits ~ - checkNextByte ~ appendSixBits); - } - else - return false; - - assert(isTrailByte(*p)); - if (!isValidChar(d) || !isUniAlpha(d)) - return false; - // Only advance pointer if this is a Unicode alpha character. - ref_p = p; - return true; - } } char[] makeString(char* begin, char* end) { return begin[0 .. end - begin]; } + +char[] expandMacros(MacroTable table, char[] text) +{ + char[] result; + char* p = text.ptr; + char* textEnd = p + text.length; +// while (p < text.length) +// { + +// } + return result; +} +