Mercurial > projects > dil
changeset 739:49fe21aa387c
Added sanitizeText() to dil.Converter.
Cleaned predefined.ddoc up a bit.
Removed makeString() from dil.doc.Macro.
Added REPLACEMENT_STR to dil.Unicode.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sat, 09 Feb 2008 14:24:35 +0100 |
parents | 2afcc305831a |
children | f3dead0310ce |
files | trunk/src/cmd/DDoc.d trunk/src/dil/Converter.d trunk/src/dil/Unicode.d trunk/src/dil/doc/Doc.d trunk/src/dil/doc/Macro.d trunk/src/predefined.ddoc |
diffstat | 6 files changed, 82 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/trunk/src/cmd/DDoc.d Sat Feb 09 02:06:32 2008 +0100 +++ b/trunk/src/cmd/DDoc.d Sat Feb 09 14:24:35 2008 +0100 @@ -21,6 +21,7 @@ import dil.semantic.Symbols; import dil.Information; import dil.File; +import dil.Converter; import common; import tango.stdc.time : time_t, time, ctime; @@ -37,7 +38,7 @@ MacroParser mparser; foreach (macroPath; macroPaths) { - auto macros = mparser.parse(loadFile(macroPath)); + auto macros = mparser.parse(loadMacroFile(macroPath)); mtable = new MacroTable(mtable); mtable.insert(macros); } @@ -90,6 +91,11 @@ file.write(fileText); } +string loadMacroFile(string filePath) +{ + return sanitizeText(loadFile(filePath)); +} + /// Traverses the syntax tree and writes DDoc macros to a string buffer. class DDocEmitter : DefaultVisitor {
--- a/trunk/src/dil/Converter.d Sat Feb 09 02:06:32 2008 +0100 +++ b/trunk/src/dil/Converter.d Sat Feb 09 14:24:35 2008 +0100 @@ -237,3 +237,61 @@ return text; } } + +/// Replaces invalid UTF-8 sequences with U+FFFD (if there's enough space,) +/// and Newlines with '\n'. +string sanitizeText(string text) +{ + if (!text.length) + return null; + + char* p = text.ptr; + char* end = p + text.length; + char* q = p; + + for (; p < end; p++, q++) + { + assert(q <= p); + switch (*p) + { + case '\r': + if (p+1 < end && p[1] == '\n') + p++; + case '\n': + *q = '\n'; + continue; + default: + if (isascii(*p)) + break; + if (p+2 < end && isUnicodeNewline(p)) + { + p += 2; + goto case '\n'; + } + auto p2 = p; // Beginning of the UTF-8 sequence. + dchar c = decode(p, end); + if (c == ERROR_CHAR) + { // Skip to next ASCII character or valid UTF-8 sequence. + while (++p < end && isTrailByte(*p)) + {} + alias REPLACEMENT_STR R; + if (q+2 < p) // Copy replacement char if there is enough space. + (*q = R[0]), (*++q = R[1]), (*++q = R[2]); + p--; + } + else + { // Copy the valid UTF-8 sequence. + while (p2 <= p) // p points to the last trail byte. + *q++ = *p2++; // Copy code units. + q--; + } + continue; + } + assert(isascii(*p)); + *q = *p; + } + assert(p == end); + text.length = text.length - (p - q); + //text = text.ptr[0 .. q - text.ptr]; // Another way. + return text; +}
--- a/trunk/src/dil/Unicode.d Sat Feb 09 02:06:32 2008 +0100 +++ b/trunk/src/dil/Unicode.d Sat Feb 09 14:24:35 2008 +0100 @@ -7,6 +7,7 @@ /// U+FFFD = �. Used to replace invalid Unicode characters. const dchar REPLACEMENT_CHAR = '\uFFFD'; +const char[3] REPLACEMENT_STR = \uFFFD; /// Ditto /// Invalid character, returned on errors. const dchar ERROR_CHAR = 0xD800;
--- a/trunk/src/dil/doc/Doc.d Sat Feb 09 02:06:32 2008 +0100 +++ b/trunk/src/dil/doc/Doc.d Sat Feb 09 14:24:35 2008 +0100 @@ -382,7 +382,7 @@ default: if (!isascii(result[i]) && i+2 < len && isUnicodeNewline(result.ptr + i)) { - i++; i++; + i += 2; goto case '\n'; } }
--- a/trunk/src/dil/doc/Macro.d Sat Feb 09 02:06:32 2008 +0100 +++ b/trunk/src/dil/doc/Macro.d Sat Feb 09 14:24:35 2008 +0100 @@ -81,11 +81,7 @@ } } -char[] makeString(char* begin, char* end) -{ - return begin[0 .. end - begin]; -} - +/// Expands the macros from the table in text. char[] expandMacros(MacroTable table, char[] text, char[][] args = null) { char[] result;
--- a/trunk/src/predefined.ddoc Sat Feb 09 02:06:32 2008 +0100 +++ b/trunk/src/predefined.ddoc Sat Feb 09 14:24:35 2008 +0100 @@ -6,7 +6,8 @@ <body> <h1>$(TITLE)</h1> $(BODY) -<p style="font-size:0.95em;">$(COPYRIGHT) Generated by $(LINK2 http://code.google.com/p/dil, dil) at $(DATETIME).</p> +<hr> +<p class="footer">$(COPYRIGHT) Generated by $(LINK2 http://code.google.com/p/dil, dil) at $(DATETIME).</p> </body> </html> @@ -24,18 +25,18 @@ OL = <ol>$0</ol> UL = <ul>$0</ul> LI = <li>$0</li> -BIG =<big>$0</big> +BIG = <big>$0</big> SMALL = <small>$0</small> BR = <br> LINK = <a href="$0">$0</a> LINK2 = <a href="$1">$+</a> -RED = <font color=red>$0</font> -BLUE =<font color=blue>$0</font> -GREEN = <font color=green>$0</font> -YELLOW = <font color=yellow>$0</font> -BLACK = <font color=black>$0</font> -WHITE = <font color=white>$0</font> +RED = <font color="red">$0</font> +BLUE = <font color="blue">$0</font> +GREEN = <font color="green">$0</font> +YELLOW = <font color="yellow">$0</font> +BLACK = <font color="black">$0</font> +WHITE = <font color="white">$0</font> D_CODE = <pre class="d_code">$0</pre> D_COMMENT = $(GREEN $0) @@ -85,11 +86,12 @@ DDOC_STRUCT_MEMBERS = $(DDOC_MEMBERS $0) DDOC_ENUM_MEMBERS = $(DDOC_MEMBERS $0) DDOC_TEMPLATE_MEMBERS = $(DDOC_MEMBERS $0) -DDOC_PARAMS = $(B Params:)$(BR)\n$(TABLE $0)$(BR) +DDOC_PARAMS = $(B Params:)$(BR) +$(TABLE $0)$(BR) DDOC_PARAM_ROW = $(TR $0) -DDOC_PARAM_ID = $(TD $0) -DDOC_PARAM_DESC = $(TD $0) -DDOC_BLANKLINE = $(BR)$(BR) +DDOC_PARAM_ID = $(TD $0) +DDOC_PARAM_DESC = $(TD $0) +DDOC_BLANKLINE = $(BR)$(BR) DDOC_PSYMBOL = $(U $0) DDOC_KEYWORD = $(B $0)