changeset 739:49fe21aa387c

Added sanitizeText() to dil.Converter. Cleaned predefined.ddoc up a bit. Removed makeString() from dil.doc.Macro. Added REPLACEMENT_STR to dil.Unicode.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sat, 09 Feb 2008 14:24:35 +0100
parents 2afcc305831a
children f3dead0310ce
files trunk/src/cmd/DDoc.d trunk/src/dil/Converter.d trunk/src/dil/Unicode.d trunk/src/dil/doc/Doc.d trunk/src/dil/doc/Macro.d trunk/src/predefined.ddoc
diffstat 6 files changed, 82 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/cmd/DDoc.d	Sat Feb 09 02:06:32 2008 +0100
+++ b/trunk/src/cmd/DDoc.d	Sat Feb 09 14:24:35 2008 +0100
@@ -21,6 +21,7 @@
 import dil.semantic.Symbols;
 import dil.Information;
 import dil.File;
+import dil.Converter;
 import common;
 
 import tango.stdc.time : time_t, time, ctime;
@@ -37,7 +38,7 @@
   MacroParser mparser;
   foreach (macroPath; macroPaths)
   {
-    auto macros = mparser.parse(loadFile(macroPath));
+    auto macros = mparser.parse(loadMacroFile(macroPath));
     mtable = new MacroTable(mtable);
     mtable.insert(macros);
   }
@@ -90,6 +91,14 @@
   file.write(fileText);
 }
 
+/// Reads the macro file at filePath and returns its text after
+/// passing it through sanitizeText().
+string loadMacroFile(string filePath)
+{
+  auto rawText = loadFile(filePath);
+  return sanitizeText(rawText);
+}
+
 /// Traverses the syntax tree and writes DDoc macros to a string buffer.
 class DDocEmitter : DefaultVisitor
 {
--- a/trunk/src/dil/Converter.d	Sat Feb 09 02:06:32 2008 +0100
+++ b/trunk/src/dil/Converter.d	Sat Feb 09 14:24:35 2008 +0100
@@ -237,3 +237,73 @@
     return text;
   }
 }
+
+/// Sanitizes text in place: converts every newline sequence to '\n' and
+/// replaces invalid UTF-8 sequences with U+FFFD.
+///
+/// The output is written over the input buffer, so it can never be longer
+/// than the input. An invalid sequence shorter than the three code units of
+/// REPLACEMENT_STR is therefore dropped instead of being replaced, unless
+/// earlier shortening has freed up enough room.
+///
+/// Params:
+///   text = the text to sanitize; its contents are overwritten.
+/// Returns: the sanitized text (a shortened view of the same buffer),
+///          or null if text is empty.
+string sanitizeText(string text)
+{
+  if (!text.length)
+    return null;
+
+  char* p = text.ptr; // Reads the input.
+  char* end = p + text.length;
+  char* q = p; // Writes the output; never overtakes p.
+
+  for (; p < end; p++)
+  {
+    assert(q <= p);
+    switch (*p)
+    {
+    case '\r':
+      if (p+1 < end && p[1] == '\n')
+        p++; // "\r\n" counts as a single newline.
+      // Fall through.
+    case '\n':
+      *q++ = '\n';
+      continue;
+    default:
+      if (isascii(*p))
+        break;
+      if (p+2 < end && isUnicodeNewline(p))
+      { // Three-byte Unicode newline; p must end up on its last byte.
+        p += 2;
+        goto case '\n';
+      }
+      auto p2 = p; // Beginning of the UTF-8 sequence.
+      dchar c = decode(p, end);
+      if (c == ERROR_CHAR)
+      { // Skip to next ASCII character or valid UTF-8 sequence.
+        while (++p < end && isTrailByte(*p))
+        {}
+        alias REPLACEMENT_STR R;
+        // Emit U+FFFD only if it fits into the bytes consumed so far.
+        // Otherwise nothing is written; q must not advance, or a stale
+        // byte of the input would leak into the result.
+        if (q+2 < p)
+          (*q++ = R[0]), (*q++ = R[1]), (*q++ = R[2]);
+        p--; // The loop header increments p again.
+      }
+      else
+      { // Copy the valid UTF-8 sequence.
+        while (p2 <= p) // p points to the last trail byte.
+          *q++ = *p2++; // Copy code units.
+      }
+      continue;
+    }
+    assert(isascii(*p));
+    *q++ = *p;
+  }
+  assert(p == end);
+  text.length = text.length - (p - q); // Cut off the unused tail.
+  return text;
+}
--- a/trunk/src/dil/Unicode.d	Sat Feb 09 02:06:32 2008 +0100
+++ b/trunk/src/dil/Unicode.d	Sat Feb 09 14:24:35 2008 +0100
@@ -7,6 +7,7 @@
 
 /// U+FFFD = �. Used to replace invalid Unicode characters.
 const dchar REPLACEMENT_CHAR = '\uFFFD';
+const char[3] REPLACEMENT_STR = \uFFFD; /// U+FFFD as a string of three UTF-8 code units.
 /// Invalid character, returned on errors.
 const dchar ERROR_CHAR = 0xD800;
 
--- a/trunk/src/dil/doc/Doc.d	Sat Feb 09 02:06:32 2008 +0100
+++ b/trunk/src/dil/doc/Doc.d	Sat Feb 09 14:24:35 2008 +0100
@@ -382,7 +382,7 @@
     default:
       if (!isascii(result[i]) && i+2 < len && isUnicodeNewline(result.ptr + i))
       {
-        i++; i++;
+        i += 2;
         goto case '\n';
       }
     }
--- a/trunk/src/dil/doc/Macro.d	Sat Feb 09 02:06:32 2008 +0100
+++ b/trunk/src/dil/doc/Macro.d	Sat Feb 09 14:24:35 2008 +0100
@@ -81,11 +81,7 @@
   }
 }
 
-char[] makeString(char* begin, char* end)
-{
-  return begin[0 .. end - begin];
-}
-
+/// Expands the macros from the table in text.
 char[] expandMacros(MacroTable table, char[] text, char[][] args = null)
 {
   char[] result;
--- a/trunk/src/predefined.ddoc	Sat Feb 09 02:06:32 2008 +0100
+++ b/trunk/src/predefined.ddoc	Sat Feb 09 14:24:35 2008 +0100
@@ -6,7 +6,8 @@
 <body>
 <h1>$(TITLE)</h1>
 $(BODY)
-<p style="font-size:0.95em;">$(COPYRIGHT) Generated by $(LINK2 http://code.google.com/p/dil, dil) at $(DATETIME).</p>
+<hr>
+<p class="footer">$(COPYRIGHT) Generated by $(LINK2 http://code.google.com/p/dil, dil) at $(DATETIME).</p>
 </body>
 </html>
 
@@ -24,18 +25,18 @@
 OL = <ol>$0</ol>
 UL = <ul>$0</ul>
 LI = <li>$0</li>
-BIG =<big>$0</big>
+BIG = <big>$0</big>
 SMALL = <small>$0</small>
 BR = <br>
 LINK = <a href="$0">$0</a>
 LINK2 = <a href="$1">$+</a>
 
-RED = <font color=red>$0</font>
-BLUE =<font color=blue>$0</font>
-GREEN = <font color=green>$0</font>
-YELLOW = <font color=yellow>$0</font>
-BLACK = <font color=black>$0</font>
-WHITE = <font color=white>$0</font>
+RED = <font color="red">$0</font>
+BLUE = <font color="blue">$0</font>
+GREEN = <font color="green">$0</font>
+YELLOW = <font color="yellow">$0</font>
+BLACK = <font color="black">$0</font>
+WHITE = <font color="white">$0</font>
 
 D_CODE = <pre class="d_code">$0</pre>
 D_COMMENT = $(GREEN $0)
@@ -85,11 +86,12 @@
 DDOC_STRUCT_MEMBERS   = $(DDOC_MEMBERS $0)
 DDOC_ENUM_MEMBERS     = $(DDOC_MEMBERS $0)
 DDOC_TEMPLATE_MEMBERS = $(DDOC_MEMBERS $0)
-DDOC_PARAMS    = $(B Params:)$(BR)\n$(TABLE $0)$(BR)
+DDOC_PARAMS = $(B Params:)$(BR)
+$(TABLE $0)$(BR)
 DDOC_PARAM_ROW = $(TR $0)
-DDOC_PARAM_ID  = $(TD $0)
-DDOC_PARAM_DESC  = $(TD $0)
-DDOC_BLANKLINE	= $(BR)$(BR)
+DDOC_PARAM_ID = $(TD $0)
+DDOC_PARAM_DESC = $(TD $0)
+DDOC_BLANKLINE = $(BR)$(BR)
 
 DDOC_PSYMBOL = $(U $0)
 DDOC_KEYWORD = $(B $0)