changeset 352:321df078e247

- Added code for detecting Unicode format of a file without a BOM.
author aziz
date Sun, 26 Aug 2007 00:55:05 +0000
parents 97a9a2d7d46d
children a3847ea28fee
files trunk/src/dil/File.d
diffstat 1 files changed, 19 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/trunk/src/dil/File.d	Sun Aug 26 00:12:00 2007 +0000
+++ b/trunk/src/dil/File.d	Sun Aug 26 00:55:05 2007 +0000
@@ -5,6 +5,7 @@
 module dil.File;
 import std.stdio, std.file, std.utf;
 
+/// Loads a file in any valid Unicode format and converts it to UTF-8.
 char[] loadFile(char[] fileName)
 {
   ubyte[] data = cast(ubyte[]) std.file.read(fileName);
@@ -14,6 +15,24 @@
   switch (bom)
   {
   case BOM.None:
+    // No BOM found. In this case the spec requires the first character to be
+    // ASCII, so the position of the zero bytes around it reveals the encoding.
+    if (data.length >= 4)
+    {
+      if (data[0..3] == cast(ubyte[3])x"00 00 00")
+        text = toUTF8(cast(dchar[])utf32BEtoLE(data));
+      else if (data[1..4] == cast(ubyte[3])x"00 00 00")
+        text = toUTF8(cast(dchar[])data);
+    }
+    else if (data.length >= 2)
+    {
+      if (data[0] == 0)
+        text = toUTF8(cast(wchar[])utf16BEtoLE(data));
+      else if (data[1] == 0)
+        text = toUTF8(cast(wchar[])data);
+    }
+    else
+      text = cast(char[])data;
     break;
   case BOM.UTF8:
     text = cast(char[])data[3..$];