diff trunk/src/dil/File.d @ 518:8f86bb9ef715

Added module dil.Converter and dil.FileBOM. Moved code from dil.File to dil.FileBOM. Added opCatAssign to class InformationManager. Added encode() function to dil.Unicode.
author Aziz Köksal <aziz.koeksal@gmail.com>
date Sat, 15 Dec 2007 18:55:06 +0100
parents 3aa00474b381
children 50e64bab9c7a
line wrap: on
line diff
--- a/trunk/src/dil/File.d	Fri Dec 14 23:10:35 2007 +0100
+++ b/trunk/src/dil/File.d	Sat Dec 15 18:55:06 2007 +0100
@@ -3,6 +3,10 @@
   License: GPL3
 +/
 module dil.File;
+
+import dil.FileBOM;
+import dil.Information;
+import dil.Converter;
 import tango.io.File;
 import std.utf;
 import common;
@@ -10,10 +14,16 @@
 /// Loads a file in any valid Unicode format and converts it to UTF-8.
 char[] loadFile(char[] filePath)
 {
-  return data2Utf8(cast(ubyte[]) (new File(filePath)).read());
+  return data2UTF8(cast(ubyte[]) (new File(filePath)).read());
 }
 
-char[] data2Utf8(ubyte[] data)
+char[] loadFile(char[] filePath, InformationManager infoMan)
+{
+  auto converter = Converter(filePath, infoMan);
+  return converter.data2UTF8(cast(ubyte[]) (new File(filePath)).read());
+}
+
+char[] data2UTF8(ubyte[] data)
 {
   if (data.length == 0)
     return null;
@@ -139,82 +149,3 @@
   ubyte[] test = cast(ubyte[])x"1A 2B 3C 4D";
   assert(utf32BEtoLE(test) == cast(ubyte[])x"4D 3C 2B 1A");
 }
-
-/// Byte Order Mark
-enum BOM
-{
-  None,    /// No BOM
-  UTF8,    /// UTF-8: EF BB BF
-  UTF16BE, /// UTF-16 Big Endian: FE FF
-  UTF16LE, /// UTF-16 Little Endian: FF FE
-  UTF32BE, /// UTF-32 Big Endian: 00 00 FE FF
-  UTF32LE  /// UTF-32 Little Endian: FF FE 00 00
-}
-
-BOM tellBOM(ubyte[] data)
-{
-  BOM bom = BOM.None;
-  if (data.length < 2)
-    return bom;
-
-  if (data[0..2] == cast(ubyte[2])x"FE FF")
-  {
-    bom = BOM.UTF16BE; // FE FF
-  }
-  else if (data[0..2] == cast(ubyte[2])x"FF FE")
-  {
-    if (data.length >= 4 && data[2..4] == cast(ubyte[2])x"00 00")
-      bom = BOM.UTF32LE; // FF FE 00 00
-    else
-      bom = BOM.UTF16LE; // FF FE XX XX
-  }
-  else if (data[0..2] == cast(ubyte[2])x"00 00")
-  {
-    if (data.length >= 4 && data[2..4] == cast(ubyte[2])x"FE FF")
-      bom = BOM.UTF32BE; // 00 00 FE FF
-  }
-  else if (data[0..2] ==  cast(ubyte[2])x"EF BB")
-  {
-    if (data.length >= 3 && data[2] == '\xBF')
-      bom =  BOM.UTF8; // EF BB BF
-  }
-  return bom;
-}
-
-unittest
-{
-  Stdout("Testing function tellBOM().\n");
-
-  struct Data2BOM
-  {
-    ubyte[] data;
-    BOM bom;
-  }
-  alias ubyte[] ub;
-  const Data2BOM[] map = [
-    {cast(ub)x"12",          BOM.None},
-    {cast(ub)x"12 34",       BOM.None},
-    {cast(ub)x"00 00 FF FE", BOM.None},
-    {cast(ub)x"EF BB FF",    BOM.None},
-
-    {cast(ub)x"EF",          BOM.None},
-    {cast(ub)x"EF BB",       BOM.None},
-    {cast(ub)x"FE",          BOM.None},
-    {cast(ub)x"FF",          BOM.None},
-    {cast(ub)x"00",          BOM.None},
-    {cast(ub)x"00 00",       BOM.None},
-    {cast(ub)x"00 00 FE",    BOM.None},
-
-    {cast(ub)x"FE FF 00",    BOM.UTF16BE},
-    {cast(ub)x"FE FF 00 FF", BOM.UTF16BE},
-
-    {cast(ub)x"EF BB BF",    BOM.UTF8},
-    {cast(ub)x"FE FF",       BOM.UTF16BE},
-    {cast(ub)x"FF FE",       BOM.UTF16LE},
-    {cast(ub)x"00 00 FE FF", BOM.UTF32BE},
-    {cast(ub)x"FF FE 00 00", BOM.UTF32LE}
-  ];
-
-  foreach (pair; map)
-    assert(tellBOM(pair.data) == pair.bom, Format("Failed at {0}", pair.data));
-}