Mercurial > projects > dil
diff trunk/src/dil/File.d @ 518:8f86bb9ef715
Added module dil.Converter and dil.FileBOM.
Moved code from dil.File to dil.FileBOM.
Added opCatAssign to class InformationManager.
Added encode() function to dil.Unicode.
author | Aziz Köksal <aziz.koeksal@gmail.com> |
---|---|
date | Sat, 15 Dec 2007 18:55:06 +0100 |
parents | 3aa00474b381 |
children | 50e64bab9c7a |
line wrap: on
line diff
--- a/trunk/src/dil/File.d Fri Dec 14 23:10:35 2007 +0100 +++ b/trunk/src/dil/File.d Sat Dec 15 18:55:06 2007 +0100 @@ -3,6 +3,10 @@ License: GPL3 +/ module dil.File; + +import dil.FileBOM; +import dil.Information; +import dil.Converter; import tango.io.File; import std.utf; import common; @@ -10,10 +14,16 @@ /// Loads a file in any valid Unicode format and converts it to UTF-8. char[] loadFile(char[] filePath) { - return data2Utf8(cast(ubyte[]) (new File(filePath)).read()); + return data2UTF8(cast(ubyte[]) (new File(filePath)).read()); } -char[] data2Utf8(ubyte[] data) +char[] loadFile(char[] filePath, InformationManager infoMan) +{ + auto converter = Converter(filePath, infoMan); + return converter.data2UTF8(cast(ubyte[]) (new File(filePath)).read()); +} + +char[] data2UTF8(ubyte[] data) { if (data.length == 0) return null; @@ -139,82 +149,3 @@ ubyte[] test = cast(ubyte[])x"1A 2B 3C 4D"; assert(utf32BEtoLE(test) == cast(ubyte[])x"4D 3C 2B 1A"); } - -/// Byte Order Mark -enum BOM -{ - None, /// No BOM - UTF8, /// UTF-8: EF BB BF - UTF16BE, /// UTF-16 Big Endian: FE FF - UTF16LE, /// UTF-16 Little Endian: FF FE - UTF32BE, /// UTF-32 Big Endian: 00 00 FE FF - UTF32LE /// UTF-32 Little Endian: FF FE 00 00 -} - -BOM tellBOM(ubyte[] data) -{ - BOM bom = BOM.None; - if (data.length < 2) - return bom; - - if (data[0..2] == cast(ubyte[2])x"FE FF") - { - bom = BOM.UTF16BE; // FE FF - } - else if (data[0..2] == cast(ubyte[2])x"FF FE") - { - if (data.length >= 4 && data[2..4] == cast(ubyte[2])x"00 00") - bom = BOM.UTF32LE; // FF FE 00 00 - else - bom = BOM.UTF16LE; // FF FE XX XX - } - else if (data[0..2] == cast(ubyte[2])x"00 00") - { - if (data.length >= 4 && data[2..4] == cast(ubyte[2])x"FE FF") - bom = BOM.UTF32BE; // 00 00 FE FF - } - else if (data[0..2] == cast(ubyte[2])x"EF BB") - { - if (data.length >= 3 && data[2] == '\xBF') - bom = BOM.UTF8; // EF BB BF - } - return bom; -} - -unittest -{ - Stdout("Testing function tellBOM().\n"); - - struct Data2BOM - { - ubyte[] 
data; - BOM bom; - } - alias ubyte[] ub; - const Data2BOM[] map = [ - {cast(ub)x"12", BOM.None}, - {cast(ub)x"12 34", BOM.None}, - {cast(ub)x"00 00 FF FE", BOM.None}, - {cast(ub)x"EF BB FF", BOM.None}, - - {cast(ub)x"EF", BOM.None}, - {cast(ub)x"EF BB", BOM.None}, - {cast(ub)x"FE", BOM.None}, - {cast(ub)x"FF", BOM.None}, - {cast(ub)x"00", BOM.None}, - {cast(ub)x"00 00", BOM.None}, - {cast(ub)x"00 00 FE", BOM.None}, - - {cast(ub)x"FE FF 00", BOM.UTF16BE}, - {cast(ub)x"FE FF 00 FF", BOM.UTF16BE}, - - {cast(ub)x"EF BB BF", BOM.UTF8}, - {cast(ub)x"FE FF", BOM.UTF16BE}, - {cast(ub)x"FF FE", BOM.UTF16LE}, - {cast(ub)x"00 00 FE FF", BOM.UTF32BE}, - {cast(ub)x"FF FE 00 00", BOM.UTF32LE} - ]; - - foreach (pair; map) - assert(tellBOM(pair.data) == pair.bom, Format("Failed at {0}", pair.data)); -}