Mercurial > projects > dil
changeset 352:321df078e247
- Added code for detecting Unicode format of a file without a BOM.
author | aziz |
---|---|
date | Sun, 26 Aug 2007 00:55:05 +0000 |
parents | 97a9a2d7d46d |
children | a3847ea28fee |
files | trunk/src/dil/File.d |
diffstat | 1 files changed, 19 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/trunk/src/dil/File.d Sun Aug 26 00:12:00 2007 +0000
+++ b/trunk/src/dil/File.d Sun Aug 26 00:55:05 2007 +0000
@@ -5,6 +5,7 @@
 module dil.File;
 import std.stdio, std.file, std.utf;
 
+/// Loads a file in any valid Unicode format and converts it to UTF-8.
 char[] loadFile(char[] fileName)
 {
   ubyte[] data = cast(ubyte[]) std.file.read(fileName);
@@ -14,6 +15,24 @@
   switch (bom)
   {
   case BOM.None:
+    // No BOM found. The spec says in this case that the first character
+    // must be an ASCII character.
+    if (data.length >= 4)
+    {
+      if (data[0..3] == cast(ubyte[3])x"00 00 00")
+        text = toUTF8(cast(dchar[])utf32BEtoLE(data));
+      else if (data[1..4] == cast(ubyte[3])x"00 00 00")
+        text = toUTF8(cast(dchar[])data);
+    }
+    else if (data.length >= 2)
+    {
+      if (data[0] == 0)
+        text = toUTF8(cast(wchar[])utf16BEtoLE(data));
+      else if (data[1] == 0)
+        text = toUTF8(cast(wchar[])data);
+    }
+    else
+      text = cast(char[])data;
     break;
   case BOM.UTF8:
     text = cast(char[])data[3..$];
```