# HG changeset patch # User Aziz Köksal # Date 1189869146 -7200 # Node ID 33b566df6af420e24851c6322aac21eabc2acd25 # Parent 4d36eea1bbc9ac540a6c6e1a80641fbed09a806a Migrated project to Tango. Decremented the numbers of the format placeholders in the localized messages by one. Replaced all instances of writef/ln with Stdout. Added module common.d with string aliases and a global Layout!(char) instance. Replaced %s format specifiers with index placeholders in html/xml_tags. Changed member Information.arguments to string message. Copied std.metastrings, std.uni and std.utf from Phobos. diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/AUTHORS --- a/trunk/AUTHORS Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/AUTHORS Sat Sep 15 17:12:26 2007 +0200 @@ -1,4 +1,4 @@ Founder: Aziz Köksal Contributors: - Jari-Matti Mäkelä \ No newline at end of file + Jari-Matti Mäkelä diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/cmd/Generate.d --- a/trunk/src/cmd/Generate.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/cmd/Generate.d Sat Sep 15 17:12:26 2007 +0200 @@ -7,7 +7,8 @@ import dil.Token; import dil.Parser, dil.Lexer; import dil.File; -import std.stdio; +import tango.io.Print; +import common; enum DocOption { @@ -20,9 +21,9 @@ void execute(string fileName, DocOption options) { if (options & DocOption.Syntax) - syntaxToDoc(fileName, options); + syntaxToDoc(fileName, Stdout, options); else - tokensToDoc(fileName, options); + tokensToDoc(fileName, Stdout, options); } char[] xml_escape(char[] text) @@ -39,6 +40,26 @@ return result; } + +/// Find object in subject and return position. +/// Returns -1 if no match was found.
+int find(char[] subject, char[] object) +{ + if (object.length > subject.length) + return -1; + foreach (i, c; subject) + { + if (c == object[0]) + { + if (object.length > (subject.length - i)) + return -1; + if (object == subject[i..i+object.length]) + return i; + } + } + return -1; +} + char[] getShortClassName(Node n) { static char[][] name_table; @@ -48,7 +69,6 @@ if (name !is null) return name; - alias std.string.find find; name = n.classinfo.name; name = name[find(name, ".")+1 .. $]; // Remove package name name = name[find(name, ".")+1 .. $]; // Remove module name @@ -123,9 +143,9 @@ // CompEnd ``, // Error - `

%s(%d)%s: %s

`, + `

{1}({2}){3}: {4}

`, // SyntaxBegin - ``, + ``, // SyntaxEnd ``, // SrcBegin @@ -135,15 +155,15 @@ // Tail ``, // Identifier - `%s`, + `{0}`, // Comment - `%s`, + `{1}`, // StringLiteral - `%s`, + `{0}`, // CharLiteral - `%s`, + `{0}`, // Operator - `%s`, + `{0}`, // LorG `<>`, // LessEqual @@ -159,21 +179,21 @@ // Not `!`, // Number - `%s`, + `{0}`, // Bracket - `%s`, + `{0}`, // SpecialToken - `%s`, + `{0}`, // Shebang - `%s`, + `{0}`, // Keyword - `%s`, + `{0}`, // HLineBegin ``, // HLineEnd "", // Filespec - `%s`, + `{0}`, ]; auto xml_tags = [ @@ -186,11 +206,11 @@ // CompEnd ``, // Error - `%s(%d)%s: %s`, + `{1}({2}){3}: {4}`, // SyntaxBegin - `<%s t="%s">`, + `<{0} t="{1}">`, // SyntaxEnd - ``, + ``, // SrcBegin ``, // SrcEnd @@ -198,15 +218,15 @@ // Tail ``, // Identifier - "%s", + "{0}", // Comment - `%s`, + `{1}`, // StringLiteral - "%s", + "{0}", // CharLiteral - "%s", + "{0}", // Operator - "%s", + "{0}", // LorG `<>`, // LessEqual @@ -222,27 +242,27 @@ // Not `!`, // Number - "%s", + "{0}", // Bracket - "
%s
", + "
{0}
", // SpecialToken - "%s", + "{0}", // Shebang - "%s", + "{0}", // Keyword - "%s", + "{0}", // HLineBegin "", // HLineEnd "", // Filespec - "%s", + "{0}", ]; static assert(html_tags.length == DocPart.max+1); static assert(xml_tags.length == DocPart.max+1); -void syntaxToDoc(string fileName, DocOption options) +void syntaxToDoc(string fileName, Print!(char) print, DocOption options) { auto tags = options & DocOption.HTML ? html_tags : xml_tags; auto sourceText = loadFile(fileName); @@ -252,22 +272,22 @@ auto token = lx.head; - writefln(tags[DocPart.Head]); + print(tags[DocPart.Head]~\n); // Output error messages. if (lx.errors.length || parser.errors.length) { - writefln(tags[DocPart.CompBegin]); + print(tags[DocPart.CompBegin]~\n); foreach (error; lx.errors) { - writefln(tags[DocPart.Error], "L", lx.fileName, error.loc, "L", xml_escape(error.getMsg)); + print.formatln(tags[DocPart.Error], "L", lx.fileName, error.loc, "L", xml_escape(error.getMsg)); } foreach (error; parser.errors) { - writefln(tags[DocPart.Error], "P", lx.fileName, error.loc, "P", xml_escape(error.getMsg)); + print.formatln(tags[DocPart.Error], "P", lx.fileName, error.loc, "P", xml_escape(error.getMsg)); } - writefln(tags[DocPart.CompEnd]); + print(tags[DocPart.CompEnd]~\n); } - writef(tags[DocPart.SrcBegin]); + print(tags[DocPart.SrcBegin]); Node[][Token*] beginNodes, endNodes; @@ -316,10 +336,10 @@ if (nodes) { foreach (node; *nodes) - writef(tags[DocPart.SyntaxBegin], getTag(node.category), getShortClassName(node)); + print.format(tags[DocPart.SyntaxBegin], getTag(node.category), getShortClassName(node)); } - printToken(token, tags); + printToken(token, tags, print); nodes = token in endNodes; @@ -327,15 +347,15 @@ { foreach_reverse (node; *nodes) if (options & DocOption.HTML) - writef(tags[DocPart.SyntaxEnd]); + print(tags[DocPart.SyntaxEnd]); else - writef(tags[DocPart.SyntaxEnd], getTag(node.category)); + print.format(tags[DocPart.SyntaxEnd], getTag(node.category)); } } - 
writef(tags[DocPart.SrcEnd], tags[DocPart.Tail]); + print(\n~tags[DocPart.SrcEnd])(\n~tags[DocPart.Tail]); } -void tokensToDoc(string fileName, DocOption options) +void tokensToDoc(string fileName, Print!(char) print, DocOption options) { auto tags = options & DocOption.HTML ? html_tags : xml_tags; auto sourceText = loadFile(fileName); @@ -343,41 +363,41 @@ auto token = lx.getTokens(); - writefln(tags[DocPart.Head]); + print(tags[DocPart.Head]~\n); if (lx.errors.length) { - writefln(tags[DocPart.CompBegin]); + print(tags[DocPart.CompBegin]~\n); foreach (error; lx.errors) { - writefln(tags[DocPart.Error], "L", lx.fileName, error.loc, "L", xml_escape(error.getMsg)); + print.formatln(tags[DocPart.Error], "L", lx.fileName, error.loc, "L", xml_escape(error.getMsg)); } - writefln(tags[DocPart.CompEnd]); + print(tags[DocPart.CompEnd]~\n); } - writef(tags[DocPart.SrcBegin]); + print(tags[DocPart.SrcBegin]); // Traverse linked list and print tokens. while (token.type != TOK.EOF) { token = token.next; - printToken(token, tags); + printToken(token, tags, print); } - writef(\n, tags[DocPart.SrcEnd], \n, tags[DocPart.Tail]); + print(\n~tags[DocPart.SrcEnd])(\n~tags[DocPart.Tail]); } -void printToken(Token* token, string[] tags) +void printToken(Token* token, string[] tags, Print!(char) print) { alias DocPart DP; string srcText = xml_escape(token.srcText); // Print whitespace. 
if (token.ws) - writef(token.ws[0..token.start - token.ws]); + print(token.ws[0..token.start - token.ws]); switch(token.type) { case TOK.Identifier: - writef(tags[DP.Identifier], srcText); + print.format(tags[DP.Identifier], srcText); break; case TOK.Comment: string t; @@ -389,13 +409,13 @@ default: assert(0); } - writef(tags[DP.Comment], t, srcText); + print.format(tags[DP.Comment], t, srcText); break; case TOK.String: - writef(tags[DP.StringLiteral], srcText); + print.format(tags[DP.StringLiteral], srcText); break; case TOK.CharLiteral, TOK.WCharLiteral, TOK.DCharLiteral: - writef(tags[DP.CharLiteral], srcText); + print.format(tags[DP.CharLiteral], srcText); break; case TOK.Assign, TOK.Equal, TOK.Less, TOK.Greater, @@ -419,69 +439,69 @@ TOK.UorL, TOK.UorLorE, TOK.LorEorG: - writef(tags[DP.Operator], srcText); + print.format(tags[DP.Operator], srcText); break; case TOK.LorG: - writef(tags[DP.LorG]); + print(tags[DP.LorG]); break; case TOK.LessEqual: - writef(tags[DP.LessEqual]); + print(tags[DP.LessEqual]); break; case TOK.GreaterEqual: - writef(tags[DP.GreaterEqual]); + print(tags[DP.GreaterEqual]); break; case TOK.AndLogical: - writef(tags[DP.AndLogical]); + print(tags[DP.AndLogical]); break; case TOK.OrLogical: - writef(tags[DP.OrLogical]); + print(tags[DP.OrLogical]); break; case TOK.NotEqual: - writef(tags[DP.NotEqual]); + print(tags[DP.NotEqual]); break; case TOK.Not: // Check if this is part of a template instantiation. - // TODO: comments aren't skipped. + // TODO: comments aren't skipped. 
Use Token.nextNWS and Token.prevNWS if (token.prev.type == TOK.Identifier && token.next.type == TOK.LParen) goto default; - writef(tags[DP.Not]); + print(tags[DP.Not]); break; case TOK.Int32, TOK.Int64, TOK.Uint32, TOK.Uint64, TOK.Float32, TOK.Float64, TOK.Float80, TOK.Imaginary32, TOK.Imaginary64, TOK.Imaginary80: - writef(tags[DP.Number], srcText); + print.format(tags[DP.Number], srcText); break; case TOK.LParen, TOK.RParen, TOK.LBracket, TOK.RBracket, TOK.LBrace, TOK.RBrace: - writef(tags[DP.Bracket], srcText); + print.format(tags[DP.Bracket], srcText); break; case TOK.Shebang: - writef(tags[DP.Shebang], srcText); + print.format(tags[DP.Shebang], srcText); break; case TOK.HashLine: void printWS(char* start, char* end) { if (start != end) - writef(start[0 .. end - start]); + print(start[0 .. end - start]); } - writef(tags[DP.HLineBegin]); + print(tags[DP.HLineBegin]); auto num = token.line_num; if (num is null) { - writef(token.srcText); - writef(tags[DP.HLineEnd]); + print(token.srcText); + print(tags[DP.HLineEnd]); break; } // Print whitespace between #line and number auto ptr = token.start; printWS(ptr, num.start); // prints "#line" as well - printToken(num, tags); + printToken(num, tags, print); if (token.line_filespec) { auto filespec = token.line_filespec; // Print whitespace between number and filespec printWS(num.end, filespec.start); - writef(tags[DP.Filespec], xml_escape(filespec.srcText)); + print.format(tags[DP.Filespec], xml_escape(filespec.srcText)); ptr = filespec.end; } @@ -489,14 +509,14 @@ ptr = num.end; // Print remaining whitespace printWS(ptr, token.end); - writef(tags[DP.HLineEnd]); + print(tags[DP.HLineEnd]); break; default: if (token.isKeyword()) - writef(tags[DP.Keyword], srcText); + print.format(tags[DP.Keyword], srcText); else if (token.isSpecialToken) - writef(tags[DP.SpecialToken], srcText); + print.format(tags[DP.SpecialToken], srcText); else - writef("%s", srcText); + print(srcText); } } diff -r 4d36eea1bbc9 -r 33b566df6af4 
trunk/src/cmd/ImportGraph.d --- a/trunk/src/cmd/ImportGraph.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/cmd/ImportGraph.d Sat Sep 15 17:12:26 2007 +0200 @@ -10,11 +10,13 @@ import dil.File; import dil.Module; import dil.Settings; -import std.stdio : writefln, writef; -import std.path : getDirName, dirSep = sep; -import std.file : exists; -import std.string : replace; -import std.regexp; +import tango.text.Regex : RegExp = Regex; +import tango.io.FilePath; +import tango.io.FileConst; +import tango.text.Util; +import common; + +alias FileConst.PathSeparatorChar dirSep; enum IGraphOption { @@ -34,8 +36,9 @@ string modulePath; foreach (path; importPaths) { - modulePath = path ~ (path[$-1] == dirSep[0] ? "" : dirSep) ~ moduleFQN ~ ".d"; - if (exists(modulePath)) + modulePath = path ~ (path[$-1] == dirSep ? "" : [dirSep]) ~ moduleFQN ~ ".d"; + // TODO: also check for *.di? + if ((new FilePath(modulePath)).exists()) return modulePath; } return null; @@ -78,7 +81,7 @@ regexps ~= new RegExp(strRegexp); // Add directory of file and global directories to import paths. - auto fileDir = getDirName(filePath); + auto fileDir = (new FilePath(filePath)).folder(); if (fileDir.length) importPaths ~= fileDir; importPaths ~= GlobalSettings.importPaths; @@ -95,7 +98,7 @@ // Ignore module names matching regular expressions. 
foreach (rx; regexps) - if (rx.test(replace(moduleFQNPath, dirSep, "."))) + if (rx.test(replace(moduleFQNPath, dirSep, '.'))) return null; auto modulePath = findModulePath(moduleFQNPath, importPaths); @@ -105,7 +108,7 @@ if (options & IGraphOption.IncludeUnlocatableModules) { mod = new Vertex(null); - mod.setFQN(replace(moduleFQNPath, dirSep, ".")); + mod.setFQN(replace(moduleFQNPath, dirSep, '.')); loadedModules[moduleFQNPath] = mod; loadedModulesList ~= mod; mod.id = loadedModulesList.length -1; @@ -173,7 +176,7 @@ return; foreach (vertex; vertices) { - writefln(indent, vertex.filePath); + Stdout(indent)(vertex.filePath).newline; if (vertex.outgoing.length) printPaths(vertex.outgoing, level-1, indent~" "); } @@ -185,7 +188,7 @@ return; foreach (vertex; vertices) { - writefln(indent, vertex.getFQN()); + Stdout(indent)(vertex.getFQN()).newline; if (vertex.outgoing.length) printList(vertex.outgoing, level-1, indent~" "); } @@ -202,28 +205,28 @@ IGraphOption.HighlightCyclicEdges)) analyzeGraph(loadedModulesList, edges.dup); - writefln("Digraph ModuleDependencies\n{"); + Stdout("Digraph ModuleDependencies\n{\n"); if (options & IGraphOption.HighlightCyclicVertices) foreach (i, module_; loadedModulesList) - writefln(` n%d [label="%s"%s];`, i, module_.getFQN(), (module_.isCyclic ? ",style=filled,fillcolor=tomato" : "")); + Stdout.format(` n{0} [label="{1}"{2}];`, i, module_.getFQN(), (module_.isCyclic ? ",style=filled,fillcolor=tomato" : "")).newline; else foreach (i, module_; loadedModulesList) - writefln(` n%d [label="%s"];`, i, module_.getFQN()); + Stdout.format(` n{0} [label="{1}"];`, i, module_.getFQN()).newline; foreach (edge; edges) - writefln(` n%d -> n%d%s;`, edge.outgoing.id, edge.incoming.id, (edge.isCyclic ? "[color=red]" : "")); + Stdout.format(` n{0} -> n{1}{2};`, edge.outgoing.id, edge.incoming.id, (edge.isCyclic ? 
"[color=red]" : "")).newline; if (options & IGraphOption.GroupByFullPackageName) foreach (packageName, vertices; verticesByPckgName) { - writef(` subgraph "cluster_%s" {`\n` label="%s";color=blue;`"\n ", packageName, packageName); + Stdout.format(` subgraph "cluster_{0}" {`\n` label="{1}";color=blue;`"\n ", packageName, packageName); foreach (module_; vertices) - writef(`n%d;`, module_.id); - writefln("\n }"); + Stdout.format(`n{0};`, module_.id); + Stdout("\n }\n"); } - writefln("}"); + Stdout("}\n"); } void analyzeGraph(Vertex[] vertices, Edge[] edges) diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/cmd/Statistics.d --- a/trunk/src/cmd/Statistics.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/cmd/Statistics.d Sat Sep 15 17:12:26 2007 +0200 @@ -6,7 +6,7 @@ import dil.Token; import dil.File; import dil.Lexer; -import std.stdio; +import common; struct Statistics { @@ -24,7 +24,6 @@ auto lx = new Lexer(sourceText, fileName); auto token = lx.getTokens(); - char* end = lx.text.ptr; Statistics stats; // Traverse linked list. @@ -33,9 +32,10 @@ token = token.next; // Count whitespace characters - if (end != token.start) + if (token.ws) { - stats.whitespaceCount += token.start - end; + // TODO: naive method doesn't account for \r\n, LS and PS. 
+ stats.whitespaceCount += token.start - token.ws; } switch (token.type) @@ -58,21 +58,20 @@ if (token.isWhitespace) stats.wsTokenCount++; - - end = token.end; } - writefln("Whitespace character count: %s\n" - "Whitespace token count: %s\n" - "Keyword count: %s\n" - "Identifier count: %s\n" - "Number count: %s\n" - "Comment count: %s\n" - "Lines of code: %s", - stats.whitespaceCount, - stats.wsTokenCount, - stats.keywordCount, - stats.identCount, - stats.numberCount, - stats.commentCount, - lx.loc); + Stdout.formatln( + "Whitespace character count: {0}\n" + "Whitespace token count: {1}\n" + "Keyword count: {2}\n" + "Identifier count: {3}\n" + "Number count: {4}\n" + "Comment count: {5}\n" + "Lines of code: {6}", + stats.whitespaceCount, + stats.wsTokenCount, + stats.keywordCount, + stats.identCount, + stats.numberCount, + stats.commentCount, + lx.loc); } diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/common.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/src/common.d Sat Sep 15 17:12:26 2007 +0200 @@ -0,0 +1,18 @@ +/++ + Author: Aziz Köksal + License: GPL3 ++/ +module common; + +public import tango.io.Stdout; +public import tango.text.convert.Layout; + +alias char[] string; +alias wchar[] wstring; +alias dchar[] dstring; + +static Layout!(char) Format; +static this() +{ + Format = new typeof(Format); +} diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Declarations.d --- a/trunk/src/dil/Declarations.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Declarations.d Sat Sep 15 17:12:26 2007 +0200 @@ -74,7 +74,7 @@ this.packages = moduleFQN[0..$-1]; } - string getFQN() + char[] getFQN() { auto pname = getPackageName('.'); if (pname.length) @@ -83,14 +83,14 @@ return getName(); } - string getName() + char[] getName() { if (moduleName) return moduleName.identifier; return null; } - string getPackageName(char separator) + char[] getPackageName(char separator) { char[] pname; foreach (pckg; packages) @@ -120,9 +120,9 @@ this.isStatic = isStatic; } - string[] 
getModuleFQNs(char separator) + char[][] getModuleFQNs(char separator) { - string[] FQNs; + char[][] FQNs; foreach (moduleFQN; moduleFQNs) { char[] FQN; diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Expressions.d --- a/trunk/src/dil/Expressions.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Expressions.d Sat Sep 15 17:12:26 2007 +0200 @@ -706,7 +706,7 @@ this.strings = strings; } - string getString() + char[] getString() { char[] buffer; foreach (strTok; strings) diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/File.d --- a/trunk/src/dil/File.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/File.d Sat Sep 15 17:12:26 2007 +0200 @@ -3,15 +3,17 @@ License: GPL3 +/ module dil.File; -import std.stdio, std.file, std.utf; +import tango.io.File; +import std.utf; +import common; /// Loads a file in any valid Unicode format and converts it to UTF-8. -char[] loadFile(char[] fileName) +char[] loadFile(char[] filePath) { - return data2text(cast(ubyte[]) std.file.read(fileName)); + return data2Utf8(cast(ubyte[]) (new File(filePath)).read()); } -char[] data2text(ubyte[] data) +char[] data2Utf8(ubyte[] data) { char[] text; BOM bom = tellBOM(data); @@ -73,7 +75,7 @@ unittest { - writefln("Testing function data2text()."); + Stdout("Testing function data2Utf8().\n"); struct Data2Text { union @@ -97,7 +99,7 @@ {u8:"\x00\x00\xFE\xFF\0\0\0s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e", text:"source"}, {u8:"\xFF\xFE\x00\x00s\0\0\0o\0\0\0u\0\0\0r\0\0\0c\0\0\0e\0\0\0", text:"source"}, ]; - alias data2text f; + alias data2Utf8 f; foreach (pair; map) assert(f(pair.data) == pair.text); } @@ -105,7 +107,7 @@ ubyte[] utf16BEtoLE(ubyte[] data) { if (data.length % 2) - throw new Exception("UTF-16 big endian source file byte length must be divisible by 2."); + throw new Exception("The byte length of a UTF-16 big endian source file must be divisible by 2."); wchar[] result = cast(wchar[]) new ubyte[data.length]; assert(result.length*2 == data.length); // BE to LE "1A 2B" -> "2B 1A" @@ 
-117,7 +119,7 @@ ubyte[] utf32BEtoLE(ubyte[] data) { if (data.length % 4) - throw new Exception("UTF-32 big endian source file byte length must be divisible by 4."); + throw new Exception("The byte length of a UTF-32 big endian source file must be divisible by 4."); dchar[] result = cast(dchar[]) new ubyte[data.length]; assert(result.length*4 == data.length); // BE to LE "1A 2B 3C 4D" -> "4D 3C 2B 1A" @@ -172,7 +174,7 @@ unittest { - writefln("Testing function tellBOM()."); + Stdout("Testing function tellBOM().\n"); struct Data2BOM { @@ -205,5 +207,5 @@ ]; foreach (pair; map) - assert(tellBOM(pair.data) == pair.bom, std.string.format("Failed at %s", pair.data)); + assert(tellBOM(pair.data) == pair.bom, Format("Failed at {0}", pair.data)); } diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Identifier.d --- a/trunk/src/dil/Identifier.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Identifier.d Sat Sep 15 17:12:26 2007 +0200 @@ -4,6 +4,7 @@ +/ module dil.Identifier; import dil.Token; +import common; struct Identifier { diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Information.d --- a/trunk/src/dil/Information.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Information.d Sat Sep 15 17:12:26 2007 +0200 @@ -4,7 +4,7 @@ +/ module dil.Information; import dil.Messages; -import std.stdarg; +import common; enum InfoType { @@ -18,18 +18,18 @@ MID id; InfoType type; uint loc; - string[] arguments; + string message; - this(InfoType type, MID id, uint loc, string[] arguments) + this(InfoType type, MID id, uint loc, string message) { this.id = id; this.type = type; this.loc = loc; - this.arguments = arguments; + this.message = message; } string getMsg() { - return format_args(GetMsg(id), arguments); + return this.message; } } diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Lexer.d --- a/trunk/src/dil/Lexer.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Lexer.d Sat Sep 15 17:12:26 2007 +0200 @@ -10,13 +10,13 @@ import dil.Messages; import dil.HtmlEntities; 
import dil.Settings; -import std.stdio; +import tango.stdc.stdlib : strtof, strtod, strtold; +import tango.stdc.errno : errno, ERANGE; +import tango.stdc.time : time_t, time, ctime; +import tango.stdc.string : strlen; import std.utf; import std.uni; -import std.c.stdlib : strtof, strtod, strtold, getErrno, ERANGE; -import std.c.time : time_t, time, ctime; -import std.c.string : strlen; -import std.string; +import common; const char[3] LS = \u2028; const char[3] PS = \u2029; @@ -169,7 +169,7 @@ out { assert(text.ptr <= t.start && t.start < end); - assert(text.ptr < t.end && t.end <= end, std.string.format(t.type)); + assert(text.ptr < t.end && t.end <= end, Token.toString(t.type)); } body { @@ -1693,7 +1693,7 @@ ++p; t.type += 3; // Switch to imaginary counterpart. } - if (getErrno == ERANGE) + if (errno() == ERANGE) error(MID.OverflowFloatNumber); t.end = p; } @@ -1914,15 +1914,15 @@ return this.token.type; } - void error(MID id, ...) + void error(MID mid, ...) { // if (reportErrors) - errors ~= new Information(InfoType.Lexer, id, this.errorLoc, arguments(_arguments, _argptr)); + errors ~= new Information(InfoType.Lexer, mid, this.errorLoc, Format(_arguments, _argptr, GetMsg(mid))); } unittest { - writefln("Testing method Lexer.peek()"); + Stdout("Testing method Lexer.peek()\n"); string sourceText = "unittest { }"; auto lx = new Lexer(sourceText, null); @@ -2030,7 +2030,7 @@ unittest { - writefln("Testing Lexer."); + Stdout("Testing Lexer.\n"); string[] toks = [ ">", ">=", ">>", ">>=", ">>>", ">>>=", "<", "<=", "<>", "<>=", "<<", "<<=", "!", "!<", "!>", "!<=", "!>=", "!<>", @@ -2054,7 +2054,7 @@ do { assert(i < toks.length); - assert(token.srcText == toks[i], std.string.format("Scanned '%s' but expected '%s'", token.srcText, toks[i])); + assert(token.srcText == toks[i], Format("Scanned '{0}' but expected '{1}'", token.srcText, toks[i])); ++i; token = token.next; } while (token.type != TOK.EOF) diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Messages.d --- 
a/trunk/src/dil/Messages.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Messages.d Sat Sep 15 17:12:26 2007 +0200 @@ -4,7 +4,7 @@ +/ module dil.Messages; import dil.Settings; -import std.stdarg; +import common; /// Index into table of compiler messages. enum MID @@ -76,41 +76,17 @@ return GlobalSettings.messages[mid]; } -char[] format(MID mid, ...) +char[] FormatMsg(MID mid, ...) { - auto args = arguments(_arguments, _argptr); - return format_args(GetMsg(mid), args); -} - -char[] format(char[] format_str, ...) -{ - auto args = arguments(_arguments, _argptr); - return format_args(format_str, args); + return Format(_arguments, _argptr, GetMsg(mid)); } -char[] format_args(char[] format_str, char[][] args) -{ - char[] result = format_str; - - foreach (i, arg; args) - result = std.string.replace(result, std.string.format("{%s}", i+1), arg); - - return result; -} - -char[][] arguments(TypeInfo[] tinfos, void* argptr) +/+ +char[] FormatArray(char[] format_str, char[][] args) { - char[][] args; - foreach (ti; tinfos) - { - if (ti == typeid(char[])) - args ~= va_arg!(char[])(argptr); - else if (ti == typeid(int)) - args ~= std.string.format(va_arg!(int)(argptr)); - else if (ti == typeid(dchar)) - args ~= std.string.format(va_arg!(dchar)(argptr)); - else - assert(0, "argument type not supported yet."); - } - return args; + auto tiinfos = new TypeInfo[args.length]; + foreach (ref tiinfo; tiinfos) + tiinfo = typeid(char[]); + return Format(tiinfos, args.ptr, format_str); } ++/ diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Module.d --- a/trunk/src/dil/Module.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Module.d Sat Sep 15 17:12:26 2007 +0200 @@ -8,7 +8,11 @@ import dil.Parser; import dil.Lexer; import dil.File; -import std.path; +import tango.io.FilePath; +import tango.io.FileConst; +import common; + +alias FileConst.PathSeparatorChar dirSep; class Module { @@ -51,7 +55,7 @@ else { // Take base name of file path as module name. 
- auto str = getBaseName(getName(filePath)); + auto str = (new FilePath(filePath)).name(); if (Lexer.isNonReservedIdentifier(str)) { this.moduleFQN = moduleName = str; @@ -68,7 +72,7 @@ { string[] result; foreach (import_; imports) - result ~= import_.getModuleFQNs(std.path.sep[0]); + result ~= import_.getModuleFQNs(dirSep); return result; } @@ -93,7 +97,7 @@ string getFQNPath() { if (packageName.length) - return packageName ~ std.path.sep ~ moduleName; + return packageName ~ dirSep ~ moduleName; else return moduleName; } diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Parser.d --- a/trunk/src/dil/Parser.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Parser.d Sat Sep 15 17:12:26 2007 +0200 @@ -12,7 +12,7 @@ import dil.Statements; import dil.Expressions; import dil.Types; -import std.stdio; +import common; private alias TOK T; @@ -4196,7 +4196,7 @@ return null; } - void error(MID id, ...) + void error(MID mid, ...) { if (trying) { @@ -4206,7 +4206,7 @@ // if (errors.length == 10) // return; - errors ~= new Information(InfoType.Parser, id, lx.loc, arguments(_arguments, _argptr)); + errors ~= new Information(InfoType.Parser, mid, lx.loc, Format(_arguments, _argptr, GetMsg(mid))); // writefln("(%d)P: ", lx.loc, errors[$-1].getMsg); } } diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Settings.d --- a/trunk/src/dil/Settings.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Settings.d Sat Sep 15 17:12:26 2007 +0200 @@ -6,7 +6,8 @@ import dil.Messages; import dil.Parser, dil.SyntaxTree, dil.Declarations, dil.Expressions; import dil.File; -import std.metastrings; +import std.metastrings : FormatT = Format, ToString; +import common; template Pad(char[] str, uint amount) { @@ -32,20 +33,20 @@ const VERSION_MINOR = 0; } -const string VERSION = Format!("%s.%s", VERSION_MAJOR, Pad!(VERSION_MINOR, 3)); +const string VERSION = FormatT!("%s.%s", VERSION_MAJOR, Pad!(VERSION_MINOR, 3)); const VENDOR = "dil"; /// Used in main help message. 
const COMPILED_WITH = __VENDOR__; /// ditto -const COMPILED_VERSION = Format!("%s.%s", __VERSION__/1000, Pad!(__VERSION__%1000, 3)); +const COMPILED_VERSION = FormatT!("%s.%s", __VERSION__/1000, Pad!(__VERSION__%1000, 3)); /// ditto const COMPILED_DATE = __TIMESTAMP__; struct GlobalSettings { static: - string language; /// Language of messages catalogue to load. + string language; /// Language of loaded messages catalogue. string[] messages; /// Table of localized compiler messages. string[] importPaths; /// Array of import paths to look for modules. void load() @@ -134,7 +135,7 @@ } } if (messages.length != MID.max+1) - throw new Exception(std.string.format("messages table in %s must exactly have %d entries, but %s were found.", fileName, MID.max+1, messages.length)); + throw new Exception(Format("messages table in {0} must exactly have {1} entries, but {2} were found.", fileName, MID.max+1, messages.length)); GlobalSettings.messages = messages; } } diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/SyntaxTree.d --- a/trunk/src/dil/SyntaxTree.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/SyntaxTree.d Sat Sep 15 17:12:26 2007 +0200 @@ -4,6 +4,7 @@ +/ module dil.SyntaxTree; import dil.Token; +import common; enum NodeCategory { diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/dil/Token.d --- a/trunk/src/dil/Token.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/dil/Token.d Sat Sep 15 17:12:26 2007 +0200 @@ -3,8 +3,9 @@ License: GPL3 +/ module dil.Token; -import std.c.stdlib : malloc, free; -import std.outofmemory; +import common; +import tango.stdc.stdlib : malloc, free; +import tango.core.Exception; struct Position { @@ -221,10 +222,7 @@ { void* p = malloc(size); if (p is null) - version(Tango) - throw new OutOfMemoryException(__FILE__, __LINE__); - else - throw new OutOfMemoryException(); + throw new OutOfMemoryException(__FILE__, __LINE__); *cast(Token*)p = Token.init; return p; } diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/lang_de.d --- 
a/trunk/src/lang_de.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/lang_de.d Sat Sep 15 17:12:26 2007 +0200 @@ -7,7 +7,7 @@ string[] messages = [ // Lexer messages: - "illegales Zeichen gefunden: '{1}'", + "illegales Zeichen gefunden: '{0}'", "ungültiges Unicodezeichen.", "ungültige UTF-8-Sequenz.", // '' @@ -22,7 +22,7 @@ // "" "unterminiertes Zeichenkettenliteral.", // x"" - "Nicht-Hexzeichen '{1}' in Hexzeichenkette gefunden.", + "Nicht-Hexzeichen '{0}' in Hexzeichenkette gefunden.", "ungerade Anzahl von Hexziffern in Hexzeichenkette.", "unterminierte Hexzeichenkette.", // /* */ /+ +/ @@ -32,10 +32,10 @@ "unterminierte rohe Zeichenkette.", "unterminierte Backquote-Zeichenkette.", // \x \u \U - "undefinierte Escapesequenz '{1}' gefunden.", + "undefinierte Escapesequenz '{0}' gefunden.", "unzureichende Anzahl von Hexziffern in Escapesequenz.", // \&[a-zA-Z][a-zA-Z0-9]+; - "undefinierte HTML-Entität '{1}'", + "undefinierte HTML-Entität '{0}'", "unterminierte HTML-Entität.", "HTML-Entitäten müssen mit einem Buchstaben beginnen.", // integer overflows @@ -53,24 +53,24 @@ "Exponenten müssen mit einer Dezimalziffer anfangen.", // Parser messages: - "erwartete '{1}', fand aber '{2}'.", - "'{1}' ist redundant.", + "erwartete '{0}', fand aber '{1}'.", + "'{0}' ist redundant.", "Template-Tupel-Parameter dürfen nur am Ende auftreten.", "der 'in'-Vertrag der Funktion wurde bereits geparsed.", "der 'out'-Vertrag der Funktion wurde bereits geparsed.", "es wurde kein Verbindungstyp angegeben.", - "unbekannter Verbindungstyp '{1}'; gültig sind C, C++, D, Windows, Pascal und System.", + "unbekannter Verbindungstyp '{0}'; gültig sind C, C++, D, Windows, Pascal und System.", // Help messages: - `dil v{1} + `dil v{0} Copyright (c) 2007, Aziz Köksal. Lizensiert unter der GPL3. Befehle: -{2} +{1} Geben Sie 'dil help ' ein, um mehr Hilfe zu einem bestimmten Befehl zu erhalten. 
-Kompiliert mit {3} v{4} am {5}.`, +Kompiliert mit {2} v{3} am {4}.`, `Generiere ein XML- oder HTML-Dokument aus einer D-Quelltextdatei. Verwendung: diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/lang_en.d --- a/trunk/src/lang_en.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/lang_en.d Sat Sep 15 17:12:26 2007 +0200 @@ -7,7 +7,7 @@ string[] messages = [ // Lexer messages: - "illegal character found: '{1}'", + "illegal character found: '{0}'", "invalid Unicode character.", "invalid UTF-8 sequence.", // '' @@ -22,7 +22,7 @@ // "" "unterminated string literal.", // x"" - "non-hex character '{1}' found in hex string.", + "non-hex character '{0}' found in hex string.", "odd number of hex digits in hex string.", "unterminated hex string.", // /* */ /+ +/ @@ -32,10 +32,10 @@ "unterminated raw string.", "unterminated back quote string.", // \x \u \U - "found undefined escape sequence '{1}'.", + "found undefined escape sequence '{0}'.", "insufficient number of hex digits in escape sequence.", // \&[a-zA-Z][a-zA-Z0-9]+; - "undefined HTML entity '{1}'", + "undefined HTML entity '{0}'", "unterminated HTML entity.", "HTML entities must begin with a letter.", // integer overflows @@ -53,23 +53,23 @@ "exponents must start with a digit.", // Parser messages - "expected '{1}', but found '{2}'.", - "'{1}' is redundant.", + "expected '{0}', but found '{1}'.", + "'{0}' is redundant.", "template tuple parameters can only be last.", "the functions 'in' contract was already parsed.", "the functions 'out' contract was already parsed.", "no linkage type was specified.", - "unrecognized linkage type '{1}'; valid types are C, C++, D, Windows, Pascal und System.", + "unrecognized linkage type '{0}'; valid types are C, C++, D, Windows, Pascal and System.", // Help messages: - `dil v{1} + `dil v{0} Copyright (c) 2007 by Aziz Köksal. Licensed under the GPL3. Subcommands: -{2} +{1} Type 'dil help ' for more help on a particular subcommand.
-Compiled with {3} v{4} on {5}.`, +Compiled with {2} v{3} on {4}.`, `Generate an XML or HTML document from a D source file. Usage: diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/lang_fi.d --- a/trunk/src/lang_fi.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/lang_fi.d Sat Sep 15 17:12:26 2007 +0200 @@ -22,7 +22,7 @@ // "" "päättämätön merkkijonoliteraali.", // x"" - "ei-heksamerkki '{1}' löytyi heksajonossa.", + "ei-heksamerkki '{0}' löytyi heksajonossa.", "pariton määrä heksanumeroita heksajonossa.", "päättämätön heksajono.", // /* */ /+ +/ @@ -32,10 +32,10 @@ "päättämätön raakamerkkijono.", "päättämätön gravisaksenttimerkkijono.", // \x \u \U - "löydettiin määrittelemätön escape-sekvenssi.", // TODO: Insert '{1}' + "löydettiin määrittelemätön escape-sekvenssi.", // TODO: Insert '{0}' "riittämätön määrä heksanumeroita escape-sekvenssissä.", // \&[a-zA-Z][a-zA-Z0-9]+; - "määrittelemätön HTML-entiteetti '{1}'", + "määrittelemätön HTML-entiteetti '{0}'", "päättämätön HTML-entiteetti.", "HTML-entiteettien tulee alkaa kirjaimella.", // integer overflows @@ -53,23 +53,23 @@ "eksponenttien tulee alkaa numerolla.", // Parser messages - "odotettiin '{1}':a, mutta löydettiin '{2}'.", - "'{1}' on redundantti.", + "odotettiin '{0}':a, mutta löydettiin '{1}'.", + "'{0}' on redundantti.", "tupla voi esiintyä ainoastaan mallin viimeisenä parametrina.", "funktion alkuehto jäsennettiin jo.", "funktion loppuehto jäsennettiin jo.", "linkitystyyppiä ei määritelty.", - "tunnistamaton linkitystyyppi '{1}'; sallittuja tyyppejä ovat C, C++, D, Windows, Pascal ja System.", + "tunnistamaton linkitystyyppi '{0}'; sallittuja tyyppejä ovat C, C++, D, Windows, Pascal ja System.", // Help messages: - `dil v{1} + `dil v{0} Copyright (c) 2007, Aziz Köksal. GPL3-lisensöity. Alikomennot: -{2} +{1} Lisäohjeita tietystä alitoiminnosta saa kirjoittamalla 'dil help '. -Käännetty {3}:n versiolla {4} {5}.`, +Käännetty {2}:n versiolla {3} {4}.`, `Luo XML- tai HTML-dokumentti D-lähdekoodista. 
diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/lang_tr.d --- a/trunk/src/lang_tr.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/lang_tr.d Sat Sep 15 17:12:26 2007 +0200 @@ -7,7 +7,7 @@ string[] messages = [ // Lexer messages: - "illegal karakter bulundu: '{1}'", + "illegal karakter bulundu: '{0}'", "geçersiz Unikod karakteri.", "geçersiz UTF-8 serisi.", // '' @@ -22,7 +22,7 @@ // "" "kapanmamış çift tırnak dizgisi.", // x"" - "heks sayı olmayan karakter '{1}' heks dizgisi içinde bulundu.", + "heks sayı olmayan karakter '{0}' heks dizgisi içinde bulundu.", "heks dizginin içindeki sayılar çifter çifter olmalıdır.", "kapanmamış heks dizgisi.", // /* */ /+ +/ @@ -32,10 +32,10 @@ "kapanmamış çiğ dizgisi.", "kapanmamış ters tırnak dizgisi.", // \x \u \U - "tanımlanmamış çıkış serisi '{1}' bulundu.", + "tanımlanmamış çıkış serisi '{0}' bulundu.", "heksadesimal çıkış serisi sayıları yeterli değil.", // \&[a-zA-Z][a-zA-Z0-9]+; - "tanımlanmamış HTML varlık '{1}'", + "tanımlanmamış HTML varlık '{0}'", "kapanmamış HTML varlık.", "HTML varlık bir harf ile başlamalı.", // integer overflows @@ -53,23 +53,23 @@ "üsler desimal sayı ile başlamalı.", // Parser messages - "'{1}' beklendi, ama '{2}' bulundu.", - "'{1}' lüzumsuz.", + "'{0}' beklendi, ama '{1}' bulundu.", + "'{0}' lüzumsuz.", "şablon tuple parametre son sırada olmalı.", "fonksiyonun 'in' kontratı daha önceden ayrıştırılmış.", "fonksiyonun 'out' kontratı daha önceden ayrıştırılmış.", "bağlantı tüp (linkage type) belirtilmedi.", - "bilinmeyen bağlantı tüpü (linkage type) '{1}'; geçerli olanlar C, C++, D, Windows, Pascal ve System.", + "bilinmeyen bağlantı tüpü (linkage type) '{0}'; geçerli olanlar C, C++, D, Windows, Pascal ve System.", // Help messages: - `dil v{1} + `dil v{0} Copyright (c) 2007, Aziz Köksal. Lisans GPL3. Komutlar: -{2} +{1} Belirli komut'a yardım edinmek için 'dil help ' yazınız. 
-Bu yazılım {3} v{4} ile {5} tarihinde derletilmiş.`, +Bu yazılım {2} v{3} ile {4} tarihinde derletilmiş.`, `Bir D kaynak kodundan XML veya HTML dosyası oluştur. Kullanım: diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/main.d --- a/trunk/src/main.d Wed Sep 12 21:03:41 2007 +0200 +++ b/trunk/src/main.d Sat Sep 15 17:12:26 2007 +0200 @@ -13,14 +13,16 @@ import cmd.Generate; import cmd.Statistics; import cmd.ImportGraph; -import std.stdio, std.conv; +import common; + +import Integer = tango.text.convert.Integer; void main(char[][] args) { GlobalSettings.load(); if (args.length <= 1) - return writefln(helpMain()); + return Stdout(helpMain()).newline; string command = args[1]; switch (command) @@ -59,7 +61,7 @@ else if(strbeg(arg, "-r")) regexps ~= arg[2..$]; else if(strbeg(arg, "-l")) - levels = toUint(arg[2..$]); + levels = Integer.parse (arg[2..$]); else switch (arg) { @@ -96,7 +98,7 @@ if (args.length == 3) printHelp(args[2]); else - writefln(helpMain()); + Stdout(helpMain()).newline; break; default: } @@ -120,7 +122,7 @@ char[] helpMain() { - return format(MID.HelpMain, VERSION, COMMANDS, COMPILED_WITH, COMPILED_VERSION, COMPILED_DATE); + return FormatMsg(MID.HelpMain, VERSION, COMMANDS, COMPILED_WITH, COMPILED_VERSION, COMPILED_DATE); } void printHelp(char[] command) @@ -137,7 +139,7 @@ default: msg = helpMain(); } - writefln(msg); + Stdout(msg).newline; } void parse(string fileName) @@ -158,6 +160,6 @@ print(root.children, ""); foreach (error; parser.errors) { - writefln(`%s(%d)P: %s`, parser.lx.fileName, error.loc, error.getMsg); + Stdout.format(`{0}({1})P: {2}`, parser.lx.fileName, error.loc, error.getMsg).newline; } } diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/std/metastrings.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/src/std/metastrings.d Sat Sep 15 17:12:26 2007 +0200 @@ -0,0 +1,225 @@ + +// Written in the D programming language. + +/** + * Templates with which to do compile time manipulation of strings.
+ * + * Macros: + * WIKI = Phobos/StdMetastrings + * Copyright: + * Public Domain + */ + +/* + * Authors: + * Walter Bright, Digital Mars, www.digitalmars.com + * Don Clugston + */ + +/* + Note: this is not the original file! + Modified by Aziz Köksal: + Only changed some types from string to char[] +*/ + +module std.metastrings; + +/** + * Formats constants into a string at compile time. + * Analogous to std.string.format(). + * Parameters: + * A = tuple of constants, which can be strings, + * characters, or integral values. + * Formats: + * The formats supported are %s for strings, and %% + * for the % character. + * Example: + * --- +import std.metastrings; +import std.stdio; + +void main() +{ + string s = Format!("Arg %s = %s", "foo", 27); + writefln(s); // "Arg foo = 27" +} + * --- + */ + +template Format(A...) +{ + static if (A.length == 0) + const char[] Format = ""; + else static if (is(typeof(A[0]) : char[])) + const char[] Format = FormatString!(A[0], A[1..$]); + //const char[] Format = FormatString!(A[0]); + else + const char[] Format = ToString!(A[0]) ~ Format!(A[1..$]); +} + +template FormatString(char[] F, A...) +{ + static if (F.length == 0) + const char[] FormatString = Format!(A); + else static if (F.length == 1) + const char[] FormatString = F[0] ~ Format!(A); + else static if (F[0..2] == "%s") + const char[] FormatString = ToString!(A[0]) ~ FormatString!(F[2..$],A[1..$]); + else static if (F[0..2] == "%%") + const char[] FormatString = "%" ~ FormatString!(F[2..$],A); + else static if (F[0] == '%') + static assert(0, "unrecognized format %" ~ F[1]); + else + const char[] FormatString = F[0] ~ FormatString!(F[1..$],A); +} + +/** + * Convert constant argument to a string. 
+ */ + +template ToString(ulong U) +{ + static if (U < 10) + const char[] ToString = "" ~ cast(char)(U + '0'); + else + const char[] ToString = ToString!(U / 10) ~ ToString!(U % 10); +} + +/// ditto +template ToString(long I) +{ + static if (I < 0) + const char[] ToString = "-" ~ ToString!(cast(ulong)(-I)); + else + const char[] ToString = ToString!(cast(ulong)I); +} + +static assert(ToString!(0x100000000) == "4294967296"); + +/// ditto +template ToString(uint U) +{ + const char[] ToString = ToString!(cast(ulong)U); +} + +/// ditto +template ToString(int I) +{ + const char[] ToString = ToString!(cast(long)I); +} + +/// ditto +template ToString(ushort U) +{ + const char[] ToString = ToString!(cast(ulong)U); +} + +/// ditto +template ToString(short I) +{ + const char[] ToString = ToString!(cast(long)I); +} + +/// ditto +template ToString(ubyte U) +{ + const char[] ToString = ToString!(cast(ulong)U); +} + +/// ditto +template ToString(byte I) +{ + const char[] ToString = ToString!(cast(long)I); +} + +/// ditto +template ToString(bool B) +{ + const char[] ToString = B ? "true" : "false"; +} + +/// ditto +template ToString(char[] S) +{ + const char[] ToString = S; +} + +/// ditto +template ToString(char C) +{ + const char[] ToString = "" ~ C; +} + +unittest +{ + char[] s = Format!("hel%slo", "world", -138, 'c', true); + assert(s == "helworldlo-138ctrue"); +} + + +/******** + * Parse unsigned integer literal from the start of string s. 
+ * returns: + * .value = the integer literal as a string, + * .rest = the string following the integer literal + * Otherwise: + * .value = null, + * .rest = s + */ + +template ParseUinteger(char[] s) +{ + static if (s.length == 0) + { const char[] value = ""; + const char[] rest = ""; + } + else static if (s[0] >= '0' && s[0] <= '9') + { const char[] value = s[0] ~ ParseUinteger!(s[1..$]).value; + const char[] rest = ParseUinteger!(s[1..$]).rest; + } + else + { const char[] value = ""; + const char[] rest = s; + } +} + +/******** + * Parse integer literal optionally preceded by '-' + * from the start of string s. + * returns: + * .value = the integer literal as a string, + * .rest = the string following the integer literal + * Otherwise: + * .value = null, + * .rest = s + */ + +template ParseInteger(char[] s) +{ + static if (s.length == 0) + { const char[] value = ""; + const char[] rest = ""; + } + else static if (s[0] >= '0' && s[0] <= '9') + { const char[] value = s[0] ~ ParseUinteger!(s[1..$]).value; + const char[] rest = ParseUinteger!(s[1..$]).rest; + } + else static if (s.length >= 2 && + s[0] == '-' && s[1] >= '0' && s[1] <= '9') + { const char[] value = s[0..2] ~ ParseUinteger!(s[2..$]).value; + const char[] rest = ParseUinteger!(s[2..$]).rest; + } + else + { const char[] value = ""; + const char[] rest = s; + } +} + +unittest +{ + assert(ParseUinteger!("1234abc").value == "1234"); + assert(ParseUinteger!("1234abc").rest == "abc"); + assert(ParseInteger!("-1234abc").value == "-1234"); + assert(ParseInteger!("-1234abc").rest == "abc"); +} + diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/std/uni.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/src/std/uni.d Sat Sep 15 17:12:26 2007 +0200 @@ -0,0 +1,630 @@ + +// Written in the D programming language. + +/* + * Placed into the Public Domain. + * Digital Mars, www.digitalmars.com + * Written by Walter Bright + */ + +/** + * Simple Unicode character classification functions. 
+ * For ASCII classification, see $(LINK2 std_ctype.html, std.ctype). + * Macros: + * WIKI=Phobos/StdUni + * References: + * $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table), + * $(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia), + * $(LINK2 http://www.unicode.org, The Unicode Consortium) + * Trademarks: + * Unicode(tm) is a trademark of Unicode, Inc. + */ + + +module std.uni; + +/** + * Returns !=0 if c is a Unicode lower case character. + */ +int isUniLower(dchar c) +{ + if (c <= 0x7F) + return (c >= 'a' && c <= 'z'); + + return isUniAlpha(c) && c == toUniLower(c); +} + +/** + * Returns !=0 if c is a Unicode upper case character. + */ +int isUniUpper(dchar c) +{ + if (c <= 0x7F) + return (c >= 'A' && c <= 'Z'); + + return isUniAlpha(c) && c == toUniUpper(c); +} + +/** + * If c is a Unicode upper case character, return the lower case + * equivalent, otherwise return c. + */ +dchar toUniLower(dchar c) +{ + if (c >= 'A' && c <= 'Z') + { + c += 32; + } + else if (c >= 0x00C0) + { + if ((c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c<=0x00DE)) + { + c += 32; + } + else if ((c >= 0x0100 && c < 0x0138) || (c > 0x0149 && c < 0x0178)) + { + if (c == 0x0130) + c = 0x0069; + else if ((c & 1) == 0) + c += 1; + } + else if (c == 0x0178) + { + c = 0x00FF; + } + else if ((c >= 0x0139 && c < 0x0149) || (c > 0x0178 && c < 0x017F)) + { + if (c & 1) + c += 1; + } + else if (c >= 0x0200 && c <= 0x0217) + { + if ((c & 1) == 0) + c += 1; + } + else if ((c >= 0x0401 && c <= 0x040C) || (c>= 0x040E && c <= 0x040F)) + { + c += 80; + } + else if (c >= 0x0410 && c <= 0x042F) + { + c += 32; + } + else if (c >= 0x0460 && c <= 0x047F) + { + if ((c & 1) == 0) + c += 1; + } + else if (c >= 0x0531 && c <= 0x0556) + { + c += 48; + } + else if (c >= 0x10A0 && c <= 0x10C5) + { + c += 48; + } + else if (c >= 0xFF21 && c <= 0xFF3A) + { + c += 32; + } + } + return c; +} + +/** + * If c is a Unicode lower case character, return the upper case + * equivalent, otherwise return 
c. + */ +dchar toUniUpper(dchar c) +{ + if (c >= 'a' && c <= 'z') + { + c -= 32; + } + else if (c >= 0x00E0) + { + if ((c >= 0x00E0 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FE)) + { + c -= 32; + } + else if (c == 0x00FF) + { + c = 0x0178; + } + else if ((c >= 0x0100 && c < 0x0138) || (c > 0x0149 && c < 0x0178)) + { + if (c == 0x0131) + c = 0x0049; + else if (c & 1) + c -= 1; + } + else if ((c >= 0x0139 && c < 0x0149) || (c > 0x0178 && c < 0x017F)) + { + if ((c & 1) == 0) + c = c-1; + } + else if (c == 0x017F) + { + c = 0x0053; + } + else if (c >= 0x0200 && c <= 0x0217) + { + if (c & 1) + c = c-1; + } + else if (c >= 0x0430 && c<= 0x044F) + { + c -= 32; + } + else if ((c >= 0x0451 && c <= 0x045C) || (c >=0x045E && c<= 0x045F)) + { + c -= 80; + } + else if (c >= 0x0460 && c <= 0x047F) + { + if (c & 1) + c -= 1; + } + else if (c >= 0x0561 && c < 0x0587) + { + c -= 48; + } + else if (c >= 0xFF41 && c <= 0xFF5A) + { + c -= 32; + } + } + return c; +} + + +/******************************* + * Return !=0 if u is a Unicode alpha character. 
+ * (general Unicode category: Lu, Ll, Lt, Lm and Lo) + * + * Standards: Unicode 5.0.0 + */ + +int isUniAlpha(dchar u) +{ + static dchar table[][2] = + [ + [ 'A', 'Z' ], + [ 'a', 'z' ], + [ 0x00AA, 0x00AA ], + [ 0x00B5, 0x00B5 ], + [ 0x00BA, 0x00BA ], + [ 0x00C0, 0x00D6 ], + [ 0x00D8, 0x00F6 ], + [ 0x00F8, 0x02C1 ], + [ 0x02C6, 0x02D1 ], + [ 0x02E0, 0x02E4 ], + [ 0x02EE, 0x02EE ], + [ 0x037A, 0x037D ], + [ 0x0386, 0x0386 ], + [ 0x0388, 0x038A ], + [ 0x038C, 0x038C ], + [ 0x038E, 0x03A1 ], + [ 0x03A3, 0x03CE ], + [ 0x03D0, 0x03F5 ], + [ 0x03F7, 0x0481 ], + [ 0x048A, 0x0513 ], + [ 0x0531, 0x0556 ], + [ 0x0559, 0x0559 ], + [ 0x0561, 0x0587 ], + [ 0x05D0, 0x05EA ], + [ 0x05F0, 0x05F2 ], + [ 0x0621, 0x063A ], + [ 0x0640, 0x064A ], + [ 0x066E, 0x066F ], + [ 0x0671, 0x06D3 ], + [ 0x06D5, 0x06D5 ], + [ 0x06E5, 0x06E6 ], + [ 0x06EE, 0x06EF ], + [ 0x06FA, 0x06FC ], + [ 0x06FF, 0x06FF ], + [ 0x0710, 0x0710 ], + [ 0x0712, 0x072F ], + [ 0x074D, 0x076D ], + [ 0x0780, 0x07A5 ], + [ 0x07B1, 0x07B1 ], + [ 0x07CA, 0x07EA ], + [ 0x07F4, 0x07F5 ], + [ 0x07FA, 0x07FA ], + [ 0x0904, 0x0939 ], + [ 0x093D, 0x093D ], + [ 0x0950, 0x0950 ], + [ 0x0958, 0x0961 ], + [ 0x097B, 0x097F ], + [ 0x0985, 0x098C ], + [ 0x098F, 0x0990 ], + [ 0x0993, 0x09A8 ], + [ 0x09AA, 0x09B0 ], + [ 0x09B2, 0x09B2 ], + [ 0x09B6, 0x09B9 ], + [ 0x09BD, 0x09BD ], + [ 0x09CE, 0x09CE ], + [ 0x09DC, 0x09DD ], + [ 0x09DF, 0x09E1 ], + [ 0x09F0, 0x09F1 ], + [ 0x0A05, 0x0A0A ], + [ 0x0A0F, 0x0A10 ], + [ 0x0A13, 0x0A28 ], + [ 0x0A2A, 0x0A30 ], + [ 0x0A32, 0x0A33 ], + [ 0x0A35, 0x0A36 ], + [ 0x0A38, 0x0A39 ], + [ 0x0A59, 0x0A5C ], + [ 0x0A5E, 0x0A5E ], + [ 0x0A72, 0x0A74 ], + [ 0x0A85, 0x0A8D ], + [ 0x0A8F, 0x0A91 ], + [ 0x0A93, 0x0AA8 ], + [ 0x0AAA, 0x0AB0 ], + [ 0x0AB2, 0x0AB3 ], + [ 0x0AB5, 0x0AB9 ], + [ 0x0ABD, 0x0ABD ], + [ 0x0AD0, 0x0AD0 ], + [ 0x0AE0, 0x0AE1 ], + [ 0x0B05, 0x0B0C ], + [ 0x0B0F, 0x0B10 ], + [ 0x0B13, 0x0B28 ], + [ 0x0B2A, 0x0B30 ], + [ 0x0B32, 0x0B33 ], + [ 0x0B35, 0x0B39 ], + [ 0x0B3D, 0x0B3D ], + [ 
0x0B5C, 0x0B5D ], + [ 0x0B5F, 0x0B61 ], + [ 0x0B71, 0x0B71 ], + [ 0x0B83, 0x0B83 ], + [ 0x0B85, 0x0B8A ], + [ 0x0B8E, 0x0B90 ], + [ 0x0B92, 0x0B95 ], + [ 0x0B99, 0x0B9A ], + [ 0x0B9C, 0x0B9C ], + [ 0x0B9E, 0x0B9F ], + [ 0x0BA3, 0x0BA4 ], + [ 0x0BA8, 0x0BAA ], + [ 0x0BAE, 0x0BB9 ], + [ 0x0C05, 0x0C0C ], + [ 0x0C0E, 0x0C10 ], + [ 0x0C12, 0x0C28 ], + [ 0x0C2A, 0x0C33 ], + [ 0x0C35, 0x0C39 ], + [ 0x0C60, 0x0C61 ], + [ 0x0C85, 0x0C8C ], + [ 0x0C8E, 0x0C90 ], + [ 0x0C92, 0x0CA8 ], + [ 0x0CAA, 0x0CB3 ], + [ 0x0CB5, 0x0CB9 ], + [ 0x0CBD, 0x0CBD ], + [ 0x0CDE, 0x0CDE ], + [ 0x0CE0, 0x0CE1 ], + [ 0x0D05, 0x0D0C ], + [ 0x0D0E, 0x0D10 ], + [ 0x0D12, 0x0D28 ], + [ 0x0D2A, 0x0D39 ], + [ 0x0D60, 0x0D61 ], + [ 0x0D85, 0x0D96 ], + [ 0x0D9A, 0x0DB1 ], + [ 0x0DB3, 0x0DBB ], + [ 0x0DBD, 0x0DBD ], + [ 0x0DC0, 0x0DC6 ], + [ 0x0E01, 0x0E30 ], + [ 0x0E32, 0x0E33 ], + [ 0x0E40, 0x0E46 ], + [ 0x0E81, 0x0E82 ], + [ 0x0E84, 0x0E84 ], + [ 0x0E87, 0x0E88 ], + [ 0x0E8A, 0x0E8A ], + [ 0x0E8D, 0x0E8D ], + [ 0x0E94, 0x0E97 ], + [ 0x0E99, 0x0E9F ], + [ 0x0EA1, 0x0EA3 ], + [ 0x0EA5, 0x0EA5 ], + [ 0x0EA7, 0x0EA7 ], + [ 0x0EAA, 0x0EAB ], + [ 0x0EAD, 0x0EB0 ], + [ 0x0EB2, 0x0EB3 ], + [ 0x0EBD, 0x0EBD ], + [ 0x0EC0, 0x0EC4 ], + [ 0x0EC6, 0x0EC6 ], + [ 0x0EDC, 0x0EDD ], + [ 0x0F00, 0x0F00 ], + [ 0x0F40, 0x0F47 ], + [ 0x0F49, 0x0F6A ], + [ 0x0F88, 0x0F8B ], + [ 0x1000, 0x1021 ], + [ 0x1023, 0x1027 ], + [ 0x1029, 0x102A ], + [ 0x1050, 0x1055 ], + [ 0x10A0, 0x10C5 ], + [ 0x10D0, 0x10FA ], + [ 0x10FC, 0x10FC ], + [ 0x1100, 0x1159 ], + [ 0x115F, 0x11A2 ], + [ 0x11A8, 0x11F9 ], + [ 0x1200, 0x1248 ], + [ 0x124A, 0x124D ], + [ 0x1250, 0x1256 ], + [ 0x1258, 0x1258 ], + [ 0x125A, 0x125D ], + [ 0x1260, 0x1288 ], + [ 0x128A, 0x128D ], + [ 0x1290, 0x12B0 ], + [ 0x12B2, 0x12B5 ], + [ 0x12B8, 0x12BE ], + [ 0x12C0, 0x12C0 ], + [ 0x12C2, 0x12C5 ], + [ 0x12C8, 0x12D6 ], + [ 0x12D8, 0x1310 ], + [ 0x1312, 0x1315 ], + [ 0x1318, 0x135A ], + [ 0x1380, 0x138F ], + [ 0x13A0, 0x13F4 ], + [ 0x1401, 0x166C ], + [ 0x166F, 0x1676 ], + 
[ 0x1681, 0x169A ], + [ 0x16A0, 0x16EA ], + [ 0x1700, 0x170C ], + [ 0x170E, 0x1711 ], + [ 0x1720, 0x1731 ], + [ 0x1740, 0x1751 ], + [ 0x1760, 0x176C ], + [ 0x176E, 0x1770 ], + [ 0x1780, 0x17B3 ], + [ 0x17D7, 0x17D7 ], + [ 0x17DC, 0x17DC ], + [ 0x1820, 0x1877 ], + [ 0x1880, 0x18A8 ], + [ 0x1900, 0x191C ], + [ 0x1950, 0x196D ], + [ 0x1970, 0x1974 ], + [ 0x1980, 0x19A9 ], + [ 0x19C1, 0x19C7 ], + [ 0x1A00, 0x1A16 ], + [ 0x1B05, 0x1B33 ], + [ 0x1B45, 0x1B4B ], + [ 0x1D00, 0x1DBF ], + [ 0x1E00, 0x1E9B ], + [ 0x1EA0, 0x1EF9 ], + [ 0x1F00, 0x1F15 ], + [ 0x1F18, 0x1F1D ], + [ 0x1F20, 0x1F45 ], + [ 0x1F48, 0x1F4D ], + [ 0x1F50, 0x1F57 ], + [ 0x1F59, 0x1F59 ], + [ 0x1F5B, 0x1F5B ], + [ 0x1F5D, 0x1F5D ], + [ 0x1F5F, 0x1F7D ], + [ 0x1F80, 0x1FB4 ], + [ 0x1FB6, 0x1FBC ], + [ 0x1FBE, 0x1FBE ], + [ 0x1FC2, 0x1FC4 ], + [ 0x1FC6, 0x1FCC ], + [ 0x1FD0, 0x1FD3 ], + [ 0x1FD6, 0x1FDB ], + [ 0x1FE0, 0x1FEC ], + [ 0x1FF2, 0x1FF4 ], + [ 0x1FF6, 0x1FFC ], + [ 0x2071, 0x2071 ], + [ 0x207F, 0x207F ], + [ 0x2090, 0x2094 ], + [ 0x2102, 0x2102 ], + [ 0x2107, 0x2107 ], + [ 0x210A, 0x2113 ], + [ 0x2115, 0x2115 ], + [ 0x2119, 0x211D ], + [ 0x2124, 0x2124 ], + [ 0x2126, 0x2126 ], + [ 0x2128, 0x2128 ], + [ 0x212A, 0x212D ], + [ 0x212F, 0x2139 ], + [ 0x213C, 0x213F ], + [ 0x2145, 0x2149 ], + [ 0x214E, 0x214E ], + [ 0x2183, 0x2184 ], + [ 0x2C00, 0x2C2E ], + [ 0x2C30, 0x2C5E ], + [ 0x2C60, 0x2C6C ], + [ 0x2C74, 0x2C77 ], + [ 0x2C80, 0x2CE4 ], + [ 0x2D00, 0x2D25 ], + [ 0x2D30, 0x2D65 ], + [ 0x2D6F, 0x2D6F ], + [ 0x2D80, 0x2D96 ], + [ 0x2DA0, 0x2DA6 ], + [ 0x2DA8, 0x2DAE ], + [ 0x2DB0, 0x2DB6 ], + [ 0x2DB8, 0x2DBE ], + [ 0x2DC0, 0x2DC6 ], + [ 0x2DC8, 0x2DCE ], + [ 0x2DD0, 0x2DD6 ], + [ 0x2DD8, 0x2DDE ], + [ 0x3005, 0x3006 ], + [ 0x3031, 0x3035 ], + [ 0x303B, 0x303C ], + [ 0x3041, 0x3096 ], + [ 0x309D, 0x309F ], + [ 0x30A1, 0x30FA ], + [ 0x30FC, 0x30FF ], + [ 0x3105, 0x312C ], + [ 0x3131, 0x318E ], + [ 0x31A0, 0x31B7 ], + [ 0x31F0, 0x31FF ], + [ 0x3400, 0x4DB5 ], + [ 0x4E00, 0x9FBB ], + [ 0xA000, 0xA48C ], 
+ [ 0xA717, 0xA71A ], + [ 0xA800, 0xA801 ], + [ 0xA803, 0xA805 ], + [ 0xA807, 0xA80A ], + [ 0xA80C, 0xA822 ], + [ 0xA840, 0xA873 ], + [ 0xAC00, 0xD7A3 ], + [ 0xF900, 0xFA2D ], + [ 0xFA30, 0xFA6A ], + [ 0xFA70, 0xFAD9 ], + [ 0xFB00, 0xFB06 ], + [ 0xFB13, 0xFB17 ], + [ 0xFB1D, 0xFB1D ], + [ 0xFB1F, 0xFB28 ], + [ 0xFB2A, 0xFB36 ], + [ 0xFB38, 0xFB3C ], + [ 0xFB3E, 0xFB3E ], + [ 0xFB40, 0xFB41 ], + [ 0xFB43, 0xFB44 ], + [ 0xFB46, 0xFBB1 ], + [ 0xFBD3, 0xFD3D ], + [ 0xFD50, 0xFD8F ], + [ 0xFD92, 0xFDC7 ], + [ 0xFDF0, 0xFDFB ], + [ 0xFE70, 0xFE74 ], + [ 0xFE76, 0xFEFC ], + [ 0xFF21, 0xFF3A ], + [ 0xFF41, 0xFF5A ], + [ 0xFF66, 0xFFBE ], + [ 0xFFC2, 0xFFC7 ], + [ 0xFFCA, 0xFFCF ], + [ 0xFFD2, 0xFFD7 ], + [ 0xFFDA, 0xFFDC ], + [ 0x10000, 0x1000B ], + [ 0x1000D, 0x10026 ], + [ 0x10028, 0x1003A ], + [ 0x1003C, 0x1003D ], + [ 0x1003F, 0x1004D ], + [ 0x10050, 0x1005D ], + [ 0x10080, 0x100FA ], + [ 0x10300, 0x1031E ], + [ 0x10330, 0x10340 ], + [ 0x10342, 0x10349 ], + [ 0x10380, 0x1039D ], + [ 0x103A0, 0x103C3 ], + [ 0x103C8, 0x103CF ], + [ 0x10400, 0x1049D ], + [ 0x10800, 0x10805 ], + [ 0x10808, 0x10808 ], + [ 0x1080A, 0x10835 ], + [ 0x10837, 0x10838 ], + [ 0x1083C, 0x1083C ], + [ 0x1083F, 0x1083F ], + [ 0x10900, 0x10915 ], + [ 0x10A00, 0x10A00 ], + [ 0x10A10, 0x10A13 ], + [ 0x10A15, 0x10A17 ], + [ 0x10A19, 0x10A33 ], + [ 0x12000, 0x1236E ], + [ 0x1D400, 0x1D454 ], + [ 0x1D456, 0x1D49C ], + [ 0x1D49E, 0x1D49F ], + [ 0x1D4A2, 0x1D4A2 ], + [ 0x1D4A5, 0x1D4A6 ], + [ 0x1D4A9, 0x1D4AC ], + [ 0x1D4AE, 0x1D4B9 ], + [ 0x1D4BB, 0x1D4BB ], + [ 0x1D4BD, 0x1D4C3 ], + [ 0x1D4C5, 0x1D505 ], + [ 0x1D507, 0x1D50A ], + [ 0x1D50D, 0x1D514 ], + [ 0x1D516, 0x1D51C ], + [ 0x1D51E, 0x1D539 ], + [ 0x1D53B, 0x1D53E ], + [ 0x1D540, 0x1D544 ], + [ 0x1D546, 0x1D546 ], + [ 0x1D54A, 0x1D550 ], + [ 0x1D552, 0x1D6A5 ], + [ 0x1D6A8, 0x1D6C0 ], + [ 0x1D6C2, 0x1D6DA ], + [ 0x1D6DC, 0x1D6FA ], + [ 0x1D6FC, 0x1D714 ], + [ 0x1D716, 0x1D734 ], + [ 0x1D736, 0x1D74E ], + [ 0x1D750, 0x1D76E ], + [ 0x1D770, 0x1D788 ], + 
[ 0x1D78A, 0x1D7A8 ], + [ 0x1D7AA, 0x1D7C2 ], + [ 0x1D7C4, 0x1D7CB ], + [ 0x20000, 0x2A6D6 ], + [ 0x2F800, 0x2FA1D ], + ]; + + debug + { + for (int i = 0; i < table.length; i++) + { + assert(table[i][0] <= table[i][1]); + if (i < table.length - 1) + { + if (table[i][1] >= table[i + 1][0]) + printf("table[%d][1] = x%x, table[%d][0] = x%x\n", i, table[i][1], i + 1, table[i + 1][0]); + assert(table[i][1] < table[i + 1][0]); + } + } + } + + if (u < 0xAA) + { + if (u < 'A') + goto Lisnot; + if (u <= 'Z') + goto Lis; + if (u < 'a') + goto Lisnot; + if (u <= 'z') + goto Lis; + goto Lisnot; + } + + // Binary search + uint mid; + uint low; + uint high; + + low = 0; + high = table.length - 1; + while (cast(int)low <= cast(int)high) + { + mid = (low + high) >> 1; + if (u < table[mid][0]) + high = mid - 1; + else if (u > table[mid][1]) + low = mid + 1; + else + goto Lis; + } + +Lisnot: + debug + { + for (int i = 0; i < table.length; i++) + { + assert(u < table[i][0] || u > table[i][1]); + } + } + return 0; + +Lis: + debug + { + for (int i = 0; i < table.length; i++) + { + if (u >= table[i][0] && u <= table[i][1]) + return 1; + } + assert(0); // should have been in table + } + return 1; +} + +unittest +{ + for (uint i = 0; i < 0x80; i++) + { + if (i >= 'A' && i <= 'Z') + assert(isUniAlpha(i)); + else if (i >= 'a' && i <= 'z') + assert(isUniAlpha(i)); + else + assert(!isUniAlpha(i)); + } +} diff -r 4d36eea1bbc9 -r 33b566df6af4 trunk/src/std/utf.d --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/src/std/utf.d Sat Sep 15 17:12:26 2007 +0200 @@ -0,0 +1,975 @@ +// utf.d + +/* + * Copyright (C) 2003-2004 by Digital Mars, www.digitalmars.com + * Written by Walter Bright + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * o The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * o Altered source versions must be plainly marked as such, and must not + * be misrepresented as being the original software. + * o This notice may not be removed or altered from any source + * distribution. + */ + +/******************************************** + * Encode and decode UTF-8, UTF-16 and UTF-32 strings. + * + * For Win32 systems, the C wchar_t type is UTF-16 and corresponds to the D + * wchar type. + * For linux systems, the C wchar_t type is UTF-32 and corresponds to + * the D utf.dchar type. + * + * UTF character support is restricted to (\u0000 <= character <= \U0010FFFF). + * + * See_Also: + * $(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia)
+ * $(LINK http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8)
+ *  $(LINK http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335)
+ * Macros:
+ *  WIKI = Phobos/StdUtf
+ */
+
+/*
+  Note: this is not the original file!
+  Modified by Aziz Köksal:
+    Only commented out deprecated class UtfError.
+*/
+
+module std.utf;
+
+// private import std.stdio;
+
+//debug=utf;    // uncomment to turn on debugging printf's
+/+
+deprecated class UtfError : Error
+{
+    size_t idx; // index in string of where error occurred
+
+    this(char[] s, size_t i)
+    {
+        idx = i;
+        super(s);
+    }
+}
++/
+/**********************************
+ * Exception type raised by this module when malformed UTF data is found.
+ */
+
+class UtfException : Exception
+{
+    size_t idx;     /// offset in the string at which the error was detected
+
+    this(char[] s, size_t i)
+    {
+        super(s);
+        idx = i;
+    }
+}
+
+/*******************************
+ * Tells whether c lies in the range of valid UTF-32 characters.
+ *
+ * Surrogate values (0xD800 .. 0xDFFF) and anything above 0x10FFFF are
+ * rejected. \uFFFE and \uFFFF are accepted: an application may use them
+ * internally, although the Unicode standard does not allow them for
+ * interchange.
+ *
+ * Returns: true if c is valid, false otherwise.
+ */
+
+bool isValidDchar(dchar c)
+{
+    // The surrogate range never holds a valid character.
+    if (c >= 0xD800 && c <= 0xDFFF)
+        return false;
+
+    /* No special case for FFFE and FFFF: they are specifically permitted
+     * by the Unicode standard for application internal use.
+     * (thanks to Arcane Jill)
+     */
+    return c <= 0x10FFFF;
+}
+
+unittest
+{
+    debug(utf) printf("utf.isValidDchar.unittest\n");
+    assert(isValidDchar(cast(dchar)'a'));
+    assert(!isValidDchar(cast(dchar)0x1FFFFF));
+}
+
+
+/**
+ * Lookup table indexed by the first byte of a UTF-8 sequence. Each entry is
+ * the length in bytes of the sequence that byte introduces, or 0xFF when the
+ * byte cannot start a sequence.
+ */
+ubyte[256] UTF8stride =
+[
+    // 0x00 .. 0x7F
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    // 0x80 .. 0xBF
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+    // 0xC0 .. 0xDF
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    // 0xE0 .. 0xEF
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+    // 0xF0 .. 0xFF
+    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
+];
+
+/**
+ * stride() returns the length of a UTF-8 sequence starting at index i
+ * in string s.
+ * Returns:
+ *  The number of bytes in the UTF-8 sequence or
+ *  0xFF meaning s[i] is not the start of a UTF-8 sequence.
+ */
+
+uint stride(char[] s, size_t i)
+{
+    ubyte width = UTF8stride[s[i]];
+    return width;
+}
+
+/**
+ * stride() returns the length of a UTF-16 sequence starting at index i
+ * in string s.
+ * Returns: 2 when s[i] is a high surrogate (0xD800 .. 0xDBFF), otherwise 1.
+ */
+
+uint stride(wchar[] s, size_t i)
+{
+    wchar unit = s[i];
+    bool isLeadSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
+    return isLeadSurrogate ? 2 : 1;
+}
+
+/**
+ * stride() returns the length of a UTF-32 sequence starting at index i
+ * in string s.
+ * Returns: The return value will always be 1.
+ */
+
+uint stride(dchar[] s, size_t i)
+{
+    return 1;
+}
+
+/*******************************************
+ * Converts a UTF index into a UCS index: given an index i into s[] that
+ * lies at the start of a UTF character, counts the UCS characters that
+ * precede that index.
+ * Throws: UtfException (char[] and wchar[] versions) when walking the
+ *  string steps past i instead of landing on it, and (char[] version) when
+ *  a byte that cannot start a UTF-8 sequence is met.
+ */
+
+size_t toUCSindex(char[] s, size_t i)
+{
+    size_t count = 0;
+    size_t pos = 0;
+
+    while (pos < i)
+    {
+        size_t width = UTF8stride[s[pos]];
+        if (width == 0xFF)
+            throw new UtfException("1invalid UTF-8 sequence", pos);
+        ++count;
+        pos += width;
+    }
+
+    // Overshooting i means i pointed into the middle of a sequence.
+    if (pos > i)
+        throw new UtfException("1invalid UTF-8 sequence", pos);
+    return count;
+}
+
+/** ditto */
+
+size_t toUCSindex(wchar[] s, size_t i)
+{
+    size_t count = 0;
+    size_t pos = 0;
+
+    while (pos < i)
+    {
+        uint unit = s[pos];
+
+        // A high surrogate occupies two code units together with its partner.
+        pos += (unit >= 0xD800 && unit <= 0xDBFF) ? 2 : 1;
+        ++count;
+    }
+
+    // Overshooting i means i pointed at the second half of a surrogate pair.
+    if (pos > i)
+        throw new UtfException("2invalid UTF-16 sequence", pos);
+    return count;
+}
+
+/** ditto */
+
+size_t toUCSindex(dchar[] s, size_t i)
+{
+    return i;
+}
+
+/******************************************
+ * Given a UCS index n into an array of characters s[], return the UTF index,
+ * i.e. the offset in s[] of the n-th character.
+ * Throws: UtfException (char[] version) when a byte that cannot start a
+ *  UTF-8 sequence is met.
+ */
+
+size_t toUTFindex(char[] s, size_t n)
+{
+    size_t pos = 0;
+
+    for (size_t k = 0; k < n; k++)
+    {
+        uint width = UTF8stride[s[pos]];
+        if (width == 0xFF)
+            throw new UtfException("3invalid UTF-8 sequence", pos);
+        pos += width;
+    }
+    return pos;
+}
+
+/** ditto */
+
+size_t toUTFindex(wchar[] s, size_t n)
+{
+    size_t pos = 0;
+
+    for (size_t k = 0; k < n; k++)
+    {
+        wchar unit = s[pos];
+
+        // Skip both halves of a surrogate pair at once.
+        pos += (unit >= 0xD800 && unit <= 0xDBFF) ? 2 : 1;
+    }
+    return pos;
+}
+
+/** ditto */
+
+size_t toUTFindex(dchar[] s, size_t n)
+{
+    return n;
+}
+
+/* =================== Decode ======================= */
+
+/***************
+ * Decodes and returns character starting at s[idx]. idx is advanced past the
+ * decoded character. If the character is not well formed, a UtfException is
+ * thrown and idx remains unchanged.
+ */
+
+dchar decode(char[] s, inout size_t idx)
+    in
+    {
+        assert(idx >= 0 && idx < s.length);
+    }
+    out (result)
+    {
+        assert(isValidDchar(result));
+    }
+    body
+    {
+        size_t len = s.length;
+        dchar V;
+        size_t i = idx;     // working copy; idx is only written on success
+        char u = s[i];
+
+        if (u & 0x80)
+        {   uint n;
+            char u2;
+
+            /* The following encodings are valid, except for the 5 and 6 byte
+             * combinations:
+             *  0xxxxxxx
+             *  110xxxxx 10xxxxxx
+             *  1110xxxx 10xxxxxx 10xxxxxx
+             *  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+             *  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+             *  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+             */
+            // Count the leading 1 bits of the first byte: that is the
+            // sequence length n.
+            for (n = 1; ; n++)
+            {
+                if (n > 4)
+                    goto Lerr;      // only do the first 4 of 6 encodings
+                if (((u << n) & 0x80) == 0)
+                {
+                    if (n == 1)
+                        goto Lerr;  // 10xxxxxx cannot start a sequence
+                    break;
+                }
+            }
+
+            // Pick off (7 - n) significant bits of B from first byte of octet
+            V = cast(dchar)(u & ((1 << (7 - n)) - 1));
+
+            if (i + (n - 1) >= len)
+                goto Lerr;          // off end of string
+
+            /* The following combinations are overlong, and illegal:
+             *  1100000x (10xxxxxx)
+             *  11100000 100xxxxx (10xxxxxx)
+             *  11110000 1000xxxx (10xxxxxx 10xxxxxx)
+             *  11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
+             *  11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+             */
+            u2 = s[i + 1];
+            if ((u & 0xFE) == 0xC0 ||
+                (u == 0xE0 && (u2 & 0xE0) == 0x80) ||
+                (u == 0xF0 && (u2 & 0xF0) == 0x80) ||
+                (u == 0xF8 && (u2 & 0xF8) == 0x80) ||
+                (u == 0xFC && (u2 & 0xFC) == 0x80))
+                goto Lerr;          // overlong combination
+
+            // Fold in 6 payload bits from each continuation byte.
+            for (uint j = 1; j != n; j++)
+            {
+                u = s[i + j];
+                if ((u & 0xC0) != 0x80)
+                    goto Lerr;      // trailing bytes are 10xxxxxx
+                V = (V << 6) | (u & 0x3F);
+            }
+            if (!isValidDchar(V))
+                goto Lerr;
+            i += n;
+        }
+        else
+        {
+            V = cast(dchar) u;
+            i++;
+        }
+
+        idx = i;
+        return V;
+
+      Lerr:
+        //printf("\ndecode: idx = %d, i = %d, length = %d s = \n'%.*s'\n%x\n'%.*s'\n", idx, i, s.length, s, s[i], s[i .. length]);
+        throw new UtfException("4invalid UTF-8 sequence", i);
+    }
+
+unittest
+{   size_t i;
+    dchar c;
+
+    debug(utf) printf("utf.decode.unittest\n");
+
+    static char[] s1 = "abcd";
+    i = 0;
+    c = decode(s1, i);
+    assert(c == cast(dchar)'a');
+    assert(i == 1);
+    c = decode(s1, i);
+    assert(c == cast(dchar)'b');
+    assert(i == 2);
+
+    static char[] s2 = "\xC2\xA9";
+    i = 0;
+    c = decode(s2, i);
+    assert(c == cast(dchar)'\u00A9');
+    assert(i == 2);
+
+    static char[] s3 = "\xE2\x89\xA0";
+    i = 0;
+    c = decode(s3, i);
+    assert(c == cast(dchar)'\u2260');
+    assert(i == 3);
+
+    // Every entry must be rejected with a UtfException.
+    static char[][] s4 =
+    [   "\xE2\x89",             // too short
+        "\xC0\x8A",
+        "\xE0\x80\x8A",
+        "\xF0\x80\x80\x8A",
+        "\xF8\x80\x80\x80\x8A",
+        "\xFC\x80\x80\x80\x80\x8A",
+    ];
+
+    for (int j = 0; j < s4.length; j++)
+    {
+        try
+        {
+            i = 0;
+            c = decode(s4[j], i);
+            assert(0);
+        }
+        catch (UtfException u)
+        {
+            i = 23;     // marker proving the handler ran
+            delete u;
+        }
+        assert(i == 23);
+    }
+}
+
+/** ditto */
+
+dchar decode(wchar[] s, inout size_t idx)
+    in
+    {
+        assert(idx >= 0 && idx < s.length);
+    }
+    out (result)
+    {
+        assert(isValidDchar(result));
+    }
+    body
+    {
+        char[] msg;
+        size_t i = idx;     // working copy; idx is only written on success
+        uint u = s[i];
+
+        if (u & ~0x7F)
+        {   if (u >= 0xD800 && u <= 0xDBFF)
+            {   uint u2;
+
+                if (i + 1 == s.length)
+                {   msg = "surrogate UTF-16 high value past end of string";
+                    goto Lerr;
+                }
+                u2 = s[i + 1];
+                if (u2 < 0xDC00 || u2 > 0xDFFF)
+                {   msg = "surrogate UTF-16 low value out of range";
+                    goto Lerr;
+                }
+                // Combine the pair; 0xD7C0 is 0xD800 - (0x10000 >> 10).
+                u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
+                i += 2;
+            }
+            else if (u >= 0xDC00 && u <= 0xDFFF)
+            {   msg = "unpaired surrogate UTF-16 value";
+                goto Lerr;
+            }
+            // Note: rejected here although isValidDchar() accepts them.
+            else if (u == 0xFFFE || u == 0xFFFF)
+            {   msg = "illegal UTF-16 value";
+                goto Lerr;
+            }
+            else
+                i++;
+        }
+        else
+        {
+            i++;
+        }
+
+        idx = i;
+        return cast(dchar)u;
+
+      Lerr:
+        throw new UtfException(msg, i);
+    }
+
+/** ditto */
+
+dchar decode(dchar[] s, inout size_t idx)
+    in
+    {
+        assert(idx >= 0 && idx < s.length);
+    }
+    body
+    {
+        size_t i = idx;
+        dchar c = s[i];
+
+        if (!isValidDchar(c))
+            goto Lerr;
+        idx = i + 1;
+        return c;
+
+      Lerr:
+        throw new UtfException("5invalid UTF-32 value", i);
+    }
+
+
+/* =================== Encode ======================= */
+
+/*******************************
+ * Encodes character c and appends it to array s[].
+ * c must satisfy isValidDchar() (checked by the in contract).
+ */
+
+void encode(inout char[] s, dchar c)
+    in
+    {
+        assert(isValidDchar(c));
+    }
+    body
+    {
+        char[] r = s;
+
+        if (c <= 0x7F)
+        {
+            r ~= cast(char) c;
+        }
+        else
+        {
+            char[4] buf;
+            uint L;     // number of bytes of buf in use
+
+            if (c <= 0x7FF)
+            {
+                buf[0] = cast(char)(0xC0 | (c >> 6));
+                buf[1] = cast(char)(0x80 | (c & 0x3F));
+                L = 2;
+            }
+            else if (c <= 0xFFFF)
+            {
+                buf[0] = cast(char)(0xE0 | (c >> 12));
+                buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
+                buf[2] = cast(char)(0x80 | (c & 0x3F));
+                L = 3;
+            }
+            else if (c <= 0x10FFFF)
+            {
+                buf[0] = cast(char)(0xF0 | (c >> 18));
+                buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
+                buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
+                buf[3] = cast(char)(0x80 | (c & 0x3F));
+                L = 4;
+            }
+            else
+            {
+                assert(0);
+            }
+            r ~= buf[0 .. L];
+        }
+        s = r;
+    }
+
+unittest
+{
+    debug(utf) printf("utf.encode.unittest\n");
+
+    char[] s = "abcd";
+    encode(s, cast(dchar)'a');
+    assert(s.length == 5);
+    assert(s == "abcda");
+
+    encode(s, cast(dchar)'\u00A9');
+    assert(s.length == 7);
+    assert(s == "abcda\xC2\xA9");
+    //assert(s == "abcda\u00A9");   // BUG: fix compiler
+
+    encode(s, cast(dchar)'\u2260');
+    assert(s.length == 10);
+    assert(s == "abcda\xC2\xA9\xE2\x89\xA0");
+}
+
+/** ditto */
+
+void encode(inout wchar[] s, dchar c)
+    in
+    {
+        assert(isValidDchar(c));
+    }
+    body
+    {
+        wchar[] r = s;
+
+        if (c <= 0xFFFF)
+        {
+            r ~= cast(wchar) c;
+        }
+        else
+        {
+            // Outside the BMP: emit a high/low surrogate pair.
+            wchar[2] buf;
+
+            buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
+            buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00);
+            r ~= buf;
+        }
+        s = r;
+    }
+
+/** ditto */
+
+void encode(inout dchar[] s, dchar c)
+    in
+    {
+        assert(isValidDchar(c));
+    }
+    body
+    {
+        s ~= c;
+    }
+
+/* =================== Validation ======================= */
+
+/***********************************
+ * Checks to see if string is well formed or not. Throws a UtfException if it is
+ * not. Use to check all untrusted input for correctness.
+ */
+
+void validate(char[] s)
+{
+    size_t len = s.length;
+    size_t i;
+
+    // decode() advances i and throws on the first malformed sequence.
+    for (i = 0; i < len; )
+    {
+        decode(s, i);
+    }
+}
+
+/** ditto */
+
+void validate(wchar[] s)
+{
+    size_t len = s.length;
+    size_t i;
+
+    for (i = 0; i < len; )
+    {
+        decode(s, i);
+    }
+}
+
+/** ditto */
+
+void validate(dchar[] s)
+{
+    size_t len = s.length;
+    size_t i;
+
+    for (i = 0; i < len; )
+    {
+        decode(s, i);
+    }
+}
+
+/* =================== Conversion to UTF8 ======================= */
+
+/*******************
+ * Encodes character c as UTF-8 into buf.
+ * Returns: the slice of buf (1 to 4 bytes) that holds the encoding.
+ * NOTE(review): the result refers to buf - confirm the caller keeps buf
+ * alive for as long as the result is used.
+ */
+
+char[] toUTF8(char[4] buf, dchar c)
+    in
+    {
+        assert(isValidDchar(c));
+    }
+    body
+    {
+        if (c <= 0x7F)
+        {
+            buf[0] = cast(char) c;
+            return buf[0 .. 1];
+        }
+        else if (c <= 0x7FF)
+        {
+            buf[0] = cast(char)(0xC0 | (c >> 6));
+            buf[1] = cast(char)(0x80 | (c & 0x3F));
+            return buf[0 .. 2];
+        }
+        else if (c <= 0xFFFF)
+        {
+            buf[0] = cast(char)(0xE0 | (c >> 12));
+            buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
+            buf[2] = cast(char)(0x80 | (c & 0x3F));
+            return buf[0 .. 3];
+        }
+        else if (c <= 0x10FFFF)
+        {
+            buf[0] = cast(char)(0xF0 | (c >> 18));
+            buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
+            buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
+            buf[3] = cast(char)(0x80 | (c & 0x3F));
+            return buf[0 .. 4];
+        }
+        assert(0);
+    }
+
+/*******************
+ * Encodes string s into UTF-8 and returns the encoded string.
+ * The char[] version returns s itself; its in contract validates s.
+ */
+
+char[] toUTF8(char[] s)
+    in
+    {
+        validate(s);
+    }
+    body
+    {
+        return s;
+    }
+
+/** ditto */
+
+char[] toUTF8(wchar[] s)
+{
+    char[] r;
+    size_t i;
+    size_t slen = s.length;
+
+    r.length = slen;
+
+    for (i = 0; i < slen; i++)
+    {   wchar c = s[i];
+
+        if (c <= 0x7F)
+            r[i] = cast(char)c;     // fast path for ascii
+        else
+        {
+            // First non-ascii unit: keep the i bytes copied so far and
+            // transcode the rest of s one decoded character at a time.
+            r.length = i;
+            // Named d, as in the dchar[] version, so that it does not
+            // shadow the wchar c declared above.
+            foreach (dchar d; s[i .. slen])
+            {
+                encode(r, d);
+            }
+            break;
+        }
+    }
+    return r;
+}
+
+/** ditto */
+
+char[] toUTF8(dchar[] s)
+{
+    char[] r;
+    size_t i;
+    size_t slen = s.length;
+
+    r.length = slen;
+
+    for (i = 0; i < slen; i++)
+    {   dchar c = s[i];
+
+        if (c <= 0x7F)
+            r[i] = cast(char)c;     // fast path for ascii
+        else
+        {
+            // First non-ascii character: keep the i bytes copied so far
+            // and encode the rest of s.
+            r.length = i;
+            foreach (dchar d; s[i .. slen])
+            {
+                encode(r, d);
+            }
+            break;
+        }
+    }
+    return r;
+}
+
+/* =================== Conversion to UTF16 ======================= */
+
+/****************
+ * Encodes character c as UTF-16 into buf.
+ * Returns: the slice of buf (1 or 2 code units) that holds the encoding.
+ * NOTE(review): the result refers to buf - confirm the caller keeps buf
+ * alive for as long as the result is used.
+ */
+
+wchar[] toUTF16(wchar[2] buf, dchar c)
+    in
+    {
+        assert(isValidDchar(c));
+    }
+    body
+    {
+        if (c <= 0xFFFF)
+        {
+            buf[0] = cast(wchar) c;
+            return buf[0 .. 1];
+        }
+        else
+        {
+            // Outside the BMP: emit a high/low surrogate pair.
+            buf[0] = cast(wchar) ((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
+            buf[1] = cast(wchar) (((c - 0x10000) & 0x3FF) + 0xDC00);
+            return buf[0 .. 2];
+        }
+    }
+
+/****************
+ * Encodes string s into UTF-16 and returns the encoded string.
+ * toUTF16z() is suitable for calling the 'W' functions in the Win32 API that take
+ * an LPWSTR or LPCWSTR argument.
+ */
+
+wchar[] toUTF16(char[] s)
+{
+    wchar[] result;
+    size_t total = s.length;
+    size_t pos = 0;
+
+    // Grow to the input size, then reset the length before appending
+    // (presumably to pre-allocate storage - TODO confirm).
+    result.length = total;
+    result.length = 0;
+
+    while (pos < total)
+    {
+        dchar ch = s[pos];
+        if (ch > 0x7F)
+        {
+            // decode() advances pos past the whole sequence.
+            ch = decode(s, pos);
+            encode(result, ch);
+            continue;
+        }
+        // ascii: copy the single byte through
+        pos++;
+        result ~= cast(wchar)ch;
+    }
+    return result;
+}
+
+/** ditto */
+
+wchar* toUTF16z(char[] s)
+{
+    wchar[] result;
+    size_t total = s.length;
+    size_t pos = 0;
+
+    // One extra unit for the terminating zero appended at the end.
+    result.length = total + 1;
+    result.length = 0;
+
+    while (pos < total)
+    {
+        dchar ch = s[pos];
+        if (ch > 0x7F)
+        {
+            // decode() advances pos past the whole sequence.
+            ch = decode(s, pos);
+            encode(result, ch);
+            continue;
+        }
+        // ascii: copy the single byte through
+        pos++;
+        result ~= cast(wchar)ch;
+    }
+    result ~= "\000";
+    return result.ptr;
+}
+
+/** ditto */
+
+wchar[] toUTF16(wchar[] s)
+    in
+    {
+        // Already UTF-16: only checked for well-formedness.
+        validate(s);
+    }
+    body
+    {
+        return s;
+    }
+
+/** ditto */
+
+wchar[] toUTF16(dchar[] s)
+{
+    wchar[] result;
+
+    // Grow to the input size, then reset the length before appending
+    // (presumably to pre-allocate storage - TODO confirm).
+    result.length = s.length;
+    result.length = 0;
+
+    foreach (dchar ch; s)
+    {
+        encode(result, ch);
+    }
+    return result;
+}
+
+/* =================== Conversion to UTF32 ======================= */
+
+/*****
+ * Encodes string s into UTF-32 and returns the encoded string.
+ */
+
+dchar[] toUTF32(char[] s)
+{
+    size_t total = s.length;
+    size_t used = 0;        // number of characters written so far
+    size_t pos = 0;
+    dchar[] result;
+
+    result.length = total;  // the result is never longer than s[]
+    while (pos < total)
+    {
+        dchar ch = s[pos];
+        if (ch < 0x80)
+            pos++;                  // ascii, no need for decode
+        else
+            ch = decode(s, pos);    // advances pos itself
+        result[used] = ch;
+        used++;
+    }
+    return result[0 .. used];
+}
+
+/** ditto */
+
+dchar[] toUTF32(wchar[] s)
+{
+    size_t total = s.length;
+    size_t used = 0;        // number of characters written so far
+    size_t pos = 0;
+    dchar[] result;
+
+    result.length = total;  // the result is never longer than s[]
+    while (pos < total)
+    {
+        dchar ch = s[pos];
+        if (ch < 0x80)
+            pos++;                  // ascii, no need for decode
+        else
+            ch = decode(s, pos);    // advances pos itself
+        result[used] = ch;
+        used++;
+    }
+    return result[0 .. used];
+}
+
+/** ditto */
+
+dchar[] toUTF32(dchar[] s)
+    in
+    {
+        // Already UTF-32: only checked for well-formedness.
+        validate(s);
+    }
+    body
+    {
+        return s;
+    }
+
+/* ================================ tests ================================== */
+
+unittest
+{
+    debug(utf) printf("utf.toUTF.unittest\n");
+
+    // Converts one text between all three encodings and checks every result
+    // against the same text spelled in the target encoding.
+    void roundTrip(char[] text8, wchar[] text16, dchar[] text32)
+    {
+        char[] c;
+        wchar[] w;
+        dchar[] d;
+
+        c = text8;
+        w = toUTF16(c);
+        assert(w == text16);
+        d = toUTF32(c);
+        assert(d == text32);
+
+        c = toUTF8(w);
+        assert(c == text8);
+        d = toUTF32(w);
+        assert(d == text32);
+
+        c = toUTF8(d);
+        assert(c == text8);
+        w = toUTF16(d);
+        assert(w == text16);
+    }
+
+    // plain ascii
+    roundTrip("hello", "hello", "hello");
+    // a character from the BMP
+    roundTrip("hel\u1234o", "hel\u1234o", "hel\u1234o");
+    // a character outside the BMP (surrogate pair in UTF-16)
+    roundTrip("he\U0010AAAAllo", "he\U0010AAAAllo", "he\U0010AAAAllo");
+}