Mercurial > projects > ldc
diff lphobos/std/string.d @ 108:288fe1029e1f trunk
[svn r112] Fixed 'case 1,2,3:' style case statements.
Fixed a bunch of bugs with return/break/continue in loops.
Fixed support for the DMDFE hidden implicit return value variable. This can be needed for some foreach statements where the loop body is converted to a nested delegate, but also possibly returns from the function.
Added std.math to phobos.
Added AA runtime support code, done ground work for implementing AAs.
Several other bugfixes.
author | lindquist |
---|---|
date | Tue, 20 Nov 2007 05:29:20 +0100 |
parents | |
children | 373489eeaf90 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lphobos/std/string.d Tue Nov 20 05:29:20 2007 +0100 @@ -0,0 +1,4083 @@ + +// Written in the D programming language. + +/** + * String handling functions. + * + * To copy or not to copy? + * When a function takes a string as a parameter, and returns a string, + * is that string the same as the input string, modified in place, or + * is it a modified copy of the input string? The D array convention is + * "copy-on-write". This means that if no modifications are done, the + * original string (or slices of it) can be returned. If any modifications + * are done, the returned string is a copy. + * + * Macros: + * WIKI = Phobos/StdString + * Copyright: + * Public Domain + */ + +/* Author: + * Walter Bright, Digital Mars, www.digitalmars.com + */ + +// The code is not optimized for speed, that will have to wait +// until the design is solidified. + +module std.string; + +//debug=string; // uncomment to turn on debugging printf's + +//private import std.stdio; +private import std.c.stdio; +private import std.c.stdlib; +private import std.c.string; +private import std.utf; +private import std.uni; +private import std.array; +private import std.format; +private import std.ctype; +private import std.stdarg; + +extern (C) +{ + + size_t wcslen(wchar *); + int wcscmp(wchar *, wchar *); +} + +/* ************* Exceptions *************** */ + +/// Thrown on errors in string functions. +class StringException : Exception +{ + this(char[] msg) /// Constructor + { + super(msg); + } +} + +/* ************* Constants *************** */ + +const char[16] hexdigits = "0123456789ABCDEF"; /// 0..9A..F +const char[10] digits = "0123456789"; /// 0..9 +const char[8] octdigits = "01234567"; /// 0..7 +const char[26] lowercase = "abcdefghijklmnopqrstuvwxyz"; /// a..z +const char[26] uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; /// A..Z +const char[52] letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz"; /// A..Za..z +const char[6] whitespace = " \t\v\r\n\f"; /// ASCII whitespace + +const dchar LS = '\u2028'; /// UTF line separator +const dchar PS = '\u2029'; /// UTF paragraph separator + +/// Newline sequence for this system +version (Windows) + const char[2] newline = "\r\n"; +else version (linux) + const char[1] newline = "\n"; + +/********************************** + * Returns true if c is whitespace + */ + +bool iswhite(dchar c) +{ + return (c <= 0x7F) + ? find(whitespace, c) != -1 + : (c == PS || c == LS); +} + +/********************************* + * Convert string to integer. + */ + +long atoi(char[] s) +{ + return std.c.stdlib.atoi(toStringz(s)); +} + +/************************************* + * Convert string to real. + */ + +real atof(char[] s) +{ char* endptr; + + auto result = strtold(toStringz(s), &endptr); + return result; +} + +/********************************** + * Compare two strings. cmp is case sensitive, icmp is case insensitive. + * Returns: + * <table border=1 cellpadding=4 cellspacing=0> + * $(TR $(TD < 0) $(TD s1 < s2)) + * $(TR $(TD = 0) $(TD s1 == s2)) + * $(TR $(TD > 0) $(TD s1 > s2)) + * </table> + */ + +int cmp(char[] s1, char[] s2) +{ + auto len = s1.length; + int result; + + //printf("cmp('%.*s', '%.*s')\n", s1, s2); + if (s2.length < len) + len = s2.length; + result = memcmp(s1.ptr, s2.ptr, len); + if (result == 0) + result = cast(int)s1.length - cast(int)s2.length; + return result; +} + +/********************************* + * ditto + */ + +int icmp(char[] s1, char[] s2) +{ + auto len = s1.length; + int result; + + if (s2.length < len) + len = s2.length; + version (Win32) + { + result = memicmp(s1.ptr, s2.ptr, len); + } + version (linux) + { + for (size_t i = 0; i < len; i++) + { + if (s1[i] != s2[i]) + { + char c1 = s1[i]; + char c2 = s2[i]; + + if (c1 >= 'A' && c1 <= 'Z') + c1 += cast(int)'a' - cast(int)'A'; + if (c2 >= 'A' && c2 <= 'Z') + c2 += cast(int)'a' - cast(int)'A'; + result = cast(int)c1 - cast(int)c2; + if (result) + break; + } + } + } + if (result == 0) + result = cast(int)s1.length - cast(int)s2.length; + return result; +} + +unittest +{ + int result; + + debug(string) printf("string.cmp.unittest\n"); + result = icmp("abc", "abc"); + assert(result == 0); + result = icmp(null, null); + assert(result == 0); + result = icmp("", ""); + assert(result == 0); + result = icmp("abc", "abcd"); + assert(result < 0); + result = icmp("abcd", "abc"); + assert(result > 0); + result = icmp("abc", "abd"); + assert(result < 0); + result = icmp("bbc", "abc"); + assert(result > 0); +} + +/* ******************************** + * Converts a D array of chars to a C-style 0 terminated string. + * Deprecated: replaced with toStringz(). + */ + +deprecated char* toCharz(char[] s) +{ + return toStringz(s); +} + +/********************************* + * Convert array of chars s[] to a C-style 0 terminated string. + */ + +char* toStringz(char[] s) + in + { + } + out (result) + { + if (result) + { assert(strlen(result) == s.length); + assert(memcmp(result, s.ptr, s.length) == 0); + } + } + body + { + char[] copy; + + if (s.length == 0) + return ""; + + /+ Unfortunately, this isn't reliable. + We could make this work if string literals are put + in read-only memory and we test if s[] is pointing into + that. + + /* Peek past end of s[], if it's 0, no conversion necessary. + * Note that the compiler will put a 0 past the end of static + * strings, and the storage allocator will put a 0 past the end + * of newly allocated char[]'s. + */ + char* p = &s[0] + s.length; + if (*p == 0) + return s; + +/ + + // Need to make a copy + copy = new char[s.length + 1]; + copy[0..s.length] = s; + copy[s.length] = 0; + return copy.ptr; + } + +unittest +{ + debug(string) printf("string.toStringz.unittest\n"); + + char* p = toStringz("foo"); + assert(strlen(p) == 3); + char foo[] = "abbzxyzzy"; + p = toStringz(foo[3..5]); + assert(strlen(p) == 2); + + char[] test = ""; + p = toStringz(test); + assert(*p == 0); +} + +/****************************************** + * find, ifind _find first occurrence of c in string s. + * rfind, irfind _find last occurrence of c in string s. + * + * find, rfind are case sensitive; ifind, irfind are case insensitive. + * Returns: + * Index in s where c is found, -1 if not found. + */ + +int find(char[] s, dchar c) +{ + if (c <= 0x7F) + { // Plain old ASCII + auto p = cast(char*)memchr(s.ptr, c, s.length); + if (p) + return p - cast(char *)s; + else + return -1; + } + + // c is a universal character + foreach (int i, dchar c2; s) + { + if (c == c2) + return i; + } + return -1; +} + +unittest +{ + debug(string) printf("string.find.unittest\n"); + + int i; + + i = find(null, cast(dchar)'a'); + assert(i == -1); + i = find("def", cast(dchar)'a'); + assert(i == -1); + i = find("abba", cast(dchar)'a'); + assert(i == 0); + i = find("def", cast(dchar)'f'); + assert(i == 2); +} + + +/****************************************** + * ditto + */ + +int ifind(char[] s, dchar c) +{ + char* p; + + if (c <= 0x7F) + { // Plain old ASCII + char c1 = cast(char) std.ctype.tolower(c); + + foreach (int i, char c2; s) + { + c2 = cast(char)std.ctype.tolower(c2); + if (c1 == c2) + return i; + } + } + else + { // c is a universal character + dchar c1 = std.uni.toUniLower(c); + + foreach (int i, dchar c2; s) + { + c2 = std.uni.toUniLower(c2); + if (c1 == c2) + return i; + } + } + return -1; +} + +unittest +{ + debug(string) printf("string.ifind.unittest\n"); + + int i; + + i = ifind(null, cast(dchar)'a'); + assert(i == -1); + i = ifind("def", cast(dchar)'a'); + assert(i == -1); + i = ifind("Abba", cast(dchar)'a'); + assert(i == 0); + i = ifind("def", cast(dchar)'F'); + assert(i == 2); + + char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun."; + + i = ifind("def", cast(char)'f'); + assert(i == 2); + + i = ifind(sPlts, cast(char)'P'); + assert(i == 23); + i = ifind(sPlts, cast(char)'R'); + assert(i == 2); +} + + +/****************************************** + * ditto + */ + +int rfind(char[] s, dchar c) +{ + size_t i; + + if (c <= 0x7F) + { // Plain old ASCII + for (i = s.length; i-- != 0;) + { + if (s[i] == c) + break; + } + return i; + } + + // c is a universal character + char[4] buf; + char[] t; + t = std.utf.toUTF8(buf, c); + return rfind(s, t); +} + +unittest +{ + debug(string) printf("string.rfind.unittest\n"); + + int i; + + i = rfind(null, cast(dchar)'a'); + assert(i == -1); + i = rfind("def", cast(dchar)'a'); + assert(i == -1); + i = rfind("abba", cast(dchar)'a'); + assert(i == 3); + i = rfind("def", cast(dchar)'f'); + assert(i == 2); +} + +/****************************************** + * ditto + */ + +int irfind(char[] s, dchar c) +{ + size_t i; + + if (c <= 0x7F) + { // Plain old ASCII + char c1 = cast(char) std.ctype.tolower(c); + + for (i = s.length; i-- != 0;) + { char c2 = s[i]; + + c2 = cast(char) std.ctype.tolower(c2); + if (c1 == c2) + break; + } + } + else + { // c is a universal character + dchar c1 = std.uni.toUniLower(c); + + for (i = s.length; i-- != 0;) + { char cx = s[i]; + + if (cx <= 0x7F) + continue; // skip, since c is not ASCII + if ((cx & 0xC0) == 0x80) + continue; // skip non-starting UTF-8 chars + + size_t j = i; + dchar c2 = std.utf.decode(s, j); + c2 = std.uni.toUniLower(c2); + if (c1 == c2) + break; + } + } + return i; +} + +unittest +{ + debug(string) printf("string.irfind.unittest\n"); + + int i; + + i = irfind(null, cast(dchar)'a'); + assert(i == -1); + i = irfind("def", cast(dchar)'a'); + assert(i == -1); + i = irfind("AbbA", cast(dchar)'a'); + assert(i == 3); + i = irfind("def", cast(dchar)'F'); + assert(i == 2); + + char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun."; + + i = irfind("def", cast(char)'f'); + assert(i == 2); + + i = irfind(sPlts, cast(char)'M'); + assert(i == 34); + i = irfind(sPlts, cast(char)'S'); + assert(i == 40); +} + + +/****************************************** + * find, ifind _find first occurrence of sub[] in string s[]. + * rfind, irfind _find last occurrence of sub[] in string s[]. + * + * find, rfind are case sensitive; ifind, irfind are case insensitive. + * Returns: + * Index in s where c is found, -1 if not found. + */ + +int find(char[] s, char[] sub) + out (result) + { + if (result == -1) + { + } + else + { + assert(0 <= result && result < s.length - sub.length + 1); + assert(memcmp(&s[result], sub.ptr, sub.length) == 0); + } + } + body + { + auto sublength = sub.length; + + if (sublength == 0) + return 0; + + if (s.length >= sublength) + { + auto c = sub[0]; + if (sublength == 1) + { + auto p = cast(char*)memchr(s.ptr, c, s.length); + if (p) + return p - &s[0]; + } + else + { + size_t imax = s.length - sublength + 1; + + // Remainder of sub[] + char *q = &sub[1]; + sublength--; + + for (size_t i = 0; i < imax; i++) + { + char *p = cast(char*)memchr(&s[i], c, imax - i); + if (!p) + break; + i = p - &s[0]; + if (memcmp(p + 1, q, sublength) == 0) + return i; + } + } + } + return -1; + } + + +unittest +{ + debug(string) printf("string.find.unittest\n"); + + int i; + + i = find(null, "a"); + assert(i == -1); + i = find("def", "a"); + assert(i == -1); + i = find("abba", "a"); + assert(i == 0); + i = find("def", "f"); + assert(i == 2); + i = find("dfefffg", "fff"); + assert(i == 3); + i = find("dfeffgfff", "fff"); + assert(i == 6); +} + +/****************************************** + * ditto + */ + +int ifind(char[] s, char[] sub) + out (result) + { + if (result == -1) + { + } + else + { + assert(0 <= result && result < s.length - sub.length + 1); + assert(icmp(s[result .. result + sub.length], sub) == 0); + } + } + body + { + auto sublength = sub.length; + int i; + + if (sublength == 0) + return 0; + + if (s.length < sublength) + return -1; + + auto c = sub[0]; + if (sublength == 1) + { + i = ifind(s, c); + } + else if (c <= 0x7F) + { + size_t imax = s.length - sublength + 1; + + // Remainder of sub[] + char[] subn = sub[1 .. sublength]; + + for (i = 0; i < imax; i++) + { + auto j = ifind(s[i .. imax], c); + if (j == -1) + return -1; + i += j; + if (icmp(s[i + 1 .. i + sublength], subn) == 0) + return i; + } + i = -1; + } + else + { + size_t imax = s.length - sublength; + + for (i = 0; i <= imax; i++) + { + if (icmp(s[i .. i + sublength], sub) == 0) + return i; + } + i = -1; + } + return i; + } + + +unittest +{ + debug(string) printf("string.ifind.unittest\n"); + + int i; + + i = ifind(null, "a"); + assert(i == -1); + i = ifind("def", "a"); + assert(i == -1); + i = ifind("abba", "a"); + assert(i == 0); + i = ifind("def", "f"); + assert(i == 2); + i = ifind("dfefffg", "fff"); + assert(i == 3); + i = ifind("dfeffgfff", "fff"); + assert(i == 6); + + char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun."; + char[] sMars = "Who\'s \'My Favorite Maritian?\'"; + + i = ifind(sMars, "MY fAVe"); + assert(i == -1); + i = ifind(sMars, "mY fAVOriTe"); + assert(i == 7); + i = ifind(sPlts, "mArS:"); + assert(i == 0); + i = ifind(sPlts, "rOcK"); + assert(i == 17); + i = ifind(sPlts, "Un."); + assert(i == 41); + i = ifind(sPlts, sPlts); + assert(i == 0); + + i = ifind("\u0100", "\u0100"); + assert(i == 0); + + // Thanks to Carlos Santander B. and zwang + i = ifind("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y", + "page-break-before"); + assert(i == -1); +} + +/****************************************** + * ditto + */ + +int rfind(char[] s, char[] sub) + out (result) + { + if (result == -1) + { + } + else + { + assert(0 <= result && result < s.length - sub.length + 1); + assert(memcmp(&s[0] + result, sub.ptr, sub.length) == 0); + } + } + body + { + char c; + + if (sub.length == 0) + return s.length; + c = sub[0]; + if (sub.length == 1) + return rfind(s, c); + for (int i = s.length - sub.length; i >= 0; i--) + { + if (s[i] == c) + { + if (memcmp(&s[i + 1], &sub[1], sub.length - 1) == 0) + return i; + } + } + return -1; + } + +unittest +{ + int i; + + debug(string) printf("string.rfind.unittest\n"); + i = rfind("abcdefcdef", "c"); + assert(i == 6); + i = rfind("abcdefcdef", "cd"); + assert(i == 6); + i = rfind("abcdefcdef", "x"); + assert(i == -1); + i = rfind("abcdefcdef", "xy"); + assert(i == -1); + i = rfind("abcdefcdef", ""); + assert(i == 10); +} + + +/****************************************** + * ditto + */ + +int irfind(char[] s, char[] sub) + out (result) + { + if (result == -1) + { + } + else + { + assert(0 <= result && result < s.length - sub.length + 1); + assert(icmp(s[result .. result + sub.length], sub) == 0); + } + } + body + { + dchar c; + + if (sub.length == 0) + return s.length; + c = sub[0]; + if (sub.length == 1) + return irfind(s, c); + if (c <= 0x7F) + { + c = std.ctype.tolower(c); + for (int i = s.length - sub.length; i >= 0; i--) + { + if (std.ctype.tolower(s[i]) == c) + { + if (icmp(s[i + 1 .. i + sub.length], sub[1 .. sub.length]) == 0) + return i; + } + } + } + else + { + for (int i = s.length - sub.length; i >= 0; i--) + { + if (icmp(s[i .. i + sub.length], sub) == 0) + return i; + } + } + return -1; + } + +unittest +{ + int i; + + debug(string) printf("string.irfind.unittest\n"); + i = irfind("abcdefCdef", "c"); + assert(i == 6); + i = irfind("abcdefCdef", "cD"); + assert(i == 6); + i = irfind("abcdefcdef", "x"); + assert(i == -1); + i = irfind("abcdefcdef", "xy"); + assert(i == -1); + i = irfind("abcdefcdef", ""); + assert(i == 10); + + char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun."; + char[] sMars = "Who\'s \'My Favorite Maritian?\'"; + + i = irfind("abcdefcdef", "c"); + assert(i == 6); + i = irfind("abcdefcdef", "cd"); + assert(i == 6); + i = irfind( "abcdefcdef", "def" ); + assert(i == 7); + + i = irfind(sMars, "RiTE maR"); + assert(i == 14); + i = irfind(sPlts, "FOuRTh"); + assert(i == 10); + i = irfind(sMars, "whO\'s \'MY"); + assert(i == 0); + i = irfind(sMars, sMars); + assert(i == 0); +} + + +/************************************ + * Convert string s[] to lower case. + */ + +string tolower(string s) +{ + int changed; + char[] r; + + for (size_t i = 0; i < s.length; i++) + { + auto c = s[i]; + if ('A' <= c && c <= 'Z') + { + if (!changed) + { + r = s.dup; + changed = 1; + } + r[i] = cast(char) (c + (cast(char)'a' - 'A')); + } + else if (c > 0x7F) + { + foreach (size_t j, dchar dc; s[i .. length]) + { + if (std.uni.isUniUpper(dc)) + { + dc = std.uni.toUniLower(dc); + if (!changed) + { + r = s[0 .. i + j].dup; + changed = 2; + } + } + if (changed) + { + if (changed == 1) + { r = r[0 .. i + j]; + changed = 2; + } + std.utf.encode(r, dc); + } + } + break; + } + } + return changed ? r : s; +} + +unittest +{ + debug(string) printf("string.tolower.unittest\n"); + + char[] s1 = "FoL"; + char[] s2; + + s2 = tolower(s1); + assert(cmp(s2, "fol") == 0); + assert(s2 != s1); + + s1 = "A\u0100B\u0101d"; + s2 = tolower(s1); + assert(cmp(s2, "a\u0101b\u0101d") == 0); + assert(s2 !is s1); + + s1 = "A\u0460B\u0461d"; + s2 = tolower(s1); + assert(cmp(s2, "a\u0461b\u0461d") == 0); + assert(s2 !is s1); + + s1 = "\u0130"; + s2 = tolower(s1); + assert(s2 == "i"); + assert(s2 !is s1); +} + +/************************************ + * Convert string s[] to upper case. + */ + +string toupper(string s) +{ + int changed; + char[] r; + + for (size_t i = 0; i < s.length; i++) + { + auto c = s[i]; + if ('a' <= c && c <= 'z') + { + if (!changed) + { + r = s.dup; + changed = 1; + } + r[i] = cast(char) (c - (cast(char)'a' - 'A')); + } + else if (c > 0x7F) + { + foreach (size_t j, dchar dc; s[i .. length]) + { + if (std.uni.isUniLower(dc)) + { + dc = std.uni.toUniUpper(dc); + if (!changed) + { + r = s[0 .. i + j].dup; + changed = 2; + } + } + if (changed) + { + if (changed == 1) + { r = r[0 .. i + j]; + changed = 2; + } + std.utf.encode(r, dc); + } + } + break; + } + } + return changed ? r : s; +} + +unittest +{ + debug(string) printf("string.toupper.unittest\n"); + + char[] s1 = "FoL"; + char[] s2; + + s2 = toupper(s1); + assert(cmp(s2, "FOL") == 0); + assert(s2 !is s1); + + s1 = "a\u0100B\u0101d"; + s2 = toupper(s1); + assert(cmp(s2, "A\u0100B\u0100D") == 0); + assert(s2 !is s1); + + s1 = "a\u0460B\u0461d"; + s2 = toupper(s1); + assert(cmp(s2, "A\u0460B\u0460D") == 0); + assert(s2 !is s1); +} + + +/******************************************** + * Capitalize first character of string s[], convert rest of string s[] + * to lower case. + */ + +char[] capitalize(char[] s) +{ + int changed; + int i; + char[] r = s; + + changed = 0; + + foreach (size_t i, dchar c; s) + { dchar c2; + + if (i == 0) + { + c2 = std.uni.toUniUpper(c); + if (c != c2) + { + changed = 1; + r = null; + } + } + else + { + c2 = std.uni.toUniLower(c); + if (c != c2) + { + if (!changed) + { changed = 1; + r = s[0 .. i].dup; + } + } + } + if (changed) + std.utf.encode(r, c2); + } + return r; +} + + +unittest +{ + debug(string) printf("string.toupper.capitalize\n"); + + char[] s1 = "FoL"; + char[] s2; + + s2 = capitalize(s1); + assert(cmp(s2, "Fol") == 0); + assert(s2 !is s1); + + s2 = capitalize(s1[0 .. 2]); + assert(cmp(s2, "Fo") == 0); + assert(s2.ptr == s1.ptr); + + s1 = "fOl"; + s2 = capitalize(s1); + assert(cmp(s2, "Fol") == 0); + assert(s2 !is s1); +} + + +/******************************************** + * Capitalize all words in string s[]. + * Remove leading and trailing whitespace. + * Replace all sequences of whitespace with a single space. + */ + +char[] capwords(char[] s) +{ + char[] r; + bool inword = false; + size_t istart = 0; + size_t i; + + for (i = 0; i < s.length; i++) + { + switch (s[i]) + { + case ' ': + case '\t': + case '\f': + case '\r': + case '\n': + case '\v': + if (inword) + { + r ~= capitalize(s[istart .. i]); + inword = false; + } + break; + + default: + if (!inword) + { + if (r.length) + r ~= ' '; + istart = i; + inword = true; + } + break; + } + } + if (inword) + { + r ~= capitalize(s[istart .. i]); + } + + return r; +} + + +unittest +{ + debug(string) printf("string.capwords.unittest\n"); + + char[] s1 = "\tfoo abc(aD)* \t (q PTT "; + char[] s2; + + s2 = capwords(s1); + //writefln("s2 = '%s'", s2); + assert(cmp(s2, "Foo Abc(ad)* (q Ptt") == 0); +} + +/******************************************** + * Return a string that consists of s[] repeated n times. + */ + +char[] repeat(char[] s, size_t n) +{ + if (n == 0) + return null; + if (n == 1) + return s; + char[] r = new char[n * s.length]; + if (s.length == 1) + r[] = s[0]; + else + { auto len = s.length; + + for (size_t i = 0; i < n * len; i += len) + { + r[i .. i + len] = s[]; + } + } + return r; +} + + +unittest +{ + debug(string) printf("string.repeat.unittest\n"); + + char[] s; + + s = repeat("1234", 0); + assert(s is null); + s = repeat("1234", 1); + assert(cmp(s, "1234") == 0); + s = repeat("1234", 2); + assert(cmp(s, "12341234") == 0); + s = repeat("1", 4); + assert(cmp(s, "1111") == 0); + s = repeat(null, 4); + assert(s is null); +} + + +/******************************************** + * Concatenate all the strings in words[] together into one + * string; use sep[] as the separator. + */ + +char[] join(char[][] words, char[] sep) +{ + char[] result; + + if (words.length) + { + size_t len = 0; + size_t i; + + for (i = 0; i < words.length; i++) + len += words[i].length; + + auto seplen = sep.length; + len += (words.length - 1) * seplen; + + result = new char[len]; + + size_t j; + i = 0; + while (true) + { + uint wlen = words[i].length; + + result[j .. j + wlen] = words[i]; + j += wlen; + i++; + if (i >= words.length) + break; + result[j .. j + seplen] = sep; + j += seplen; + } + assert(j == len); + } + return result; +} + +unittest +{ + debug(string) printf("string.join.unittest\n"); + + char[] word1 = "peter"; + char[] word2 = "paul"; + char[] word3 = "jerry"; + char[][3] words; + char[] r; + int i; + + words[0] = word1; + words[1] = word2; + words[2] = word3; + r = join(words, ","); + i = cmp(r, "peter,paul,jerry"); + assert(i == 0); +} + + +/************************************** + * Split s[] into an array of words, + * using whitespace as the delimiter. + */ + +char[][] split(char[] s) +{ + size_t i; + size_t istart = 0; + bool inword = false; + char[][] words; + + for (i = 0; i < s.length; i++) + { + switch (s[i]) + { + case ' ': + case '\t': + case '\f': + case '\r': + case '\n': + case '\v': + if (inword) + { + words ~= s[istart .. i]; + inword = false; + } + break; + + default: + if (!inword) + { istart = i; + inword = true; + } + break; + } + } + if (inword) + words ~= s[istart .. i]; + return words; +} + +unittest +{ + debug(string) printf("string.split1\n"); + + char[] s = " peter paul\tjerry "; + char[][] words; + int i; + + words = split(s); + assert(words.length == 3); + i = cmp(words[0], "peter"); + assert(i == 0); + i = cmp(words[1], "paul"); + assert(i == 0); + i = cmp(words[2], "jerry"); + assert(i == 0); +} + + +/************************************** + * Split s[] into an array of words, + * using delim[] as the delimiter. + */ + +char[][] split(char[] s, char[] delim) + in + { + assert(delim.length > 0); + } + body + { + size_t i; + size_t j; + char[][] words; + + i = 0; + if (s.length) + { + if (delim.length == 1) + { char c = delim[0]; + size_t nwords = 0; + char* p = &s[0]; + char* pend = p + s.length; + + while (true) + { + nwords++; + p = cast(char*)memchr(p, c, pend - p); + if (!p) + break; + p++; + if (p == pend) + { nwords++; + break; + } + } + words.length = nwords; + + int wordi = 0; + i = 0; + while (true) + { + p = cast(char*)memchr(&s[i], c, s.length - i); + if (!p) + { + words[wordi] = s[i .. s.length]; + break; + } + j = p - &s[0]; + words[wordi] = s[i .. j]; + wordi++; + i = j + 1; + if (i == s.length) + { + words[wordi] = ""; + break; + } + } + assert(wordi + 1 == nwords); + } + else + { size_t nwords = 0; + + while (true) + { + nwords++; + j = find(s[i .. s.length], delim); + if (j == -1) + break; + i += j + delim.length; + if (i == s.length) + { nwords++; + break; + } + assert(i < s.length); + } + words.length = nwords; + + int wordi = 0; + i = 0; + while (true) + { + j = find(s[i .. s.length], delim); + if (j == -1) + { + words[wordi] = s[i .. s.length]; + break; + } + words[wordi] = s[i .. i + j]; + wordi++; + i += j + delim.length; + if (i == s.length) + { + words[wordi] = ""; + break; + } + assert(i < s.length); + } + assert(wordi + 1 == nwords); + } + } + return words; + } + +unittest +{ + debug(string) printf("string.split2\n"); + + char[] s = ",peter,paul,jerry,"; + char[][] words; + int i; + + words = split(s, ","); + assert(words.length == 5); + i = cmp(words[0], ""); + assert(i == 0); + i = cmp(words[1], "peter"); + assert(i == 0); + i = cmp(words[2], "paul"); + assert(i == 0); + i = cmp(words[3], "jerry"); + assert(i == 0); + i = cmp(words[4], ""); + assert(i == 0); + + s = s[0 .. s.length - 1]; // lop off trailing ',' + words = split(s, ","); + assert(words.length == 4); + i = cmp(words[3], "jerry"); + assert(i == 0); + + s = s[1 .. s.length]; // lop off leading ',' + words = split(s, ","); + assert(words.length == 3); + i = cmp(words[0], "peter"); + assert(i == 0); + + char[] s2 = ",,peter,,paul,,jerry,,"; + + words = split(s2, ",,"); + //printf("words.length = %d\n", words.length); + assert(words.length == 5); + i = cmp(words[0], ""); + assert(i == 0); + i = cmp(words[1], "peter"); + assert(i == 0); + i = cmp(words[2], "paul"); + assert(i == 0); + i = cmp(words[3], "jerry"); + assert(i == 0); + i = cmp(words[4], ""); + assert(i == 0); + + s2 = s2[0 .. s2.length - 2]; // lop off trailing ',,' + words = split(s2, ",,"); + assert(words.length == 4); + i = cmp(words[3], "jerry"); + assert(i == 0); + + s2 = s2[2 .. s2.length]; // lop off leading ',,' + words = split(s2, ",,"); + assert(words.length == 3); + i = cmp(words[0], "peter"); + assert(i == 0); +} + + +/************************************** + * Split s[] into an array of lines, + * using CR, LF, or CR-LF as the delimiter. + * The delimiter is not included in the line. + */ + +char[][] splitlines(char[] s) +{ + uint i; + uint istart; + uint nlines; + char[][] lines; + + nlines = 0; + for (i = 0; i < s.length; i++) + { char c; + + c = s[i]; + if (c == '\r' || c == '\n') + { + nlines++; + istart = i + 1; + if (c == '\r' && i + 1 < s.length && s[i + 1] == '\n') + { + i++; + istart++; + } + } + } + if (istart != i) + nlines++; + + lines = new char[][nlines]; + nlines = 0; + istart = 0; + for (i = 0; i < s.length; i++) + { char c; + + c = s[i]; + if (c == '\r' || c == '\n') + { + lines[nlines] = s[istart .. i]; + nlines++; + istart = i + 1; + if (c == '\r' && i + 1 < s.length && s[i + 1] == '\n') + { + i++; + istart++; + } + } + } + if (istart != i) + { lines[nlines] = s[istart .. i]; + nlines++; + } + + assert(nlines == lines.length); + return lines; +} + +unittest +{ + debug(string) printf("string.splitlines\n"); + + char[] s = "\rpeter\n\rpaul\r\njerry\n"; + char[][] lines; + int i; + + lines = splitlines(s); + //printf("lines.length = %d\n", lines.length); + assert(lines.length == 5); + //printf("lines[0] = %llx, '%.*s'\n", lines[0], lines[0]); + assert(lines[0].length == 0); + i = cmp(lines[1], "peter"); + assert(i == 0); + assert(lines[2].length == 0); + i = cmp(lines[3], "paul"); + assert(i == 0); + i = cmp(lines[4], "jerry"); + assert(i == 0); + + s = s[0 .. s.length - 1]; // lop off trailing \n + lines = splitlines(s); + //printf("lines.length = %d\n", lines.length); + assert(lines.length == 5); + i = cmp(lines[4], "jerry"); + assert(i == 0); +} + + +/***************************************** + * Strips leading or trailing whitespace, or both. + */ + +char[] stripl(char[] s) +{ + uint i; + + for (i = 0; i < s.length; i++) + { + if (!std.ctype.isspace(s[i])) + break; + } + return s[i .. s.length]; +} + +char[] stripr(char[] s) /// ditto +{ + uint i; + + for (i = s.length; i > 0; i--) + { + if (!std.ctype.isspace(s[i - 1])) + break; + } + return s[0 .. i]; +} + +char[] strip(char[] s) /// ditto +{ + return stripr(stripl(s)); +} + +unittest +{ + debug(string) printf("string.strip.unittest\n"); + char[] s; + int i; + + s = strip(" foo\t "); + i = cmp(s, "foo"); + assert(i == 0); +} + +/******************************************* + * Returns s[] sans trailing delimiter[], if any. + * If delimiter[] is null, removes trailing CR, LF, or CRLF, if any. + */ + +char[] chomp(char[] s, char[] delimiter = null) +{ + if (delimiter is null) + { auto len = s.length; + + if (len) + { auto c = s[len - 1]; + + if (c == '\r') // if ends in CR + len--; + else if (c == '\n') // if ends in LF + { + len--; + if (len && s[len - 1] == '\r') + len--; // remove CR-LF + } + } + return s[0 .. len]; + } + else if (s.length >= delimiter.length) + { + if (s[length - delimiter.length .. length] == delimiter) + return s[0 .. length - delimiter.length]; + } + return s; +} + +unittest +{ + debug(string) printf("string.chomp.unittest\n"); + char[] s; + + s = chomp(null); + assert(s is null); + s = chomp("hello"); + assert(s == "hello"); + s = chomp("hello\n"); + assert(s == "hello"); + s = chomp("hello\r"); + assert(s == "hello"); + s = chomp("hello\r\n"); + assert(s == "hello"); + s = chomp("hello\n\r"); + assert(s == "hello\n"); + s = chomp("hello\n\n"); + assert(s == "hello\n"); + s = chomp("hello\r\r"); + assert(s == "hello\r"); + s = chomp("hello\nxxx\n"); + assert(s == "hello\nxxx"); + + s = chomp(null, null); + assert(s is null); + s = chomp("hello", "o"); + assert(s == "hell"); + s = chomp("hello", "p"); + assert(s == "hello"); + s = chomp("hello", null); + assert(s == "hello"); + s = chomp("hello", "llo"); + assert(s == "he"); +} + + +/*********************************************** + * Returns s[] sans trailing character, if there is one. + * If last two characters are CR-LF, then both are removed. + */ + +char[] chop(char[] s) +{ auto len = s.length; + + if (len) + { + if (len >= 2 && s[len - 1] == '\n' && s[len - 2] == '\r') + return s[0 .. len - 2]; + + // If we're in a tail of a UTF-8 sequence, back up + while ((s[len - 1] & 0xC0) == 0x80) + { + len--; + if (len == 0) + throw new std.utf.UtfException("invalid UTF sequence", 0); + } + + return s[0 .. len - 1]; + } + return s; +} + + +unittest +{ + debug(string) printf("string.chop.unittest\n"); + char[] s; + + s = chop(null); + assert(s is null); + s = chop("hello"); + assert(s == "hell"); + s = chop("hello\r\n"); + assert(s == "hello"); + s = chop("hello\n\r"); + assert(s == "hello\n"); +} + + +/******************************************* + * Left justify, right justify, or center string s[] + * in field width chars wide. + */ + +char[] ljustify(char[] s, int width) +{ + if (s.length >= width) + return s; + char[] r = new char[width]; + r[0..s.length] = s; + r[s.length .. width] = cast(char)' '; + return r; +} + +/// ditto +char[] rjustify(char[] s, int width) +{ + if (s.length >= width) + return s; + char[] r = new char[width]; + r[0 .. width - s.length] = cast(char)' '; + r[width - s.length .. width] = s; + return r; +} + +/// ditto +char[] center(char[] s, int width) +{ + if (s.length >= width) + return s; + char[] r = new char[width]; + int left = (width - s.length) / 2; + r[0 .. left] = cast(char)' '; + r[left .. left + s.length] = s; + r[left + s.length .. width] = cast(char)' '; + return r; +} + +unittest +{ + debug(string) printf("string.justify.unittest\n"); + + char[] s = "hello"; + char[] r; + int i; + + r = ljustify(s, 8); + i = cmp(r, "hello "); + assert(i == 0); + + r = rjustify(s, 8); + i = cmp(r, " hello"); + assert(i == 0); + + r = center(s, 8); + i = cmp(r, " hello "); + assert(i == 0); + + r = zfill(s, 8); + i = cmp(r, "000hello"); + assert(i == 0); +} + + +/***************************************** + * Same as rjustify(), but fill with '0's. + */ + +char[] zfill(char[] s, int width) +{ + if (s.length >= width) + return s; + char[] r = new char[width]; + r[0 .. width - s.length] = cast(char)'0'; + r[width - s.length .. width] = s; + return r; +} + +/******************************************** + * Replace occurrences of from[] with to[] in s[]. + */ + +char[] replace(char[] s, char[] from, char[] to) +{ + char[] p; + int i; + size_t istart; + + //printf("replace('%.*s','%.*s','%.*s')\n", s, from, to); + if (from.length == 0) + return s; + istart = 0; + while (istart < s.length) + { + i = find(s[istart .. s.length], from); + if (i == -1) + { + p ~= s[istart .. s.length]; + break; + } + p ~= s[istart .. istart + i]; + p ~= to; + istart += i + from.length; + } + return p; +} + +unittest +{ + debug(string) printf("string.replace.unittest\n"); + + char[] s = "This is a foo foo list"; + char[] from = "foo"; + char[] to = "silly"; + char[] r; + int i; + + r = replace(s, from, to); + i = cmp(r, "This is a silly silly list"); + assert(i == 0); + + r = replace(s, "", to); + i = cmp(r, "This is a foo foo list"); + assert(i == 0); +} + +/***************************** + * Return a _string that is string[] with slice[] replaced by replacement[]. + */ + +char[] replaceSlice(char[] string, char[] slice, char[] replacement) +in +{ + // Verify that slice[] really is a slice of string[] + int so = cast(char*)slice - cast(char*)string; + assert(so >= 0); + //printf("string.length = %d, so = %d, slice.length = %d\n", string.length, so, slice.length); + assert(string.length >= so + slice.length); +} +body +{ + char[] result; + int so = cast(char*)slice - cast(char*)string; + + result.length = string.length - slice.length + replacement.length; + + result[0 .. so] = string[0 .. so]; + result[so .. so + replacement.length] = replacement; + result[so + replacement.length .. result.length] = string[so + slice.length .. string.length]; + + return result; +} + +unittest +{ + debug(string) printf("string.replaceSlice.unittest\n"); + + char[] string = "hello"; + char[] slice = string[2 .. 4]; + + char[] r = replaceSlice(string, slice, "bar"); + int i; + i = cmp(r, "hebaro"); + assert(i == 0); +} + +/********************************************** + * Insert sub[] into s[] at location index. + */ + +char[] insert(char[] s, size_t index, char[] sub) +in +{ + assert(0 <= index && index <= s.length); +} +body +{ + if (sub.length == 0) + return s; + + if (s.length == 0) + return sub; + + int newlength = s.length + sub.length; + char[] result = new char[newlength]; + + result[0 .. index] = s[0 .. index]; + result[index .. index + sub.length] = sub; + result[index + sub.length .. newlength] = s[index .. s.length]; + return result; +} + +unittest +{ + debug(string) printf("string.insert.unittest\n"); + + char[] r; + int i; + + r = insert("abcd", 0, "e"); + i = cmp(r, "eabcd"); + assert(i == 0); + + r = insert("abcd", 4, "e"); + i = cmp(r, "abcde"); + assert(i == 0); + + r = insert("abcd", 2, "ef"); + i = cmp(r, "abefcd"); + assert(i == 0); + + r = insert(null, 0, "e"); + i = cmp(r, "e"); + assert(i == 0); + + r = insert("abcd", 0, null); + i = cmp(r, "abcd"); + assert(i == 0); +} + +/*********************************************** + * Count up all instances of sub[] in s[]. + */ + +size_t count(char[] s, char[] sub) +{ + size_t i; + int j; + int count = 0; + + for (i = 0; i < s.length; i += j + sub.length) + { + j = find(s[i .. s.length], sub); + if (j == -1) + break; + count++; + } + return count; +} + +unittest +{ + debug(string) printf("string.count.unittest\n"); + + char[] s = "This is a fofofof list"; + char[] sub = "fof"; + int i; + + i = count(s, sub); + assert(i == 2); +} + + +/************************************************ + * Replace tabs with the appropriate number of spaces. + * tabsize is the distance between tab stops. + */ + +char[] expandtabs(char[] string, int tabsize = 8) +{ + bool changes = false; + char[] result = string; + int column; + int nspaces; + + foreach (size_t i, dchar c; string) + { + switch (c) + { + case '\t': + nspaces = tabsize - (column % tabsize); + if (!changes) + { + changes = true; + result = null; + result.length = string.length + nspaces - 1; + result.length = i + nspaces; + result[0 .. i] = string[0 .. i]; + result[i .. i + nspaces] = ' '; + } + else + { int j = result.length; + result.length = j + nspaces; + result[j .. j + nspaces] = ' '; + } + column += nspaces; + break; + + case '\r': + case '\n': + case PS: + case LS: + column = 0; + goto L1; + + default: + column++; + L1: + if (changes) + { + if (c <= 0x7F) + result ~= cast(char)c; + else + std.utf.encode(result, c); + } + break; + } + } + + return result; +} + +unittest +{ + debug(string) printf("string.expandtabs.unittest\n"); + + char[] s = "This \tis\t a fofof\tof list"; + char[] r; + int i; + + r = expandtabs(s, 8); + i = cmp(r, "This is a fofof of list"); + assert(i == 0); + + r = expandtabs(null); + assert(r == null); + r = expandtabs(""); + assert(r.length == 0); + r = expandtabs("a"); + assert(r == "a"); + r = expandtabs("\t"); + assert(r == " "); + r = expandtabs( " ab\tasdf "); + //writefln("r = '%s'", r); + assert(r == " ab asdf "); + // TODO: need UTF test case +} + + +/******************************************* + * Replace spaces in string with the optimal number of tabs. + * Trailing spaces or tabs in a line are removed. + * Params: + * string = String to convert. + * tabsize = Tab columns are tabsize spaces apart. tabsize defaults to 8. + */ + +char[] entab(char[] string, int tabsize = 8) +{ + bool changes = false; + char[] result = string; + + int nspaces = 0; + int nwhite = 0; + int column = 0; // column number + + foreach (size_t i, dchar c; string) + { + + void change() + { + changes = true; + result = null; + result.length = string.length; + result.length = i; + result[0 .. i] = string[0 .. i]; + } + + switch (c) + { + case '\t': + nwhite++; + if (nspaces) + { + if (!changes) + change(); + + int j = result.length - nspaces; + int ntabs = (((column - nspaces) % tabsize) + nspaces) / tabsize; + result.length = j + ntabs; + result[j .. j + ntabs] = '\t'; + nwhite += ntabs - nspaces; + nspaces = 0; + } + column = (column + tabsize) / tabsize * tabsize; + break; + + case '\r': + case '\n': + case PS: + case LS: + // Truncate any trailing spaces or tabs + if (nwhite) + { + if (!changes) + change(); + result = result[0 .. result.length - nwhite]; + } + break; + + default: + if (nspaces >= 2 && (column % tabsize) == 0) + { + if (!changes) + change(); + + int j = result.length - nspaces; + int ntabs = (nspaces + tabsize - 1) / tabsize; + result.length = j + ntabs; + result[j .. j + ntabs] = '\t'; + nwhite += ntabs - nspaces; + nspaces = 0; + } + if (c == ' ') + { nwhite++; + nspaces++; + } + else + { nwhite = 0; + nspaces = 0; + } + column++; + break; + } + if (changes) + { + if (c <= 0x7F) + result ~= cast(char)c; + else + std.utf.encode(result, c); + } + } + + // Truncate any trailing spaces or tabs + if (nwhite) + result = result[0 .. result.length - nwhite]; + + return result; +} + +unittest +{ + debug(string) printf("string.entab.unittest\n"); + + char[] r; + + r = entab(null); + assert(r == null); + r = entab(""); + assert(r.length == 0); + r = entab("a"); + assert(r == "a"); + r = entab(" "); + assert(r == ""); + r = entab(" x"); + assert(r == "\tx"); + r = entab(" ab asdf "); + assert(r == " ab\tasdf"); + r = entab(" ab asdf "); + assert(r == " ab\t asdf"); + r = entab(" ab \t asdf "); + assert(r == " ab\t asdf"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\ta"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\ta"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\ta"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\ta"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\ta"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\ta"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\ta"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\ta"); + r = entab("1234567 \ta"); + assert(r == "1234567\t\t\ta"); + // TODO: need UTF test case +} + + + +/************************************ + * Construct translation table for translate(). + * BUG: only works with ASCII + */ + +char[] maketrans(char[] from, char[] to) + in + { + assert(from.length == to.length); + assert(from.length <= 128); + foreach (char c; from) + { + assert(c <= 0x7F); + } + foreach (char c; to) + { + assert(c <= 0x7F); + } + } + body + { + char[] t = new char[256]; + int i; + + for (i = 0; i < t.length; i++) + t[i] = cast(char)i; + + for (i = 0; i < from.length; i++) + t[from[i]] = to[i]; + + return t; + } + +/****************************************** + * Translate characters in s[] using table created by maketrans(). + * Delete chars in delchars[]. + * BUG: only works with ASCII + */ + +char[] translate(char[] s, char[] transtab, char[] delchars) + in + { + assert(transtab.length == 256); + } + body + { + char[] r; + int count; + bool[256] deltab; + + deltab[] = false; + foreach (char c; delchars) + { + deltab[c] = true; + } + + count = 0; + foreach (char c; s) + { + if (!deltab[c]) + count++; + //printf("s[%d] = '%c', count = %d\n", i, s[i], count); + } + + r = new char[count]; + count = 0; + foreach (char c; s) + { + if (!deltab[c]) + { + r[count] = transtab[c]; + count++; + } + } + + return r; + } + +unittest +{ + debug(string) printf("string.translate.unittest\n"); + + char[] from = "abcdef"; + char[] to = "ABCDEF"; + char[] s = "The quick dog fox"; + char[] t; + char[] r; + int i; + + t = maketrans(from, to); + r = translate(s, t, "kg"); + //printf("r = '%.*s'\n", r); + i = cmp(r, "ThE quiC Do Fox"); + assert(i == 0); +} + +/*********************************************** + * Convert to char[]. + */ + +char[] toString(bool b) +{ + return b ? "true" : "false"; +} + +/// ditto +char[] toString(char c) +{ + char[] result = new char[2]; + result[0] = c; + result[1] = 0; + return result[0 .. 1]; +} + +unittest +{ + debug(string) printf("string.toString(char).unittest\n"); + + char[] s = "foo"; + char[] s2; + foreach (char c; s) + { + s2 ~= std.string.toString(c); + } + //printf("%.*s", s2); + assert(s2 == "foo"); +} + +char[] toString(ubyte ub) { return toString(cast(uint) ub); } /// ditto +char[] toString(ushort us) { return toString(cast(uint) us); } /// ditto + +/// ditto +char[] toString(uint u) +{ char[uint.sizeof * 3] buffer = void; + int ndigits; + char[] result; + + ndigits = 0; + if (u < 10) + // Avoid storage allocation for simple stuff + result = digits[u .. u + 1]; + else + { + while (u) + { + uint c = (u % 10) + '0'; + u /= 10; + ndigits++; + buffer[buffer.length - ndigits] = cast(char)c; + } + result = new char[ndigits]; + result[] = buffer[buffer.length - ndigits .. buffer.length]; + } + return result; +} + +unittest +{ + debug(string) printf("string.toString(uint).unittest\n"); + + char[] r; + int i; + + r = toString(0u); + i = cmp(r, "0"); + assert(i == 0); + + r = toString(9u); + i = cmp(r, "9"); + assert(i == 0); + + r = toString(123u); + i = cmp(r, "123"); + assert(i == 0); +} + +/// ditto +char[] toString(ulong u) +{ char[ulong.sizeof * 3] buffer; + int ndigits; + char[] result; + + if (u < 0x1_0000_0000) + return toString(cast(uint)u); + ndigits = 0; + while (u) + { + char c = cast(char)((u % 10) + '0'); + u /= 10; + ndigits++; + buffer[buffer.length - ndigits] = c; + } + result = new char[ndigits]; + result[] = buffer[buffer.length - ndigits .. buffer.length]; + return result; +} + +unittest +{ + debug(string) printf("string.toString(ulong).unittest\n"); + + char[] r; + int i; + + r = toString(0uL); + i = cmp(r, "0"); + assert(i == 0); + + r = toString(9uL); + i = cmp(r, "9"); + assert(i == 0); + + r = toString(123uL); + i = cmp(r, "123"); + assert(i == 0); +} + +char[] toString(byte b) { return toString(cast(int) b); } /// ditto +char[] toString(short s) { return toString(cast(int) s); } /// ditto + +/// ditto +char[] toString(int i) +{ char[1 + int.sizeof * 3] buffer; + char[] result; + + if (i >= 0) + return toString(cast(uint)i); + + uint u = -i; + int ndigits = 1; + while (u) + { + char c = cast(char)((u % 10) + '0'); + u /= 10; + buffer[buffer.length - ndigits] = c; + ndigits++; + } + buffer[buffer.length - ndigits] = '-'; + result = new char[ndigits]; + result[] = buffer[buffer.length - ndigits .. buffer.length]; + return result; +} + +unittest +{ + debug(string) printf("string.toString(int).unittest\n"); + + char[] r; + int i; + + r = toString(0); + i = cmp(r, "0"); + assert(i == 0); + + r = toString(9); + i = cmp(r, "9"); + assert(i == 0); + + r = toString(123); + i = cmp(r, "123"); + assert(i == 0); + + r = toString(-0); + i = cmp(r, "0"); + assert(i == 0); + + r = toString(-9); + i = cmp(r, "-9"); + assert(i == 0); + + r = toString(-123); + i = cmp(r, "-123"); + assert(i == 0); +} + +/// ditto +char[] toString(long i) +{ char[1 + long.sizeof * 3] buffer; + char[] result; + + if (i >= 0) + return toString(cast(ulong)i); + if (cast(int)i == i) + return toString(cast(int)i); + + ulong u = cast(ulong)(-i); + int ndigits = 1; + while (u) + { + char c = cast(char)((u % 10) + '0'); + u /= 10; + buffer[buffer.length - ndigits] = c; + ndigits++; + } + buffer[buffer.length - ndigits] = '-'; + result = new char[ndigits]; + result[] = buffer[buffer.length - ndigits .. buffer.length]; + return result; +} + +unittest +{ + debug(string) printf("string.toString(long).unittest\n"); + + char[] r; + int i; + + r = toString(0L); + i = cmp(r, "0"); + assert(i == 0); + + r = toString(9L); + i = cmp(r, "9"); + assert(i == 0); + + r = toString(123L); + i = cmp(r, "123"); + assert(i == 0); + + r = toString(-0L); + i = cmp(r, "0"); + assert(i == 0); + + r = toString(-9L); + i = cmp(r, "-9"); + assert(i == 0); + + r = toString(-123L); + i = cmp(r, "-123"); + assert(i == 0); +} + +/// ditto +char[] toString(float f) { return toString(cast(double) f); } + +/// ditto +char[] toString(double d) +{ + char[20] buffer; + + int len = sprintf(buffer.ptr, "%g", d); + return buffer[0 .. len].dup; +} + +/// ditto +char[] toString(real r) +{ + char[20] buffer; + + int len = sprintf(buffer.ptr, "%Lg", r); + return buffer[0 .. len].dup; +} + +/// ditto +char[] toString(ifloat f) { return toString(cast(idouble) f); } + +/// ditto +char[] toString(idouble d) +{ + char[21] buffer; + + int len = sprintf(buffer.ptr, "%gi", d); + return buffer[0 .. len].dup; +} + +/// ditto +char[] toString(ireal r) +{ + char[21] buffer; + + int len = sprintf(buffer.ptr, "%Lgi", r); + return buffer[0 .. len].dup; +} + +/// ditto +char[] toString(cfloat f) { return toString(cast(cdouble) f); } + +/// ditto +char[] toString(cdouble d) +{ + char[20 + 1 + 20 + 1] buffer; + + int len = sprintf(buffer.ptr, "%g+%gi", d.re, d.im); + return buffer[0 .. len].dup; +} + +/// ditto +char[] toString(creal r) +{ + char[20 + 1 + 20 + 1] buffer; + + int len = sprintf(buffer.ptr, "%Lg+%Lgi", r.re, r.im); + return buffer[0 .. len].dup; +} + + +/****************************************** + * Convert value to string in _radix radix. + * + * radix must be a value from 2 to 36. + * value is treated as a signed value only if radix is 10. + * The characters A through Z are used to represent values 10 through 36. + */ +char[] toString(long value, uint radix) +in +{ + assert(radix >= 2 && radix <= 36); +} +body +{ + if (radix == 10) + return toString(value); // handle signed cases only for radix 10 + return toString(cast(ulong)value, radix); +} + +/// ditto +char[] toString(ulong value, uint radix) +in +{ + assert(radix >= 2 && radix <= 36); +} +body +{ + char[value.sizeof * 8] buffer; + uint i = buffer.length; + + if (value < radix && value < hexdigits.length) + return hexdigits[cast(size_t)value .. cast(size_t)value + 1]; + + do + { ubyte c; + + c = cast(ubyte)(value % radix); + value = value / radix; + i--; + buffer[i] = cast(char)((c < 10) ? c + '0' : c + 'A' - 10); + } while (value); + return buffer[i .. length].dup; +} + +unittest +{ + debug(string) printf("string.toString(ulong, uint).unittest\n"); + + char[] r; + int i; + + r = toString(-10L, 10u); + assert(r == "-10"); + + r = toString(15L, 2u); + //writefln("r = '%s'", r); + assert(r == "1111"); + + r = toString(1L, 2u); + //writefln("r = '%s'", r); + assert(r == "1"); + + r = toString(0x1234AFL, 16u); + //writefln("r = '%s'", r); + assert(r == "1234AF"); +} + +/************************************************* + * Convert C-style 0 terminated string s to char[] string. + */ + +char[] toString(char *s) +{ + return s ? s[0 .. strlen(s)] : cast(char[])null; +} + +unittest +{ + debug(string) printf("string.toString(char*).unittest\n"); + + char[] r; + int i; + + r = toString(null); + i = cmp(r, ""); + assert(i == 0); + + r = toString("foo\0"); + i = cmp(r, "foo"); + assert(i == 0); +} + + +/***************************************************** + * Format arguments into a string. + */ + + +char[] format(...) +{ + char[] s; + + void putc(dchar c) + { + std.utf.encode(s, c); + } + + std.format.doFormat(&putc, _arguments, _argptr); + return s; +} + + +/***************************************************** + * Format arguments into string <i>s</i> which must be large + * enough to hold the result. Throws ArrayBoundsError if it is not. + * Returns: s + */ +char[] sformat(char[] s, ...) +{ size_t i; + + void putc(dchar c) + { + if (c <= 0x7F) + { + if (i >= s.length) + throw new ArrayBoundsError("std.string.sformat", 0); + s[i] = cast(char)c; + ++i; + } + else + { char[4] buf; + char[] b; + + b = std.utf.toUTF8(buf, c); + if (i + b.length > s.length) + throw new ArrayBoundsError("std.string.sformat", 0); + s[i..i+b.length] = b[]; + i += b.length; + } + } + + std.format.doFormat(&putc, _arguments, _argptr); + return s[0 .. i]; +} + + +unittest +{ + debug(string) printf("std.string.format.unittest\n"); + + char[] r; + int i; +/+ + r = format(null); + i = cmp(r, ""); + assert(i == 0); ++/ + r = format("foo"); + i = cmp(r, "foo"); + assert(i == 0); + + r = format("foo%%"); + i = cmp(r, "foo%"); + assert(i == 0); + + r = format("foo%s", 'C'); + i = cmp(r, "fooC"); + assert(i == 0); + + r = format("%s foo", "bar"); + i = cmp(r, "bar foo"); + assert(i == 0); + + r = format("%s foo %s", "bar", "abc"); + i = cmp(r, "bar foo abc"); + assert(i == 0); + + r = format("foo %d", -123); + i = cmp(r, "foo -123"); + assert(i == 0); + + r = format("foo %d", 123); + i = cmp(r, "foo 123"); + assert(i == 0); +} + + +/*********************************************** + * See if character c is in the pattern. + * Patterns: + * + * A <i>pattern</i> is an array of characters much like a <i>character + * class</i> in regular expressions. A sequence of characters + * can be given, such as "abcde". The '-' can represent a range + * of characters, as "a-e" represents the same pattern as "abcde". + * "a-fA-F0-9" represents all the hex characters. + * If the first character of a pattern is '^', then the pattern + * is negated, i.e. "^0-9" means any character except a digit. + * The functions inPattern, <b>countchars</b>, <b>removeschars</b>, + * and <b>squeeze</b> + * use patterns. + * + * Note: In the future, the pattern syntax may be improved + * to be more like regular expression character classes. + */ + +bool inPattern(dchar c, char[] pattern) +{ + bool result = false; + int range = 0; + dchar lastc; + + foreach (size_t i, dchar p; pattern) + { + if (p == '^' && i == 0) + { result = true; + if (i + 1 == pattern.length) + return (c == p); // or should this be an error? + } + else if (range) + { + range = 0; + if (lastc <= c && c <= p || c == p) + return !result; + } + else if (p == '-' && i > result && i + 1 < pattern.length) + { + range = 1; + continue; + } + else if (c == p) + return !result; + lastc = p; + } + return result; +} + + +unittest +{ + debug(string) printf("std.string.inPattern.unittest\n"); + + int i; + + i = inPattern('x', "x"); + assert(i == 1); + i = inPattern('x', "y"); + assert(i == 0); + i = inPattern('x', cast(char[])null); + assert(i == 0); + i = inPattern('x', "^y"); + assert(i == 1); + i = inPattern('x', "yxxy"); + assert(i == 1); + i = inPattern('x', "^yxxy"); + assert(i == 0); + i = inPattern('x', "^abcd"); + assert(i == 1); + i = inPattern('^', "^^"); + assert(i == 0); + i = inPattern('^', "^"); + assert(i == 1); + i = inPattern('^', "a^"); + assert(i == 1); + i = inPattern('x', "a-z"); + assert(i == 1); + i = inPattern('x', "A-Z"); + assert(i == 0); + i = inPattern('x', "^a-z"); + assert(i == 0); + i = inPattern('x', "^A-Z"); + assert(i == 1); + i = inPattern('-', "a-"); + assert(i == 1); + i = inPattern('-', "^A-"); + assert(i == 0); + i = inPattern('a', "z-a"); + assert(i == 1); + i = inPattern('z', "z-a"); + assert(i == 1); + i = inPattern('x', "z-a"); + assert(i == 0); +} + + +/*********************************************** + * See if character c is in the intersection of the patterns. + */ + +int inPattern(dchar c, char[][] patterns) +{ int result; + + foreach (char[] pattern; patterns) + { + if (!inPattern(c, pattern)) + { result = 0; + break; + } + result = 1; + } + return result; +} + + +/******************************************** + * Count characters in s that match pattern. + */ + +size_t countchars(char[] s, char[] pattern) +{ + size_t count; + + foreach (dchar c; s) + { + count += inPattern(c, pattern); + } + return count; +} + + +unittest +{ + debug(string) printf("std.string.count.unittest\n"); + + size_t c; + + c = countchars("abc", "a-c"); + assert(c == 3); + c = countchars("hello world", "or"); + assert(c == 3); +} + + +/******************************************** + * Return string that is s with all characters removed that match pattern. + */ + +char[] removechars(char[] s, char[] pattern) +{ + char[] r = s; + int changed; + size_t j; + + foreach (size_t i, dchar c; s) + { + if (!inPattern(c, pattern)) + { + if (changed) + { + if (r is s) + r = s[0 .. j].dup; + std.utf.encode(r, c); + } + } + else if (!changed) + { changed = 1; + j = i; + } + } + if (changed && r is s) + r = s[0 .. j].dup; + return r; +} + + +unittest +{ + debug(string) printf("std.string.remove.unittest\n"); + + char[] r; + + r = removechars("abc", "a-c"); + assert(r is null); + r = removechars("hello world", "or"); + assert(r == "hell wld"); + r = removechars("hello world", "d"); + assert(r == "hello worl"); +} + + +/*************************************************** + * Return string where sequences of a character in s[] from pattern[] + * are replaced with a single instance of that character. + * If pattern is null, it defaults to all characters. + */ + +char[] squeeze(char[] s, char[] pattern = null) +{ + char[] r = s; + dchar lastc; + size_t lasti; + int run; + bool changed; + + foreach (size_t i, dchar c; s) + { + if (run && lastc == c) + { + changed = true; + } + else if (pattern is null || inPattern(c, pattern)) + { + run = 1; + if (changed) + { if (r is s) + r = s[0 .. lasti].dup; + std.utf.encode(r, c); + } + else + lasti = i + std.utf.stride(s, i); + lastc = c; + } + else + { + run = 0; + if (changed) + { if (r is s) + r = s[0 .. lasti].dup; + std.utf.encode(r, c); + } + } + } + if (changed) + { + if (r is s) + r = s[0 .. lasti]; + } + return r; +} + + +unittest +{ + debug(string) printf("std.string.squeeze.unittest\n"); + char[] s,r; + + r = squeeze("hello"); + //writefln("r = '%s'", r); + assert(r == "helo"); + s = "abcd"; + r = squeeze(s); + assert(r is s); + s = "xyzz"; + r = squeeze(s); + assert(r.ptr == s.ptr); // should just be a slice + r = squeeze("hello goodbyee", "oe"); + assert(r == "hello godbye"); +} + + +/********************************************** + * Return string that is the 'successor' to s[]. + * If the rightmost character is a-zA-Z0-9, it is incremented within + * its case or digits. If it generates a carry, the process is + * repeated with the one to its immediate left. + */ + +char[] succ(char[] s) +{ + if (s.length && isalnum(s[length - 1])) + { + char[] r = s.dup; + size_t i = r.length - 1; + + while (1) + { dchar c = s[i]; + dchar carry; + + switch (c) + { + case '9': + c = '0'; + carry = '1'; + goto Lcarry; + case 'z': + case 'Z': + c -= 'Z' - 'A'; + carry = c; + Lcarry: + r[i] = cast(char)c; + if (i == 0) + { + char[] t = new char[r.length + 1]; + t[0] = cast(char)carry; + t[1 .. length] = r[]; + return t; + } + i--; + break; + + default: + if (std.ctype.isalnum(c)) + r[i]++; + return r; + } + } + } + return s; +} + +unittest +{ + debug(string) printf("std.string.succ.unittest\n"); + + char[] r; + + r = succ(null); + assert(r is null); + r = succ("!@#$%"); + assert(r == "!@#$%"); + r = succ("1"); + assert(r == "2"); + r = succ("9"); + assert(r == "10"); + r = succ("999"); + assert(r == "1000"); + r = succ("zz99"); + assert(r == "aaa00"); +} + + +/*********************************************** + * Replaces characters in str[] that are in from[] + * with corresponding characters in to[] and returns the resulting + * string. + * Params: + * modifiers = a string of modifier characters + * Modifiers: + <table border=1 cellspacing=0 cellpadding=5> + <tr> <th>Modifier <th>Description + <tr> <td><b>c</b> <td>Complement the list of characters in from[] + <tr> <td><b>d</b> <td>Removes matching characters with no corresponding replacement in to[] + <tr> <td><b>s</b> <td>Removes adjacent duplicates in the replaced characters + </table> + + If modifier <b>d</b> is present, then the number of characters + in to[] may be only 0 or 1. + + If modifier <b>d</b> is not present and to[] is null, + then to[] is taken _to be the same as from[]. + + If modifier <b>d</b> is not present and to[] is shorter + than from[], then to[] is extended by replicating the + last character in to[]. + + Both from[] and to[] may contain ranges using the <b>-</b> + character, for example <b>a-d</b> is synonymous with <b>abcd</b>. + Neither accept a leading <b>^</b> as meaning the complement of + the string (use the <b>c</b> modifier for that). + */ + +char[] tr(char[] str, char[] from, char[] to, char[] modifiers = null) +{ + int mod_c; + int mod_d; + int mod_s; + + foreach (char c; modifiers) + { + switch (c) + { + case 'c': mod_c = 1; break; // complement + case 'd': mod_d = 1; break; // delete unreplaced chars + case 's': mod_s = 1; break; // squeeze duplicated replaced chars + default: assert(0); + } + } + + if (to is null && !mod_d) + to = from; + + char[] result = new char[str.length]; + result.length = 0; + int m; + dchar lastc; + + foreach (dchar c; str) + { dchar lastf; + dchar lastt; + dchar newc; + int n = 0; + + for (size_t i = 0; i < from.length; ) + { + dchar f = std.utf.decode(from, i); + //writefln("\tf = '%s', c = '%s', lastf = '%x', '%x', i = %d, %d", f, c, lastf, dchar.init, i, from.length); + if (f == '-' && lastf != dchar.init && i < from.length) + { + dchar nextf = std.utf.decode(from, i); + //writefln("\tlastf = '%s', c = '%s', nextf = '%s'", lastf, c, nextf); + if (lastf <= c && c <= nextf) + { + n += c - lastf - 1; + if (mod_c) + goto Lnotfound; + goto Lfound; + } + n += nextf - lastf; + lastf = lastf.init; + continue; + } + + if (c == f) + { if (mod_c) + goto Lnotfound; + goto Lfound; + } + lastf = f; + n++; + } + if (!mod_c) + goto Lnotfound; + n = 0; // consider it 'found' at position 0 + + Lfound: + + // Find the nth character in to[] + //writefln("\tc = '%s', n = %d", c, n); + dchar nextt; + for (size_t i = 0; i < to.length; ) + { dchar t = std.utf.decode(to, i); + if (t == '-' && lastt != dchar.init && i < to.length) + { + nextt = std.utf.decode(to, i); + //writefln("\tlastt = '%s', c = '%s', nextt = '%s', n = %d", lastt, c, nextt, n); + n -= nextt - lastt; + if (n < 0) + { + newc = nextt + n + 1; + goto Lnewc; + } + lastt = dchar.init; + continue; + } + if (n == 0) + { newc = t; + goto Lnewc; + } + lastt = t; + nextt = t; + n--; + } + if (mod_d) + continue; + newc = nextt; + + Lnewc: + if (mod_s && m && newc == lastc) + continue; + std.utf.encode(result, newc); + m = 1; + lastc = newc; + continue; + + Lnotfound: + std.utf.encode(result, c); + lastc = c; + m = 0; + } + return result; +} + +unittest +{ + debug(string) printf("std.string.tr.unittest\n"); + + char[] r; + //writefln("r = '%s'", r); + + r = tr("abcdef", "cd", "CD"); + assert(r == "abCDef"); + + r = tr("abcdef", "b-d", "B-D"); + assert(r == "aBCDef"); + + r = tr("abcdefgh", "b-dh", "B-Dx"); + assert(r == "aBCDefgx"); + + r = tr("abcdefgh", "b-dh", "B-CDx"); + assert(r == "aBCDefgx"); + + r = tr("abcdefgh", "b-dh", "B-BCDx"); + assert(r == "aBCDefgx"); + + r = tr("abcdef", "ef", "*", "c"); + assert(r == "****ef"); + + r = tr("abcdef", "ef", "", "d"); + assert(r == "abcd"); + + r = tr("hello goodbye", "lo", null, "s"); + assert(r == "helo godbye"); + + r = tr("hello goodbye", "lo", "x", "s"); + assert(r == "hex gxdbye"); + + r = tr("14-Jul-87", "a-zA-Z", " ", "cs"); + assert(r == " Jul "); + + r = tr("Abc", "AAA", "XYZ"); + assert(r == "Xbc"); +} + + +/* ************************************************ + * Version : v0.3 + * Author : David L. 'SpottedTiger' Davis + * Date Created : 31.May.05 Compiled and Tested with dmd v0.125 + * Date Modified : 01.Jun.05 Modified the function to handle the + * : imaginary and complex float-point + * : datatypes. + * : + * Licence : Public Domain / Contributed to Digital Mars + */ + +/** + * [in] char[] s can be formatted in the following ways: + * + * Integer Whole Number: + * (for byte, ubyte, short, ushort, int, uint, long, and ulong) + * ['+'|'-']digit(s)[U|L|UL] + * + * examples: 123, 123UL, 123L, +123U, -123L + * + * Floating-Point Number: + * (for float, double, real, ifloat, idouble, and ireal) + * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]] + * or [nan|nani|inf|-inf] + * + * examples: +123., -123.01, 123.3e-10f, 123.3e-10fi, 123.3e-10L + * + * (for cfloat, cdouble, and creal) + * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+] + * [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]] + * or [nan|nani|nan+nani|inf|-inf] + * + * examples: nan, -123e-1+456.9e-10Li, +123e+10+456i, 123+456 + * + * [in] bool bAllowSep + * False by default, but when set to true it will accept the + * separator characters "," and "_" within the string, but these + * characters should be stripped from the string before using any + * of the conversion functions like toInt(), toFloat(), and etc + * else an error will occur. + * + * Also please note, that no spaces are allowed within the string + * anywhere whether it's a leading, trailing, or embedded space(s), + * thus they too must be stripped from the string before using this + * function, or any of the conversion functions. + */ + +final bool isNumeric(in char[] s, in bool bAllowSep = false) +{ + int iLen = s.length; + bool bDecimalPoint = false; + bool bExponent = false; + bool bComplex = false; + char[] sx = std.string.tolower(s); + int j = 0; + char c; + + //writefln("isNumeric(char[], bool = false) called!"); + // Empty string, return false + if (iLen == 0) + return false; + + // Check for NaN (Not a Number) + if (sx == "nan" || sx == "nani" || sx == "nan+nani") + return true; + + // Check for Infinity + if (sx == "inf" || sx == "-inf") + return true; + + // A sign is allowed only in the 1st character + if (sx[0] == '-' || sx[0] == '+') + j++; + + for (int i = j; i < iLen; i++) + { + c = sx[i]; + + // Digits are good, continue checking + // with the next character... ;) + if (c >= '0' && c <= '9') + continue; + + // Check for the complex type, and if found + // reset the flags for checking the 2nd number. + else if (c == '+') + if (i > 0) + { + bDecimalPoint = false; + bExponent = false; + bComplex = true; + continue; + } + else + return false; + + // Allow only one exponent per number + else if (c == 'e') + { + // A 2nd exponent found, return not a number + if (bExponent) + return false; + + if (i + 1 < iLen) + { + // Look forward for the sign, and if + // missing then this is not a number. + if (sx[i + 1] != '-' && sx[i + 1] != '+') + return false; + else + { + bExponent = true; + i++; + } + } + else + // Ending in "E", return not a number + return false; + } + // Allow only one decimal point per number to be used + else if (c == '.' ) + { + // A 2nd decimal point found, return not a number + if (bDecimalPoint) + return false; + + bDecimalPoint = true; + continue; + } + // Check for ending literal characters: "f,u,l,i,ul,fi,li", + // and wheater they're being used with the correct datatype. + else if (i == iLen - 2) + { + // Integer Whole Number + if (sx[i..iLen] == "ul" && + (!bDecimalPoint && !bExponent && !bComplex)) + return true; + // Floating-Point Number + else if ((sx[i..iLen] == "fi" || sx[i..iLen] == "li") && + (bDecimalPoint || bExponent || bComplex)) + return true; + else if (sx[i..iLen] == "ul" && + (bDecimalPoint || bExponent || bComplex)) + return false; + // Could be a Integer or a Float, thus + // all these suffixes are valid for both + else if (sx[i..iLen] == "ul" || + sx[i..iLen] == "fi" || + sx[i..iLen] == "li") + return true; + else + return false; + } + else if (i == iLen - 1) + { + // Integer Whole Number + if ((c == 'u' || c == 'l') && + (!bDecimalPoint && !bExponent && !bComplex)) + return true; + // Check to see if the last character in the string + // is the required 'i' character + else if (bComplex) + if (c == 'i') + return true; + else + return false; + // Floating-Point Number + else if ((c == 'l' || c == 'f' || c == 'i') && + (bDecimalPoint || bExponent)) + return true; + // Could be a Integer or a Float, thus + // all these suffixes are valid for both + else if (c == 'l' || c == 'f' || c == 'i') + return true; + else + return false; + } + else + // Check if separators are allow + // to be in the numeric string + if (bAllowSep == true && (c == '_' || c == ',')) + continue; + else + return false; + } + + return true; +} + +/// Allow any object as a parameter +bool isNumeric(...) +{ + return isNumeric(_arguments, _argptr); +} + +/// Check only the first parameter, all others will be ignored. +bool isNumeric(TypeInfo[] _arguments, va_list _argptr) +{ + char[] s = ""; + wchar[] ws = ""; + dchar[] ds = ""; + + //writefln("isNumeric(...) called!"); + if (_arguments.length == 0) + return false; + + if (_arguments[0] == typeid(char[])) + return isNumeric(va_arg!(char[])(_argptr)); + else if (_arguments[0] == typeid(wchar[])) + return isNumeric(std.utf.toUTF8(va_arg!(wchar[])(_argptr))); + else if (_arguments[0] == typeid(dchar[])) + return isNumeric(std.utf.toUTF8(va_arg!(dchar[])(_argptr))); + else if (_arguments[0] == typeid(real)) + return true; + else if (_arguments[0] == typeid(double)) + return true; + else if (_arguments[0] == typeid(float)) + return true; + else if (_arguments[0] == typeid(ulong)) + return true; + else if (_arguments[0] == typeid(long)) + return true; + else if (_arguments[0] == typeid(uint)) + return true; + else if (_arguments[0] == typeid(int)) + return true; + else if (_arguments[0] == typeid(ushort)) + return true; + else if (_arguments[0] == typeid(short)) + return true; + else if (_arguments[0] == typeid(ubyte)) + { + s.length = 1; + s[0]= va_arg!(ubyte)(_argptr); + return isNumeric(cast(char[])s); + } + else if (_arguments[0] == typeid(byte)) + { + s.length = 1; + s[0] = va_arg!(byte)(_argptr); + return isNumeric(cast(char[])s); + } + else if (_arguments[0] == typeid(ireal)) + return true; + else if (_arguments[0] == typeid(idouble)) + return true; + else if (_arguments[0] == typeid(ifloat)) + return true; + else if (_arguments[0] == typeid(creal)) + return true; + else if (_arguments[0] == typeid(cdouble)) + return true; + else if (_arguments[0] == typeid(cfloat)) + return true; + else if (_arguments[0] == typeid(char)) + { + s.length = 1; + s[0] = va_arg!(char)(_argptr); + return isNumeric(s); + } + else if (_arguments[0] == typeid(wchar)) + { + ws.length = 1; + ws[0] = va_arg!(wchar)(_argptr); + return isNumeric(std.utf.toUTF8(ws)); + } + else if (_arguments[0] == typeid(dchar)) + { + ds.length = 1; + ds[0] = va_arg!(dchar)(_argptr); + return isNumeric(std.utf.toUTF8(ds)); + } + //else if (_arguments[0] == typeid(cent)) + // return true; + //else if (_arguments[0] == typeid(ucent)) + // return true; + else + return false; +} + +unittest +{ + debug (string) printf("isNumeric(in char[], bool = false).unittest\n"); + char[] s; + + // Test the isNumeric(in char[]) function + assert(isNumeric("1") == true ); + assert(isNumeric("1.0") == true ); + assert(isNumeric("1e-1") == true ); + assert(isNumeric("12345xxxx890") == false ); + assert(isNumeric("567L") == true ); + assert(isNumeric("23UL") == true ); + assert(isNumeric("-123..56f") == false ); + assert(isNumeric("12.3.5.6") == false ); + assert(isNumeric(" 12.356") == false ); + assert(isNumeric("123 5.6") == false ); + assert(isNumeric("1233E-1+1.0e-1i") == true ); + + assert(isNumeric("123.00E-5+1234.45E-12Li") == true); + assert(isNumeric("123.00e-5+1234.45E-12iL") == false); + assert(isNumeric("123.00e-5+1234.45e-12uL") == false); + assert(isNumeric("123.00E-5+1234.45e-12lu") == false); + + assert(isNumeric("123fi") == true); + assert(isNumeric("123li") == true); + assert(isNumeric("--123L") == false); + assert(isNumeric("+123.5UL") == false); + assert(isNumeric("123f") == true); + assert(isNumeric("123.u") == false); + + assert(isNumeric(std.string.toString(real.nan)) == true); + assert(isNumeric(std.string.toString(-real.infinity)) == true); + assert(isNumeric(std.string.toString(123e+2+1234.78Li)) == true); + + s = "$250.99-"; + assert(isNumeric(s[1..s.length - 2]) == true); + assert(isNumeric(s) == false); + assert(isNumeric(s[0..s.length - 1]) == false); + + // These test calling the isNumeric(...) function + assert(isNumeric(1,123UL) == true); + assert(isNumeric('2') == true); + assert(isNumeric('x') == false); + assert(isNumeric(cast(byte)0x57) == false); // 'W' + assert(isNumeric(cast(byte)0x37) == true); // '7' + assert(isNumeric(cast(wchar[])"145.67") == true); + assert(isNumeric(cast(dchar[])"145.67U") == false); + assert(isNumeric(123_000.23fi) == true); + assert(isNumeric(123.00E-5+1234.45E-12Li) == true); + assert(isNumeric(real.nan) == true); + assert(isNumeric(-real.infinity) == true); +} + + +/***************************** + * Soundex algorithm. + * + * The Soundex algorithm converts a word into 4 characters + * based on how the word sounds phonetically. The idea is that + * two spellings that sound alike will have the same Soundex + * value, which means that Soundex can be used for fuzzy matching + * of names. + * + * Params: + * string = String to convert to Soundex representation. + * buffer = Optional 4 char array to put the resulting Soundex + * characters into. If null, the return value + * buffer will be allocated on the heap. + * Returns: + * The four character array with the Soundex result in it. + * Returns null if there is no Soundex representation for the string. + * + * See_Also: + * $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia), + * $(LINK2 http://www.archives.gov/publications/general-info-leaflets/55.html, The Soundex Indexing System) + * + * Bugs: + * Only works well with English names. + * There are other arguably better Soundex algorithms, + * but this one is the standard one. + */ + +char[] soundex(char[] string, char[] buffer = null) +in +{ + assert(!buffer || buffer.length >= 4); +} +out (result) +{ + if (result) + { + assert(result.length == 4); + assert(result[0] >= 'A' && result[0] <= 'Z'); + foreach (char c; result[1 .. 4]) + assert(c >= '0' && c <= '6'); + } +} +body +{ + static char[26] dex = + // ABCDEFGHIJKLMNOPQRSTUVWXYZ + "01230120022455012623010202"; + + int b = 0; + char lastc; + foreach (char c; string) + { + if (c >= 'a' && c <= 'z') + c -= 'a' - 'A'; + else if (c >= 'A' && c <= 'Z') + { + ; + } + else + { lastc = lastc.init; + continue; + } + if (b == 0) + { + if (!buffer) + buffer = new char[4]; + buffer[0] = c; + b++; + lastc = dex[c - 'A']; + } + else + { + if (c == 'H' || c == 'W') + continue; + if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U') + lastc = lastc.init; + c = dex[c - 'A']; + if (c != '0' && c != lastc) + { + buffer[b] = c; + b++; + lastc = c; + } + } + if (b == 4) + goto Lret; + } + if (b == 0) + buffer = null; + else + buffer[b .. 4] = '0'; +Lret: + return buffer; +} + +unittest +{ char[4] buffer; + + assert(soundex(null) == null); + assert(soundex("") == null); + assert(soundex("0123^&^^**&^") == null); + assert(soundex("Euler") == "E460"); + assert(soundex(" Ellery ") == "E460"); + assert(soundex("Gauss") == "G200"); + assert(soundex("Ghosh") == "G200"); + assert(soundex("Hilbert") == "H416"); + assert(soundex("Heilbronn") == "H416"); + assert(soundex("Knuth") == "K530"); + assert(soundex("Kant", buffer) == "K530"); + assert(soundex("Lloyd") == "L300"); + assert(soundex("Ladd") == "L300"); + assert(soundex("Lukasiewicz", buffer) == "L222"); + assert(soundex("Lissajous") == "L222"); + assert(soundex("Robert") == "R163"); + assert(soundex("Rupert") == "R163"); + assert(soundex("Rubin") == "R150"); + assert(soundex("Washington") == "W252"); + assert(soundex("Lee") == "L000"); + assert(soundex("Gutierrez") == "G362"); + assert(soundex("Pfister") == "P236"); + assert(soundex("Jackson") == "J250"); + assert(soundex("Tymczak") == "T522"); + assert(soundex("Ashcraft") == "A261"); + + assert(soundex("Woo") == "W000"); + assert(soundex("Pilgrim") == "P426"); + assert(soundex("Flingjingwaller") == "F452"); + assert(soundex("PEARSE") == "P620"); + assert(soundex("PIERCE") == "P620"); + assert(soundex("Price") == "P620"); + assert(soundex("CATHY") == "C300"); + assert(soundex("KATHY") == "K300"); + assert(soundex("Jones") == "J520"); + assert(soundex("johnsons") == "J525"); + assert(soundex("Hardin") == "H635"); + assert(soundex("Martinez") == "M635"); +} + + +/*************************************************** + * Construct an associative array consisting of all + * abbreviations that uniquely map to the strings in values. + * + * This is useful in cases where the user is expected to type + * in one of a known set of strings, and the program will helpfully + * autocomplete the string once sufficient characters have been + * entered that uniquely identify it. + * Example: + * --- + * import std.stdio; + * import std.string; + * + * void main() + * { + * static char[][] list = [ "food", "foxy" ]; + * + * auto abbrevs = std.string.abbrev(list); + * + * foreach (key, value; abbrevs) + * { + * writefln("%s => %s", key, value); + * } + * } + * --- + * produces the output: + * <pre> + * fox => foxy + * food => food + * foxy => foxy + * foo => food + * </pre> + */ + +char[][char[]] abbrev(char[][] values) +{ + char[][char[]] result; + + // Make a copy when sorting so we follow COW principles. + values = values.dup.sort; + + size_t values_length = values.length; + size_t lasti = values_length; + size_t nexti; + + char[] nv; + char[] lv; + + for (size_t i = 0; i < values_length; i = nexti) + { char[] value = values[i]; + + // Skip dups + for (nexti = i + 1; nexti < values_length; nexti++) + { nv = values[nexti]; + if (value != values[nexti]) + break; + } + + for (size_t j = 0; j < value.length; j += std.utf.stride(value, j)) + { char[] v = value[0 .. j]; + + if ((nexti == values_length || j > nv.length || v != nv[0 .. j]) && + (lasti == values_length || j > lv.length || v != lv[0 .. j])) + result[v] = value; + } + result[value] = value; + lasti = i; + lv = value; + } + + return result; +} + +unittest +{ + debug(string) printf("string.abbrev.unittest\n"); + + char[][] values; + values ~= "hello"; + values ~= "hello"; + values ~= "he"; + + char[][char[]] r; + + r = abbrev(values); + char[][] keys = r.keys.dup; + keys.sort; + + assert(keys.length == 4); + assert(keys[0] == "he"); + assert(keys[1] == "hel"); + assert(keys[2] == "hell"); + assert(keys[3] == "hello"); + + assert(r[keys[0]] == "he"); + assert(r[keys[1]] == "hello"); + assert(r[keys[2]] == "hello"); + assert(r[keys[3]] == "hello"); +} + + +/****************************************** + * Compute column number after string if string starts in the + * leftmost column, which is numbered starting from 0. + */ + +size_t column(char[] string, int tabsize = 8) +{ + size_t column; + + foreach (dchar c; string) + { + switch (c) + { + case '\t': + column = (column + tabsize) / tabsize * tabsize; + break; + + case '\r': + case '\n': + case PS: + case LS: + column = 0; + break; + + default: + column++; + break; + } + } + return column; +} + +unittest +{ + debug(string) printf("string.column.unittest\n"); + + assert(column(null) == 0); + assert(column("") == 0); + assert(column("\t") == 8); + assert(column("abc\t") == 8); + assert(column("12345678\t") == 16); +} + +/****************************************** + * Wrap text into a paragraph. + * + * The input text string s is formed into a paragraph + * by breaking it up into a sequence of lines, delineated + * by \n, such that the number of columns is not exceeded + * on each line. + * The last line is terminated with a \n. + * Params: + * s = text string to be wrapped + * columns = maximum number of _columns in the paragraph + * firstindent = string used to _indent first line of the paragraph + * indent = string to use to _indent following lines of the paragraph + * tabsize = column spacing of tabs + * Returns: + * The resulting paragraph. + */ + +char[] wrap(char[] s, int columns = 80, char[] firstindent = null, + char[] indent = null, int tabsize = 8) +{ + char[] result; + int col; + int spaces; + bool inword; + bool first = true; + size_t wordstart; + + result.length = firstindent.length + s.length; + result.length = firstindent.length; + result[] = firstindent[]; + col = column(result, tabsize); + foreach (size_t i, dchar c; s) + { + if (iswhite(c)) + { + if (inword) + { + if (first) + { + ; + } + else if (col + 1 + (i - wordstart) > columns) + { + result ~= '\n'; + result ~= indent; + col = column(indent, tabsize); + } + else + { result ~= ' '; + col += 1; + } + result ~= s[wordstart .. i]; + col += i - wordstart; + inword = false; + first = false; + } + } + else + { + if (!inword) + { + wordstart = i; + inword = true; + } + } + } + + if (inword) + { + if (col + 1 + (s.length - wordstart) >= columns) + { + result ~= '\n'; + result ~= indent; + } + else if (result.length != firstindent.length) + result ~= ' '; + result ~= s[wordstart .. s.length]; + } + result ~= '\n'; + + return result; +} + +unittest +{ + debug(string) printf("string.wrap.unittest\n"); + + assert(wrap(null) == "\n"); + assert(wrap(" a b df ") == "a b df\n"); + //writefln("'%s'", wrap(" a b df ",3)); + assert(wrap(" a b df ", 3) == "a b\ndf\n"); + assert(wrap(" a bc df ", 3) == "a\nbc\ndf\n"); + //writefln("'%s'", wrap(" abcd df ",3)); + assert(wrap(" abcd df ", 3) == "abcd\ndf\n"); + assert(wrap("x") == "x\n"); + assert(wrap("u u") == "u u\n"); +} + + +/*************************** + * Does string s[] start with an email address? + * Returns: + * null it does not + * char[] it does, and this is the slice of s[] that is that email address + * References: + * RFC2822 + */ +char[] isEmail(char[] s) +{ size_t i; + + if (!isalpha(s[0])) + goto Lno; + + for (i = 1; 1; i++) + { + if (i == s.length) + goto Lno; + auto c = s[i]; + if (isalnum(c)) + continue; + if (c == '-' || c == '_' || c == '.') + continue; + if (c != '@') + goto Lno; + i++; + break; + } + //writefln("test1 '%s'", s[0 .. i]); + + /* Now do the part past the '@' + */ + size_t lastdot; + for (; i < s.length; i++) + { + auto c = s[i]; + if (isalnum(c)) + continue; + if (c == '-' || c == '_') + continue; + if (c == '.') + { + lastdot = i; + continue; + } + break; + } + if (!lastdot || (i - lastdot != 3 && i - lastdot != 4)) + goto Lno; + + return s[0 .. i]; + +Lno: + return null; +} + + +/*************************** + * Does string s[] start with a URL? + * Returns: + * null it does not + * char[] it does, and this is the slice of s[] that is that URL + */ + +char[] isURL(char[] s) +{ + /* Must start with one of: + * http:// + * https:// + * www. + */ + + size_t i; + + if (s.length <= 4) + goto Lno; + + //writefln("isURL(%s)", s); + if (s.length > 7 && std.string.icmp(s[0 .. 7], "http://") == 0) + i = 7; + else if (s.length > 8 && std.string.icmp(s[0 .. 8], "https://") == 0) + i = 8; +// if (icmp(s[0 .. 4], "www.") == 0) +// i = 4; + else + goto Lno; + + size_t lastdot; + for (; i < s.length; i++) + { + auto c = s[i]; + if (isalnum(c)) + continue; + if (c == '-' || c == '_' || c == '?' || + c == '=' || c == '%' || c == '&' || + c == '/' || c == '+' || c == '#' || + c == '~') + continue; + if (c == '.') + { + lastdot = i; + continue; + } + break; + } + //if (!lastdot || (i - lastdot != 3 && i - lastdot != 4)) + if (!lastdot) + goto Lno; + + return s[0 .. i]; + +Lno: + return null; +} + +