Mercurial > projects > dynamin
annotate dynamin/core/string.d @ 111:8ba1044adc93
Rename Newline.Macintosh/Linux to better names.
CR is not used on Mac since before OS X. And LF is not just used on
Linux, but other Unix systems too.
author | Jordan Miner <jminer7@gmail.com> |
---|---|
date | Sat, 19 Jan 2013 20:57:11 -0600 |
parents | 6613b65a6035 |
children |
rev | line source |
---|---|
0 | 1 |
2 /* | |
103
73060bc3f004
Change license to Boost 1.0 and MPL 2.0.
Jordan Miner <jminer7@gmail.com>
parents:
102
diff
changeset
|
3 * Copyright Jordan Miner |
0 | 4 * |
103
73060bc3f004
Change license to Boost 1.0 and MPL 2.0.
Jordan Miner <jminer7@gmail.com>
parents:
102
diff
changeset
|
5 * Distributed under the Boost Software License, Version 1.0. |
73060bc3f004
Change license to Boost 1.0 and MPL 2.0.
Jordan Miner <jminer7@gmail.com>
parents:
102
diff
changeset
|
6 * (See accompanying file BOOST_LICENSE.txt or copy at |
73060bc3f004
Change license to Boost 1.0 and MPL 2.0.
Jordan Miner <jminer7@gmail.com>
parents:
102
diff
changeset
|
7 * http://www.boost.org/LICENSE_1_0.txt) |
0 | 8 * |
9 */ | |
10 | |
11 /** | |
12 * These functions should all return a new string if it is possible for them | |
13 * to ever modify their input. If they will never modify their input, they | |
14 * should always return a slice. | |
15 */ | |
16 module dynamin.core.string; | |
17 | |
102
604d20cac836
Add dynamin.core.array and put contains() there.
Jordan Miner <jminer7@gmail.com>
parents:
94
diff
changeset
|
18 public import dynamin.core.array; |
604d20cac836
Add dynamin.core.array and put contains() there.
Jordan Miner <jminer7@gmail.com>
parents:
94
diff
changeset
|
19 |
0 | 20 import tango.core.Exception; |
21 import tango.text.convert.Utf; | |
22 import tango.text.convert.Layout; | |
23 import tango.text.Unicode; | |
24 import dynamin.core.global; | |
25 import dynamin.core.math; | |
26 | |
106 | 27 /// Defined as char[] |
28 alias char[] mstring; | |
29 /// Defined as const(char)[] | |
30 alias const(char)[] cstring; | |
0 | 31 |
32 /// | |
char* toCharPtr(cstring str) {
	// Append the NUL terminator first, then duplicate so the caller gets a
	// mutable buffer of its own.
	auto terminated = str ~ '\0';
	return terminated.dup.ptr;
}
36 /// | |
wchar* toWcharPtr(cstring str) {
	// Terminate the UTF-8 input before transcoding so the UTF-16 result
	// ends with a NUL code unit as well.
	auto wide = toString16(str ~ '\0');
	return wide.ptr;
}
40 | |
41 /* | |
106 | 42 string toString(ulong num, uint base = 10) { |
0 | 43 if(base > 16) |
106 | 44 throw new Exception("toString() - radix more than 16"); |
0 | 45 char[] digits = "0123456789abcdef"; |
46 string str; | |
47 ulong div = base; | |
48 ulong prevDiv = 1; | |
49 do { | |
50 uint rem = num % div; | |
51 str ~= digits[rem/prevDiv]; | |
52 prevDiv = div; | |
53 div *= base; | |
54 num -= rem; | |
55 } while(num > 0); | |
56 str.reverse; | |
57 return str; | |
58 } | |
59 */ | |
94
3b0c5c599003
Add isHighSurrogate() and isLowSurrogate().
Jordan Miner <jminer7@gmail.com>
parents:
0
diff
changeset
|
60 |
3b0c5c599003
Add isHighSurrogate() and isLowSurrogate().
Jordan Miner <jminer7@gmail.com>
parents:
0
diff
changeset
|
61 // TODO: move to encoding.d |
3b0c5c599003
Add isHighSurrogate() and isLowSurrogate().
Jordan Miner <jminer7@gmail.com>
parents:
0
diff
changeset
|
/**
 * Tests whether a UTF-16 code unit is a high (leading) surrogate, i.e. lies
 * in the range 0xD800 to 0xDBFF inclusive. In a surrogate pair the high
 * surrogate is the first of the two code units.
 */
bool isHighSurrogate(wchar c) {
	// Every value in 0xD800..0xDBFF shares the same top six bits.
	return (c & 0xFC00) == 0xD800;
}
3b0c5c599003
Add isHighSurrogate() and isLowSurrogate().
Jordan Miner <jminer7@gmail.com>
parents:
0
diff
changeset
|
/**
 * Tests whether a UTF-16 code unit is a low (trailing) surrogate, i.e. lies
 * in the range 0xDC00 to 0xDFFF inclusive. In a surrogate pair the low
 * surrogate is the second of the two code units.
 */
bool isLowSurrogate(wchar c) {
	// Every value in 0xDC00..0xDFFF shares the same top six bits.
	return (c & 0xFC00) == 0xDC00;
}
3b0c5c599003
Add isHighSurrogate() and isLowSurrogate().
Jordan Miner <jminer7@gmail.com>
parents:
0
diff
changeset
|
76 |
/// Layout instance used by format(); created in the module constructor below.
Layout!(char) formatter;
static this() {
	formatter = new Layout!(char);
}
/**
 * Renders the variadic arguments into the format string `str` (placeholders
 * are written as `{}`) and returns the result as an immutable string.
 */
string format(cstring str, ...) {
	auto rendered = formatter.convert(_arguments, _argptr, str);
	return rendered.idup;
}
unittest {
	assert(format("I am {}", 20) == "I am 20");
}
87 | |
88 /** | |
106 | 89 * Converts all lowercase characters in the specified string to uppercase. Obviously, the |
90 * conversion is not done in place, but in-place conversion can be accomplished by passing | |
91 * the same string as `buffer`. | |
92 * | |
93 * Do not use this function to normalize strings to the same case (like to compare them). Instead, | |
94 * use toUppercase(). See http://msdn.microsoft.com/en-us/library/bb386042%28v=vs.90%29.aspx. | |
95 * | |
0 | 96 * Examples: |
97 * ----- | |
98 * "Bounce the ball.".upcase() == "BOUNCE THE BALL." | |
99 * "Mañana".upcase() == "MAÑANA" | |
100 * "æóëø".upcase() == "ÆÓËØ" | |
101 * ----- | |
102 */ | |
mstring upcase(cstring str, mstring buffer = null) { // TODO: use buffer
	// `buffer` is accepted but not used yet; the conversion is delegated
	// to toUpper().
	return toUpper(str);
}
unittest {
	assert("Bounce the ball.".upcase() == "BOUNCE THE BALL.");
	assert("Mañana".upcase() == "MAÑANA");
	assert("æóëø".upcase() == "ÆÓËØ");
	// This assertion used to be a copy of the downcase() check; it now
	// exercises upcase() on the same Greek sample.
	assert("σε".upcase() == "ΣΕ");
}
106 | 112 |
0 | 113 /** |
106 | 114 * Converts all uppercase characters in the specified string to lowercase. Obviously, the |
115 * conversion is not done in place, but in-place conversion can be accomplished by passing | |
116 * the same string as `buffer`. | |
117 * | |
0 | 118 * Examples: |
119 * ----- | |
120 * "BoUnCe ThE BALL.".downcase() == "bounce the ball." | |
121 * "MAÑANA".downcase() == "mañana" | |
122 * "ÆÓËØ".downcase() == "æóëø" | |
123 * ----- | |
124 */ | |
mstring downcase(cstring str, mstring buffer = null) { // TODO: use buffer
	// `buffer` is accepted but not used yet; toLower() does the work.
	auto lowered = toLower(str);
	return lowered;
}
unittest {
	assert("ΣΕ".downcase() == "σε");
	assert("ÆÓËØ".downcase() == "æóëø");
	assert("MAÑANA".downcase() == "mañana");
	assert("BoUnCe ThE BALL.".downcase() == "bounce the ball.");
}
134 | |
135 // TODO: make more use of delegates in these? | |
136 // TODO: use templates so that these work with wchar and dchar? | |
bool startsWith(cstring str, cstring subStr, int start = 0) {
	// End (exclusive) of the window in `str` that is compared to subStr.
	auto stop = start+subStr.length;
	return stop <= str.length && str[start..stop] == subStr;
}
/**
 * Returns true if `str` ends with `subStr`.
 */
bool endsWith(cstring str, cstring subStr) {
	return endsWith(str, subStr, str.length);
}
/**
 * Returns true if the part of `str` that ends just before index `start`
 * ends with `subStr`; that is, if str[start-subStr.length..start] == subStr.
 * Returns false when `start` is outside the string or subStr does not fit.
 */
bool endsWith(cstring str, cstring subStr, int start) {
	// The lengths are unsigned, so `start - subStr.length` can never be
	// negative; compare the operands directly instead of subtracting first.
	if(start < 0 || start > str.length || subStr.length > start)
		return false;
	return str[start-subStr.length..start] == subStr;
}
unittest {
	assert("Framework".endsWith("work"));
	assert("Framework".endsWith(""));
	assert(!"work".endsWith("Framework"));
	assert("Framework".endsWith("Frame", 5));
	assert(!"Framework".endsWith("work", 5));
}
int findLast(cstring str, cstring subStr) {
	return findLast(str, subStr, str.length);
}
int findLast(cstring str, cstring subStr, int start) {
	// Walk backwards from the last position at which subStr would still end
	// at or before `start`.
	int i = start-subStr.length;
	while(i >= 0) {
		if(str[i..i+subStr.length] == subStr)
			return i;
		--i;
	}
	return -1;
}
/**
 * Returns the index of the first occurrence of `subStr` in `str` at or
 * after index `start`, or -1 if there is none.
 */
int find(cstring str, cstring subStr, int start = 0) {
	// Guard first: the lengths are unsigned, so subtracting a longer subStr
	// would wrap around instead of going negative.
	if(start < 0 || subStr.length > str.length)
		return -1;
	// Last index at which a match can begin. The bound is inclusive so that
	// a match ending exactly at the end of str is found.
	auto last = str.length-subStr.length;
	for(int i = start; i <= last; ++i)
		if(str[i..i+subStr.length] == subStr)
			return i;
	return -1;
}
unittest {
	assert("Hello".find("lo") == 3);
	assert("Hello".find("Hello") == 0);
	assert("Hi".find("Hello") == -1);
	assert("a=b=".find("=", 2) == 3);
	assert("Hello".find("l", 5) == -1);
}
102
604d20cac836
Add dynamin.core.array and put contains() there.
Jordan Miner <jminer7@gmail.com>
parents:
94
diff
changeset
|
165 |
/**
 * Returns a new string equal to `str` with `count` characters removed,
 * beginning at index `start`. `buffer` is not used yet.
 */
mstring remove(cstring str, int start, int count = 1, mstring buffer = null) { // TODO: use buffer
	// can't use concatenation because const(char)[] ~ const(char)[] is const(char)[]
	//return str[0..start] ~ str[start+count..str.length];

	mstring str2 = new char[str.length - count];
	str2[0..start] = str[0..start];
	// The tail lands directly after the kept prefix and fills the rest of
	// the result; both slices hold str.length-start-count characters.
	str2[start..$] = str[start+count..$];
	return str2;
}
unittest {
	assert("Hello".remove(1) == "Hllo");
	assert("Hello".remove(0, 2) == "llo");
	assert("Hello".remove(3, 2) == "Hel");
	assert("Hello".remove(2, 0) == "Hello");
}
175 // TODO: ? | |
176 // split(string str, int delegate(string s) func) | |
177 //string[] split(string str, string subStr) { | |
178 // return split(str, (string s) { return s.startsWith(subStr) ? subStr.length, : -1; }; | |
179 //} | |
106 | 180 |
181 /// Returns slices. | |
0 | 182 //split1("50=20=10", "=") -> ["50", "20=10"] |
inout(char)[][] split1(inout(char)[] str, cstring subStr) {
	// An empty separator cannot split anything.
	if(subStr.length == 0)
		return [str];
	int at = find(str, subStr);
	if(at == -1)
		return [str];
	// Split around the first occurrence only; both parts are slices of str.
	return [str[0..at], str[at+subStr.length..$]];
}
194 //split("50=20=10", "=") -> ["50", "20", "10"] | |
inout(char)[][] split(inout(char)[] str, cstring subStr) {
	// An empty separator cannot split anything.
	if(subStr.length == 0)
		return [str];
	inout(char)[][] pieces;
	for(int from = 0; from < str.length; ) {
		// No further separator: the final piece runs to the end of str.
		int stop = find(str, subStr, from);
		if(stop == -1)
			stop = str.length;
		pieces ~= str[from..stop];
		from = stop+subStr.length;
	}
	return pieces;
}
211 /// | |
enum Newline {
	/// Carriage return ("\r").
	Cr = 0,
	/// Line feed ("\n").
	Lf = 1,
	/// Carriage return followed by line feed ("\r\n").
	Crlf = 2,
	/// Same value as Cr; the convention on Mac systems before OS X.
	ClassicMacOS = Cr,
	/// Same value as Lf; the convention on Linux and other Unix systems.
	Unix = Lf,
	/// Same value as Crlf.
	Windows = Crlf
}
226 /** | |
227 * Changes every occurrence of a newline in the specified string to the specified newline. | |
228 * Examples: | |
229 * ----- | |
230 * "\r\n\n\r".convertNewlines(Newline.Lf) == "\n\n\n" | |
231 * "\r\n\n\r".convertNewlines(Newline.Windows) == "\r\n\r\n\r\n" | |
111
8ba1044adc93
Rename Newline.Macintosh/Linux to better names.
Jordan Miner <jminer7@gmail.com>
parents:
110
diff
changeset
|
232 * "\n\r\n".convertNewlines(Newline.ClassicMacOS) == "\r\r" |
0 | 233 * ----- |
234 */ | |
mstring convertNewlines(cstring str, Newline nl, mstring buffer = null) { // TODO: use buffer
	string target;
	final switch(nl) {
	case Newline.Crlf: target = "\r\n"; break;
	case Newline.Lf:   target = "\n";   break;
	case Newline.Cr:   target = "\r";   break;
	}
	// "\r\n" is listed first so a CR LF pair is replaced as one newline
	// rather than as two separate ones.
	string[] anyNewline = ["\r\n", "\r", "\n"];
	return str.replace(anyNewline, target);
}
unittest {
	assert("\n\r\n".convertNewlines(Newline.ClassicMacOS) == "\r\r");
	assert("\r\n\n\r".convertNewlines(Newline.Windows) == "\r\n\r\n\r\n");
	assert("\r\n\n\r".convertNewlines(Newline.Lf) == "\n\n\n");
}
249 | |
250 /** | |
251 * Joins all the strings in the specified array together into one string, putting | |
252 * the specified separator between them. | |
253 * Examples: | |
254 * ----- | |
255 * join(["10", "15", "17"], " - ") == "10 - 15 - 17" | |
256 * join(["789", "672", "484"], ",") == "789,672,484" | |
257 * join(["aol.com", "join", "intro.html"], "/") == "aol.com/join/intro.html" | |
258 * ----- | |
259 */ | |
mstring join(string[] strs, cstring sep) {
	if(strs.length == 0)
		return "".dup;

	// Size the result up front: every piece plus one separator per gap.
	int total;
	total += sep.length*(strs.length-1);
	foreach(piece; strs)
		total += piece.length;

	mstring joined = new char[total];
	int pos;
	foreach(i, piece; strs) {
		if(i != 0) {
			// A separator goes before every piece except the first.
			joined[pos..pos+sep.length] = sep;
			pos += sep.length;
		}
		joined[pos..pos+piece.length] = piece;
		pos += piece.length;
	}
	return joined;
}
unittest {
	assert(join(["aol.com", "join", "intro.html"], "/") == "aol.com/join/intro.html");
	assert(join(["789", "672", "484"], ",") == "789,672,484");
	assert(join(["10", "15", "17"], " - ") == "10 - 15 - 17");
}
286 | |
287 /** | |
288 * Multiplies the given string the specified number of times. | |
289 * Returns: a string that is the result of adding the specified string onto | |
290 * an empty string the specified number of times | |
291 * Examples: | |
292 * ----- | |
293 * "Hi...".times(3) == "Hi...Hi...Hi..." | |
294 * "0".times(20) == "00000000000000000000" | |
295 * "Hi".times(0) == "" | |
296 * ----- | |
297 */ | |
mstring times(cstring str, int n) {
	mstring repeated = new char[n * str.length];
	// Fill the buffer one copy of str at a time.
	int offset = 0;
	while(offset < repeated.length) {
		repeated[offset..offset+str.length] = str;
		offset += str.length;
	}
	return repeated;
}
unittest {
	assert("Hi".times(0) == "");
	assert("".times(50) == "");
	assert("Hello! ".times(2) == "Hello! Hello! ");
	assert("0".times(4) == "0000");
}
310 | |
311 // TODO: flesh out and make public | |
struct sbuilder {
	// Number of characters of `data` that are in use.
	int count;
	// Backing storage; may be longer than `count`.
	mstring data;
	/// Appends one character, growing the storage when it is full.
	void add(char c) {
		auto needed = count + 1;
		if(needed > data.length)
			data.length = (data.length + 1) * 2;
		data[count++] = c;
	}
	/// Appends a string, growing the storage to at least the required size.
	void add(cstring str) {
		auto end = count + str.length;
		if(end > data.length)
			data.length = max((data.length + 1) * 2, end);
		data[count..end] = str;
		count += str.length;
	}
	/// Returns a copy of the characters added so far.
	mstring toString() {
		auto used = data[0..count];
		return used.dup;
	}
}
/**
 * Replaces any occurrence of a specified search string in the specified string
 * with corresponding replacement string. The length of the searchStrs array
 * must equal the length of the replacements array, except that a single
 * replacement may be given for several search strings, in which case it is
 * used for all of them.
 * Examples:
 * -----
 * "Mississippi".replace(["is", "i"], ["..", "*"]) == "M..s..s*pp*"
 * "Mississippi".replace("ss", "...") == "Mi...i...ippi"
 * "Hello".replace("ll", "y") == "Heyo"
 * "Hi".replace([], []) == "Hi"
 * -----
 * Note: If multiple search strings have the same prefix, the longer search
 * strings must be given first. Otherwise, any occurrence will match a
 * shorter one and will not have a chance to match any longer one.
 * Examples:
 * -----
 * "Speaker".replace(["ea", "e"], ":") == "Sp:k:r"
 * "Speaker".replace(["e", "ea"], ":") == "Sp:ak:r"
 * -----
 * Note: A search string with a length of zero never matches and is ignored.
 * Throws: IllegalArgumentException if the two arrays differ in length.
 */
mstring replace(cstring str, string[] searchStrs, string[] replacements) {
	// Broadcast a single replacement across all search strings.
	if(replacements.length == 1 && searchStrs.length > 1) {
		string tmp = replacements[0];
		replacements = new string[searchStrs.length];
		foreach(i, dummy; searchStrs)
			replacements[i] = tmp;
	}
	if(searchStrs.length != replacements.length)
		throw new IllegalArgumentException(
			"Replace(): searchStrs and replacements must be same length");
	sbuilder builder;
loop:
	for(int i = 0; i < str.length; ) {
		foreach(j, subStr; searchStrs) {
			// An empty search string would match at every position without
			// consuming any input, so the scan would never advance.
			if(subStr.length == 0)
				continue;
			if(i+subStr.length <= str.length && str[i..i+subStr.length] == subStr) {
				// skip the part of string that matched
				i += subStr.length;
				builder.add(replacements[j]);
				continue loop;
			}
		}
		// Nothing matched here: copy the character through unchanged.
		builder.add(str[i]);
		++i;
	}
	return builder.toString();
}
/// ditto
mstring replace(cstring str, string[] searchStrs, string replacement) {
	return str.replace(searchStrs, [replacement]);
}
/// ditto
mstring replace(cstring str, string searchStr, string replacement) {
	return str.replace([searchStr], [replacement]);
}
unittest {
	assert("Mississippi".replace(["is", "i"], ["..", "*"]) == "M..s..s*pp*");
	assert("Mississippi".replace("ss", "...") == "Mi...i...ippi");
	assert("Hello".replace("ll", "y") == "Heyo");
	//assert("Hi".replace(cast(mstring[])[], cast(mstring[])[]) == "Hi");
	assert("Speaker".replace(["ea", "e"], ":") == "Sp:k:r");
	assert("Speaker".replace(["e", "ea"], ":") == "Sp:ak:r");
	// zero-length search strings are ignored instead of looping forever
	assert("Hi".replace("", "x") == "Hi");
	assert("Hi".replace(["", "i"], ["x", "o"]) == "Ho");
}
394 | |
/**
 * Changes every occurrence of a specified character in chars to a backslash
 * followed by the corresponding character in escChars. A backslash in the
 * input is always escaped as two backslashes, whatever chars contains.
 * Examples:
 * -----
 * "Line1\r\nLine2\\".escape() == "Line1\\r\\nLine2\\\\"
 * "Line1\tLine2".escape() == "Line1\\tLine2"
 * "Part1|Part2\r\n".escape("|\r\n", "|rn") == "Part1\\|Part2\\r\\n"
 * -----
 * Throws: IllegalArgumentException if chars and escChars differ in length.
 */
mstring escape(cstring str, const(char)[] chars, const(char)[] escChars) {
	if(chars.length != escChars.length)
		throw new IllegalArgumentException("escape(): chars and escChars must be same length");
	sbuilder builder;
loop:
	foreach(c; str) {
		// Always escape backslash. The check is made before scanning chars
		// so that it also applies when chars is empty.
		if(c == '\\') {
			builder.add('\\');
			builder.add('\\');
			continue;
		}
		foreach(j, c2; chars) {
			if(c == c2) {
				builder.add('\\');
				builder.add(escChars[j]);
				continue loop;
			}
		}
		builder.add(c);
	}
	return builder.toString();
}
/// ditto
mstring escape(cstring str) {
	return str.escape("\t\r\n", "trn");
}
unittest {
	assert("Line1\r\nLine2\\".escape() == "Line1\\r\\nLine2\\\\");
	assert("Line1\tLine2".escape() == "Line1\\tLine2");
	assert("Part1|Part2\r\n".escape("|\r\n", "|rn") == "Part1\\|Part2\\r\\n");
	// backslashes are escaped even with no other characters to escape
	assert("a\\b".escape("", "") == "a\\\\b");
}
/**
 * Changes every backslash escape sequence whose second character is found
 * in escChars to the corresponding character in chars. Two backslashes in
 * a row become a single backslash.
 * Examples:
 * -----
 * "Line1\\r\\nLine2".unescape() == "Line1\r\nLine2"
 * "Line1\\tLine2".unescape() == "Line1\tLine2"
 * "Part1\\|Part2\\r\\n".unescape("|rn", "|\r\n") == "Part1|Part2\r\n"
 * // error:
 * "test\\".unescape()
 * -----
 * Throws: IllegalArgumentException if escChars and chars differ in length,
 *         if the string ends with a lone backslash, or if a backslash is
 *         followed by a character that is not in escChars.
 */
mstring unescape(cstring str, const(char)[] escChars, const(char)[] chars) {
	if(escChars.length != chars.length)
		throw new IllegalArgumentException("unescape(): escChars and chars must be same length");
	sbuilder builder;
	// An explicit index loop is used because an escape sequence consumes
	// two characters; advancing the index of a foreach loop from inside
	// its body is not a reliable way to skip the second one.
loop:
	for(size_t i = 0; i < str.length; ++i) {
		if(str[i] != '\\') {
			builder.add(str[i]);
			continue;
		}
		if(i == str.length-1)
			throw new IllegalArgumentException("unescape(): partial escape sequence at end of string");
		char next = str[++i]; // consume the character after the backslash
		if(next == '\\') {
			builder.add('\\');
			continue;
		}
		foreach(j, c2; escChars) {
			if(next == c2) {
				builder.add(chars[j]);
				continue loop;
			}
		}
		throw new IllegalArgumentException("unescape(): invalid escape sequence");
	}
	return builder.toString();
}
/// ditto
mstring unescape(cstring str) {
	return str.unescape("trn", "\t\r\n");
}
unittest {
	assert("Line1\\r\\nLine2\\\\".unescape() == "Line1\r\nLine2\\");
	assert("Line1\\tLine2".unescape() == "Line1\tLine2");
	assert("Part1\\|Part2\\r\\n".unescape("|rn", "|\r\n") == "Part1|Part2\r\n");
}
unittest {
	string str = r"C:\\n";
	assert(str.escape().unescape() == str);
}
488 /** | |
489 * Removes all whitespace characters from the specified string. | |
490 * Examples: | |
491 * ----- | |
492 * "4a d2 7c 3f".removeWhitespace() == "4ad27c3f" | |
493 * " Hello \r\n".removeWhitespace() == "Hello" | |
494 * "How are you?".removeWhitespace() == "Howareyou?" | |
495 * "\t \n\r\f\v".removeWhitespace() == "" | |
496 * ----- | |
497 */ | |
mstring removeWhitespace(cstring str) {
	sbuilder kept;
	foreach(c; str) {
		// Drop space, tab, newline, carriage return, vertical tab, form feed.
		if(" \t\n\r\v\f".contains(c))
			continue;
		kept.add(c);
	}
	return kept.toString();
}
unittest {
	assert("\t \n\r\f\v".removeWhitespace() == "");
	assert("How are you?".removeWhitespace() == "Howareyou?");
	assert(" Hello \r\n".removeWhitespace() == "Hello");
	assert("4a d2 7c 3f".removeWhitespace() == "4ad27c3f");
}
511 /** | |
512 * Removes all the whitespace characters from the start and from the end | |
513 * of the specified string. Returns a slice. | |
514 * Examples: | |
515 * ----- | |
516 * " Hello \r\n".trim() == "Hello" | |
517 * "How are you?".trim() == "How are you?" | |
518 * "\n la di da ".trim() == "la di da" | |
519 * " \n".trim() == "" | |
520 * "".trim() == "" | |
521 * ----- | |
522 */ | |
inout(char)[] trim(inout(char)[] str) {
	// Shrink the end past any trailing whitespace.
	int end = str.length;
	while(end > 0 && " \t\n\r\v\f".contains(str[end-1]))
		--end;
	if(end == 0) // means all whitespace
		return str[0..0];
	// A non-whitespace character exists before `end`, so this scan stops
	// without needing a bounds check.
	int start = 0;
	while(" \t\n\r\v\f".contains(str[start]))
		++start;
	return str[start..end];
}
unittest {
	assert("".trim() == "");
	assert(" \n".trim() == "");
	assert("\n la di da ".trim() == "la di da");
	assert("How are you?".trim() == "How are you?");
	assert(" Hello \r\n".trim() == "Hello");
}
539 /** | |
540 * Removes all the whitespace characters from the start | |
541 * of the specified string. Returns a slice. | |
542 * Examples: | |
543 * ----- | |
544 * " Hello \r\n".trimLeft() == "Hello \r\n" | |
545 * "How are you?".trimLeft() == "How are you?" | |
546 * "\n la di da ".trimLeft() == "la di da " | |
547 * " \n".trimLeft() == "" | |
548 * "".trimLeft() == "" | |
549 * ----- | |
550 */ | |
cstring trimLeft(cstring str) {
	// Advance past leading whitespace; stops at the end for an
	// all-whitespace string.
	int start = 0;
	while(start < str.length && " \t\n\r\v\f".contains(str[start]))
		++start;
	return str[start..$];
}
unittest {
	assert("".trimLeft() == "");
	assert(" \n".trimLeft() == "");
	assert("\n la di da ".trimLeft() == "la di da ");
	assert("How are you?".trimLeft() == "How are you?");
	assert(" Hello \r\n".trimLeft() == "Hello \r\n");
}
563 /** | |
564 * Removes all the whitespace characters from the end | |
565 * of the specified string. Returns a slice. | |
566 * Examples: | |
567 * ----- | |
568 * " Hello \r\n".trimRight() == " Hello" | |
569 * "How are you?".trimRight() == "How are you?" | |
570 * "\n la di da ".trimRight() == "\n la di da" | |
571 * " \n".trimRight() == "" | |
572 * "".trimRight() == "" | |
573 * ----- | |
574 */ | |
cstring trimRight(cstring str) {
	// Shrink the end past any trailing whitespace; reaches 0 for an
	// all-whitespace string.
	int end = str.length;
	while(end > 0 && " \t\n\r\v\f".contains(str[end-1]))
		--end;
	return str[0..end];
}
unittest {
	assert("".trimRight() == "");
	assert(" \n".trimRight() == "");
	assert("\n la di da ".trimRight() == "\n la di da");
	assert("How are you?".trimRight() == "How are you?");
	assert(" Hello \r\n".trimRight() == " Hello");
}
588 | |
589 | |
unittest {
	// startsWith
	assert(".NET Framework".startsWith(".N"));
	assert(".NET Framework".startsWith("Frame", 5));
	assert(!".NET Framework".startsWith(".NEW"));

	// find
	assert(".NET Framework".find("NET") == 1);
	assert(".NET Framework".find("NET", 2) == -1);
	assert(".NET Framework".find("") == 0);

	// findLast
	assert("Mississippi".findLast("ss") == 5);
	assert("Mississippi".findLast("ss", 4) == 2);

	// split and split1
	assert("Jordan=20".split("=") == ["Jordan", "20"]);
	assert("Jordan".split("") == ["Jordan"]);
	assert("Jordan".split1("=") == ["Jordan"]);
}
603 | |
604 /*class Encoding { | |
605 private: | |
606 //TODO: remove dependency on std.utf | |
607 static Encoding[] encodings = [ | |
608 //new Encoding("windows-1252".Str(), "Western European (Windows)".Str(), encodeTableWindows1252) | |
609 ]; | |
610 String name; | |
611 String desc; | |
612 public: | |
613 //property | |
614 static Encoding[] Encodings() { | |
615 return encodings; | |
616 } | |
617 static Encoding GetEncoding(String name) { | |
618 } | |
619 static byte[] Convert(Encoding src, Encoding dst, byte[] bytes) {} | |
620 | |
621 this(String name, String description, wchar[] table) { | |
622 this.name = name; | |
623 } | |
624 // example: utf-8 | |
625 String Name() { | |
626 return name; | |
627 } | |
628 // example: Unicode (UTF-8) | |
629 String Description() { | |
630 return desc; | |
631 } | |
632 int GetEncodedCount() {} | |
633 byte[] Encode(String str) { | |
634 } | |
635 //Returns the number of characters | |
636 int GetDecodedLength() { | |
637 } | |
638 String Decode(byte[] bytes) { | |
639 } | |
640 }*/ | |
641 //class Utf8Encoding : Encoding { | |
642 //public: | |
643 //} | |
644 | |
645 //characters that cannot be mapped to unicode are 0xFFFD in the tables | |
// Windows-1252 byte value -> Unicode code unit, indexed by byte value.
// Eight entries per row; the comment on each group gives the index of its
// first entry.
wchar[] encodeTableWindows1252 = [
	// 0x00
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
	// 0x10
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
	// 0x20
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
	// 0x30
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
	// 0x40
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
	// 0x50
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
	// 0x60
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
	// 0x70
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
	// 0x80
	0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
	// 0x90
	0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
	// 0xA0
	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
	0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	// 0xB0
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
	0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
	// 0xC0
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	// 0xD0
	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
	// 0xE0
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	// 0xF0
	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
];
677 | |
678 //iso8859-1 does not need a conversion table, as its values are all the same as Unicode's | |
679 | |
// ISO 8859-2 byte value -> Unicode code unit, indexed by byte value.
// Eight entries per row; the comment on each group gives the index of its
// first entry.
wchar[] encodeTableIso8859_2 = [
	// 0x00
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
	// 0x10
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
	// 0x20
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
	// 0x30
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
	// 0x40
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
	// 0x50
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
	// 0x60
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
	// 0x70
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
	// 0x80
	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
	0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
	// 0x90
	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
	0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
	// 0xA0
	0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
	0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
	// 0xB0
	0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
	0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
	// 0xC0
	0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
	0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
	// 0xD0
	0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
	0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
	// 0xE0
	0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
	0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
	// 0xF0
	0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
	0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
];
711 | |
// ISO 8859-3 byte value -> Unicode code unit, indexed by byte value.
// Eight entries per row; the comment on each group gives the index of its
// first entry.
// The seven byte values that ISO 8859-3 leaves unassigned (0xA5, 0xAE, 0xBE,
// 0xC3, 0xD0, 0xE3 and 0xF0) hold 0xFFFD, so the table has exactly 256
// entries and every later byte value maps to the right index.
wchar[] encodeTableIso8859_3 = [
	// 0x00
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
	// 0x10
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
	// 0x20
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
	// 0x30
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
	// 0x40
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
	// 0x50
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
	// 0x60
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
	// 0x70
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
	// 0x80
	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
	0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
	// 0x90
	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
	0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
	// 0xA0
	0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
	0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
	// 0xB0
	0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
	0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
	// 0xC0
	0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	// 0xD0
	0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
	0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
	// 0xE0
	0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	// 0xF0
	0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
	0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9
];