Mercurial > projects > ldc
comparison tango/tango/text/convert/Integer.d @ 132:1700239cab2e trunk
[svn r136] MAJOR UNSTABLE UPDATE!!!
Initial commit after moving to Tango instead of Phobos.
Lots of bugfixes...
This build is not suitable for most things.
author | lindquist |
---|---|
date | Fri, 11 Jan 2008 17:57:40 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
131:5825d48b27d1 | 132:1700239cab2e |
---|---|
1 /******************************************************************************* | |
2 | |
3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved | |
4 | |
5 license: BSD style: $(LICENSE) | |
6 | |
7 version: Initial release: Nov 2005 | |
8 | |
9 author: Kris | |
10 | |
11 A set of functions for converting between string and integer | |
12 values. | |
13 | |
14 Applying the D "import alias" mechanism to this module is highly | |
15 recommended, in order to limit namespace pollution: | |
16 --- | |
17 import Integer = tango.text.convert.Integer; | |
18 | |
19 auto i = Integer.parse ("32767"); | |
20 --- | |
21 | |
22 *******************************************************************************/ | |
23 | |
24 module tango.text.convert.Integer; | |
25 | |
26 private import tango.core.Exception; | |
27 | |
/******************************************************************************

        Style identifiers

        Each member's value is the character that format() switches on
        to select the output representation.

******************************************************************************/

enum Style
{
        Signed = 'd', /// signed decimal; negative values get a leading '-'
        Binary = 'b', /// binary (radix 2) output
        Octal = 'o', /// octal (radix 8) output
        Hex = 'x', /// hexadecimal using the lowercase digits a-f
        HexUpper = 'X', /// hexadecimal using the uppercase digits A-F
        Unsigned = 'u', /// decimal, with the value treated as unsigned
}
43 | |
/******************************************************************************

        Modifier bits accepted by format(). Each member occupies its own
        bit, so members may be combined with '|'.

******************************************************************************/

enum Flags
{
        None   = 0,             /// no modifiers at all
        Prefix = 1 << 0,        /// emit a radix prefix ("0b", "0o", "0x", "0X")
        Zero   = 1 << 1,        /// left-pad the number with '0'
        Plus   = 1 << 2,        /// '+' before non-negative signed decimals
        Space  = 1 << 3,        /// ' ' before non-negative signed decimals
        Throw  = 1 << 4,        /// ask for an exception on output truncation
}
59 | |
/******************************************************************************

        Parse a 32-bit signed integer from the provided 'digits' string.

        The text is handed to toLong(), so the same rules apply: an
        optional sign and radix prefix are recognised, an explicit
        radix must agree with any prefix present, and a radix of
        zero selects decimal.

        Params:
        digits = the text to convert
        radix  = the numeric base, or zero to select the default

        Returns: the parsed value, narrowed to int.

        Throws: IllegalArgumentException where the text is not parsable
        in its entirety (raised by toLong), or where the parsed value
        lies outside the range int.min .. int.max.

******************************************************************************/

int toInt(T, U=uint) (T[] digits, U radix=0)
{return toInt!(T)(digits, radix);}

int toInt(T) (T[] digits, uint radix=0)
{
        auto x = toLong (digits, radix);

        // reject both directions: a value below int.min would otherwise be
        // silently truncated by the narrowing cast, just like one above
        // int.max
        if (x > int.max || x < int.min)
            throw new IllegalArgumentException ("Integer.toInt :: integer overflow");
        return cast(int) x;
}
84 | |
/******************************************************************************

        Convert the whole of 'digits' into a 64-bit signed integer.

        Parsing is delegated to parse(): an optional sign and radix
        prefix are recognised, an explicit radix must agree with any
        prefix present, and a radix of zero selects decimal.

        Params:
        digits = the text to convert
        radix  = the numeric base, or zero to select the default

        Returns: the value produced by parse().

        Throws: IllegalArgumentException when parse() stops before the
        end of 'digits', i.e. the text is not a literal in its entirety.

******************************************************************************/

long toLong(T, U=uint) (T[] digits, U radix=0)
{
        return toLong!(T) (digits, radix);
}

long toLong(T) (T[] digits, uint radix=0)
{
        uint consumed;
        auto result = parse (digits, radix, &consumed);

        // success only when every character took part in the conversion
        if (consumed >= digits.length)
            return result;

        throw new IllegalArgumentException ("Integer.toLong :: invalid literal");
}
111 | |
/******************************************************************************

        Convenience wrapper around format() which returns a freshly
        allocated char[] holding the text version of the provided value.

        Params:
        i = the value to convert
        t = the output style
        f = the modifier flags

        See format() for details

******************************************************************************/

char[] toString (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        // stack scratch space; deliberately left uninitialised since
        // format() fills whatever part of it is returned
        char[66] scratch = void;

        auto text = format (scratch, i, t, f);

        // the slice refers to stack memory (or a literal), so copy it
        return text.dup;
}
127 | |
/******************************************************************************

        Convenience wrapper around format() which returns a freshly
        allocated wchar[] holding the text version of the provided value.

        Params:
        i = the value to convert
        t = the output style
        f = the modifier flags

        See format() for details

******************************************************************************/

wchar[] toString16 (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        // stack scratch space; deliberately left uninitialised since
        // format() fills whatever part of it is returned
        wchar[66] scratch = void;

        auto text = format (scratch, i, t, f);

        // the slice refers to stack memory (or a literal), so copy it
        return text.dup;
}
143 | |
/******************************************************************************

        Convenience wrapper around format() which returns a freshly
        allocated dchar[] holding the text version of the provided value.

        Params:
        i = the value to convert
        t = the output style
        f = the modifier flags

        See format() for details

******************************************************************************/

dchar[] toString32 (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        // stack scratch space; deliberately left uninitialised since
        // format() fills whatever part of it is returned
        dchar[66] scratch = void;

        auto text = format (scratch, i, t, f);

        // the slice refers to stack memory (or a literal), so copy it
        return text.dup;
}
159 | |
/*******************************************************************************

        Style numeric values into the provided output buffer. The
        following types are supported:

        Unsigned        - unsigned decimal
        Signed          - signed decimal
        Octal           - octal
        Hex             - lowercase hexadecimal
        HexUpper        - uppercase hexadecimal
        Binary          - binary

        Modifiers supported include:

        Prefix          - prefix the conversion with a type identifier
        Plus            - prefix positive decimals with a '+'
        Space           - prefix positive decimals with one space
        Zero            - left-pad the number with zeros
        Throw           - throw an exception when output would be truncated

        The number is written right-aligned into 'dst', so the buffer
        must be large enough to house the output. A 64-bit value
        needs at most 64 elements in binary, or 66 when a prefix is
        requested as well.

        Params:
        dst   = the output buffer
        i     = the value to convert
        fmt   = the output style
        flags = the modifier flags

        Returns: a slice of 'dst' holding the formatted text. Where
        the style is unknown, or the buffer is too small, and
        Flags.Throw is not set, a short "{...}" message is returned
        instead.

        Throws: IllegalArgumentException where the style is unknown or
        the buffer is too small, and Flags.Throw is set.

*******************************************************************************/

T[] format(T, U=long) (T[] dst, U i, Style fmt=Style.Signed, Flags flags=Flags.None)
{return format!(T)(dst, i, fmt, flags);}

T[] format(T) (T[] dst, long i, Style fmt=Style.Signed, Flags flags=Flags.None)
{
        T[] prefix;
        auto len = dst.length;

        // deliberately not static: it has to consult the caller's 'flags'
        // in order to honour Flags.Throw
        T[] error (T[] msg)
        {
                if (flags & Flags.Throw)
                    throw new IllegalArgumentException ("Integer.format :: invalid arguments");
                return msg;
        }

        // must have some buffer space to operate within!
        if (len)
           {
           uint radix;
           T[]  numbers = "0123456789abcdef";

           // pre-conversion setup
           switch (cast(byte) fmt)
                  {
                  case 'd':
                  case 'D':
                       if (i < 0)
                          {
                          prefix = "-";
                          i = -i;
                          }
                       else
                          if (flags & Flags.Space)
                              prefix = " ";
                          else
                             if (flags & Flags.Plus)
                                 prefix = "+";
                       // fall through!
                  case 'u':
                  case 'U':
                       radix = 10;
                       break;

                  case 'b':
                  case 'B':
                       radix = 2;
                       if (flags & Flags.Prefix)
                           prefix = "0b";
                       break;

                  case 'o':
                  case 'O':
                       radix = 8;
                       if (flags & Flags.Prefix)
                           prefix = "0o";
                       break;

                  case 'x':
                       radix = 16;
                       if (flags & Flags.Prefix)
                           prefix = "0x";
                       break;

                  case 'X':
                       radix = 16;
                       numbers = "0123456789ABCDEF";
                       if (flags & Flags.Prefix)
                           prefix = "0X";
                       break;

                  default:
                       return error (cast(T[])"{unknown format '"~cast(T)fmt~"'}");
                  }

           // convert number to text, filling the buffer from its tail.
           // 'len' is decremented per additional digit and reaches zero
           // only when digits remain but the buffer is exhausted
           T* p = dst.ptr + len;
           if (uint.max >= cast(ulong) i)
              {
              // value fits in 32 bits: use the narrower arithmetic
              uint v = cast (uint) i;
              do {
                 *--p = numbers[v % radix];
                 } while ((v /= radix) && --len);
              }
           else
              {
              ulong v = cast (ulong) i;
              do {
                 *--p = numbers[cast(uint) (v % radix)];
                 } while ((v /= radix) && --len);
              }
           }

        // are we about to overflow? At this point the first digit sits at
        // index len-1, so there must be room for the prefix ahead of it
        if (len > prefix.length)
           {
           len -= prefix.length + 1;

           // prefix number with zeros?
           if (flags & Flags.Zero)
              {
              dst [prefix.length .. len + prefix.length] = '0';
              len = 0;
              }

           // write optional prefix string ...
           dst [len .. len + prefix.length] = prefix[];
           }
        else
           return error ("{output width too small}");

        // return slice of provided output buffer
        return dst [len .. $];
}
299 | |
300 | |
/******************************************************************************

        Parse an integer value from the leading part of 'digits'.

        trim() first consumes leading whitespace, a sign, and an
        optional radix prefix; convert() then consumes the digits
        that follow. A radix may be provided as an argument, in
        which case it must match the prefix (where present). When
        radix is set to zero, conversion will default to decimal.

        Params:
        digits = the text to convert
        radix  = the numeric base, or zero to select the default
        ate    = where non-null, receives the total number of
                 characters used to construct the returned value

        Returns: the parsed value, negated where a '-' sign was seen.

******************************************************************************/

long parse(T, U=uint) (T[] digits, U radix=0, uint* ate=null)
{
        return parse!(T) (digits, radix, ate);
}

long parse(T) (T[] digits, uint radix=0, uint* ate=null)
{
        bool negative;

        auto skipped   = trim (digits, negative, radix);
        auto magnitude = convert (digits[skipped..$], radix, ate);

        // convert() reported only its own share; add what trim() consumed
        if (ate !is null)
            *ate += skipped;

        if (negative)
            return cast(long) (-magnitude);
        return cast(long) magnitude;
}
330 | |
/******************************************************************************

        Convert the leading digits of 'digits' into an unsigned
        value, without checking for a sign or radix prefix. The
        radix defaults to decimal (10).

        Conversion stops at the first character which is not one of
        0-9, a-f or A-F, or whose digit value is not below the radix.

        Params:
        digits = the text to convert
        radix  = the numeric base
        ate    = where non-null, receives the number of characters
                 consumed

        Returns: the accumulated value; zero where nothing was consumed.

******************************************************************************/

ulong convert(T, U=uint) (T[] digits, U radix=10, uint* ate=null)
{
        return convert!(T) (digits, radix, ate);
}

ulong convert(T) (T[] digits, uint radix=10, uint* ate=null)
{
        ulong total;
        uint  used;

        foreach (ch; digits)
                {
                uint digit;

                // map the character onto its numeric digit value
                if (ch >= '0' && ch <= '9')
                    digit = ch - '0';
                else
                   if (ch >= 'a' && ch <= 'f')
                       digit = ch - 'a' + 10;
                   else
                      if (ch >= 'A' && ch <= 'F')
                          digit = ch - 'A' + 10;
                      else
                         break;

                // a digit which is not valid in this radix ends the number
                if (digit >= radix)
                    break;

                total = total * radix + digit;
                ++used;
                }

        if (ate !is null)
            *ate = used;

        return total;
}
377 | |
378 | |
/******************************************************************************

        Strip leading whitespace, extract an optional +/- sign,
        and an optional radix prefix. If the radix value matches
        an optional prefix, or the radix is zero, the prefix will
        be consumed and assigned. Where the radix is non zero and
        does not match an explicit prefix, the latter will remain
        unconsumed. Otherwise, radix will default to 10.

        Params:
        digits = the text to inspect
        sign   = set to true for each '-' seen and to false for
                 each '+' seen; left untouched otherwise
        radix  = the expected base on entry, or zero; updated as
                 described above

        Returns the number of characters consumed.

******************************************************************************/

uint trim(T, U=uint) (T[] digits, inout bool sign, inout U radix)
{return trim!(T)(digits, sign, radix);}

uint trim(T) (T[] digits, inout bool sign, inout uint radix)
{
        T       c;
        T*      p = digits.ptr;
        int     len = digits.length;

        if (len)
           {
           // strip off whitespace and sign characters. A character is
           // fetched only while 'len' is non-zero, so nothing beyond the
           // end of 'digits' is ever read -- not even when the whole
           // input consists of whitespace and signs
           while (len)
                 {
                 c = *p;
                 if (c is ' ' || c is '\t')
                    {}
                 else
                    if (c is '-')
                        sign = true;
                    else
                       if (c is '+')
                           sign = false;
                       else
                          break;
                 ++p;
                 --len;
                 }

           // strip off a radix specifier also? The 'len > 1' guard
           // ensures there is a character after the '0' to look at.
           // Note the leading '0' itself is consumed in every case
           auto r = radix;
           if (c is '0' && len > 1)
               switch (*++p)
                      {
                      case 'x':
                      case 'X':
                           r = 16, ++p;
                           break;

                      case 'b':
                      case 'B':
                           r = 2, ++p;
                           break;

                      case 'o':
                      case 'O':
                           r = 8, ++p;
                           break;

                      default:
                           break;
                      }

           // default the radix to 10
           if (r is 0)
               radix = 10;
           else
              // explicit radix must match (optional) prefix
              if (radix != r)
                 {
                 if (radix)
                     // mismatch: hand the prefix letter back
                     --p;
                 else
                    radix = r;
                 }
           }

        // return number of characters eaten
        return (p - digits.ptr);
}
455 | |
456 | |
/******************************************************************************

        Quick & dirty text-to-unsigned-int converter: accumulates
        leading decimal digits and stops at the first character
        which is not one. There is no sign, whitespace, radix or
        overflow handling; use parse() or convert() where the content
        is not known in advance.

        Returns the parsed uint; zero where 's' has no leading digit.

******************************************************************************/

uint atoi(T) (T[] s)
{
        uint total = 0;

        for (size_t idx = 0; idx < s.length; ++idx)
            {
            auto ch = s[idx];

            // the first non-digit terminates the number
            if (ch < '0' || ch > '9')
                break;

            total = total * 10 + (ch - '0');
            }

        return total;
}
477 | |
478 | |
/******************************************************************************

        Quick & dirty unsigned-to-text converter. Digits are written
        backwards from the end of 'output', with no bounds checking:
        the caller must supply a buffer large enough to house the
        result (10 digits in the largest case). For mainstream use,
        consider utilizing format() instead.

        Returns the populated tail slice of the provided output

******************************************************************************/

T[] itoa(T, U=uint) (T[] output, U value)
{
        return itoa!(T) (output, value);
}

T[] itoa(T) (T[] output, uint value)
{
        // start one past the last element and walk towards the front
        T* cursor = output.ptr + output.length;

        for (;;)
            {
            *--cursor = cast(T) (value % 10 + '0');
            value /= 10;

            // at least one digit is always emitted, so zero yields "0"
            if (value is 0)
                break;
            }

        return output[cursor - output.ptr .. $];
}
501 | |
502 | |
/******************************************************************************

        Unit tests, compiled in only for debug (UnitTest) builds. They
        are grouped by the area under test.

******************************************************************************/

debug (UnitTest)
{
        // whole-string conversions plus the quick & dirty helpers
        unittest
        {
        char[64] tmp;

        assert (toInt("1") is 1);
        assert (toLong("1") is 1);
        assert (toInt("1", 10) is 1);
        assert (toLong("1", 10) is 1);

        assert (atoi ("12345") is 12345);
        assert (itoa (tmp, 12345) == "12345");
        }

        // parse: signs and numerical limits
        unittest
        {
        assert(parse( "0"w ) == 0 );
        assert(parse( "1"w ) == 1 );
        assert(parse( "-1"w ) == -1 );
        assert(parse( "+1"w ) == 1 );

        // numerical limits
        assert(parse( "-2147483648" ) == int.min );
        assert(parse( "2147483647" ) == int.max );
        assert(parse( "4294967295" ) == uint.max );

        assert(parse( "-9223372036854775808" ) == long.min );
        assert(parse( "9223372036854775807" ) == long.max );
        assert(parse( "18446744073709551615" ) == ulong.max );
        }

        // parse: explicit radix
        unittest
        {
        // hex
        assert(parse( "a", 16) == 0x0A );
        assert(parse( "b", 16) == 0x0B );
        assert(parse( "c", 16) == 0x0C );
        assert(parse( "d", 16) == 0x0D );
        assert(parse( "e", 16) == 0x0E );
        assert(parse( "f", 16) == 0x0F );
        assert(parse( "A", 16) == 0x0A );
        assert(parse( "B", 16) == 0x0B );
        assert(parse( "C", 16) == 0x0C );
        assert(parse( "D", 16) == 0x0D );
        assert(parse( "E", 16) == 0x0E );
        assert(parse( "F", 16) == 0x0F );
        assert(parse( "FFFF", 16) == ushort.max );
        assert(parse( "ffffFFFF", 16) == uint.max );
        assert(parse( "ffffFFFFffffFFFF", 16u ) == ulong.max );
        // oct
        assert(parse( "55", 8) == 055 );
        assert(parse( "100", 8) == 0100 );
        // bin
        assert(parse( "10000", 2) == 0x10 );
        }

        // parse: whitespace trimming and radix prefixes
        unittest
        {
        // trim
        assert(parse( " \t20") == 20 );
        assert(parse( " \t-20") == -20 );
        assert(parse( "- \t 20") == -20 );
        // recognise radix prefix
        assert(parse( "0xFFFF" ) == ushort.max );
        assert(parse( "0XffffFFFF" ) == uint.max );
        assert(parse( "0o55") == 055 );
        assert(parse( "0O55" ) == 055 );
        assert(parse( "0b10000") == 0x10 );
        assert(parse( "0B10000") == 0x10 );

        // prefix tests
        char[] str = "0x";
        assert(parse( str[0..1] ) == 0 );
        assert(parse("0x10", 10) == 0);
        assert(parse("0b10", 10) == 0);
        assert(parse("0o10", 10) == 0);
        assert(parse("0b10") == 0b10);
        assert(parse("0o10") == 010);
        assert(parse("0b10", 2) == 0b10);
        assert(parse("0o10", 8) == 010);
        }

        // format: styles, flags and buffer limits
        unittest
        {
        char[64] tmp;

        // format tests
        assert (format (tmp, 12345L) == "12345");
        assert (format (tmp, 0) == "0");
        assert (format (tmp, 0x10101L, Style.Hex) == "10101");
        assert (format (tmp, 0xfafaL, Style.Hex) == "fafa");
        assert (format (tmp, 0xfafaL, Style.HexUpper, Flags.Prefix) == "0XFAFA");
        assert (format (tmp, -1L, Style.HexUpper, Flags.Prefix) == "0XFFFFFFFFFFFFFFFF");
        assert (format (tmp, -101L) == "-101");
        assert (format (tmp, 101L, Style.Signed, Flags.Plus) == "+101");
        assert (format (tmp, 101L, Style.Signed, Flags.Space) == " 101");
        assert (format (tmp[0..8], 0x5L, Style.Binary, Flags.Prefix | Flags.Zero) == "0b000101");

        assert (format (tmp[0..8], -1, Style.Binary, Flags.Prefix | Flags.Zero) == "{output width too small}");
        assert (format (tmp[0..2], 0x3, Style.Binary, Flags.Throw) == "11");
        assert (format (tmp[0..4], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b11");
        assert (format (tmp[0..5], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b011");
        assert (format (tmp[0..5], 0x3, Style.Binary, Flags.Zero | Flags.Throw) == "00011");
        }
}
598 |