comparison tango/tango/text/convert/Integer.d @ 132:1700239cab2e trunk

[svn r136] MAJOR UNSTABLE UPDATE!!! Initial commit after moving to Tango instead of Phobos. Lots of bugfixes... This build is not suitable for most things.
author lindquist
date Fri, 11 Jan 2008 17:57:40 +0100
parents
children
comparison
equal deleted inserted replaced
131:5825d48b27d1 132:1700239cab2e
1 /*******************************************************************************
2
3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved
4
5 license: BSD style: $(LICENSE)
6
7 version: Initial release: Nov 2005
8
9 author: Kris
10
11 A set of functions for converting between string and integer
12 values.
13
14 Applying the D "import alias" mechanism to this module is highly
15 recommended, in order to limit namespace pollution:
16 ---
17 import Integer = tango.text.convert.Integer;
18
19 auto i = Integer.parse ("32767");
20 ---
21
22 *******************************************************************************/
23
24 module tango.text.convert.Integer;
25
26 private import tango.core.Exception;
27
/******************************************************************************

        Style identifiers

        Each member carries the format character that format() switches
        on to select a conversion.

******************************************************************************/

enum Style
{
        /// signed decimal
        Signed = 'd',

        /// binary output
        Binary = 'b',

        /// octal output
        Octal = 'o',

        /// lowercase hexadecimal
        Hex = 'x',

        /// uppercase hexadecimal
        HexUpper = 'X',

        /// unsigned decimal
        Unsigned = 'u',
}
43
/******************************************************************************

        Style flags

        Each flag occupies its own bit, so several may be combined
        with '|' and tested with '&'.

******************************************************************************/

enum Flags
{
        /// no flags
        None = 0,

        /// prefix value with type
        Prefix = 1 << 0,

        /// prefix value with zeroes
        Zero = 1 << 1,

        /// prefix decimal with '+'
        Plus = 1 << 2,

        /// prefix decimal with space
        Space = 1 << 3,

        /// throw on output truncation
        Throw = 1 << 4,
}
59
/******************************************************************************

        Parse a 32-bit integer value from the provided 'digits' string.

        The text is handed to toLong(), so the same rules apply: the
        string is inspected for a sign and an optional radix prefix.
        A radix may be provided as an argument instead, whereupon it
        must match the prefix (where present). When radix is set to
        zero, conversion will default to decimal.

        Params:
        digits = the text to convert
        radix  = the numeric base, or zero to derive it from the text

        Returns:
        the parsed value, narrowed to an int

        Throws:
        IllegalArgumentException where the parsed value does not fit
        within int.min .. int.max. Whatever toLong() throws for text
        that is not parsable in its entirety is propagated as is.

******************************************************************************/

int toInt(T, U=uint) (T[] digits, U radix=0)
{return toInt!(T)(digits, radix);}

int toInt(T) (T[] digits, uint radix=0)
{
        auto x = toLong (digits, radix);

        // reject both directions: a value below int.min would otherwise
        // be silently truncated by the narrowing cast
        if (x > int.max || x < int.min)
            throw new IllegalArgumentException ("Integer.toInt :: integer overflow");
        return cast(int) x;
}
84
/******************************************************************************

        Parse a 64-bit integer value from the provided 'digits' string.

        The work is done by parse(): the string is inspected for a
        sign and an optional radix prefix. A radix may be provided
        as an argument instead, whereupon it must match the prefix
        (where present). When radix is set to zero, conversion will
        default to decimal.

        Throws an IllegalArgumentException where parse() consumes
        fewer characters than 'digits' contains.

******************************************************************************/

long toLong(T, U=uint) (T[] digits, U radix=0)
{
        return toLong!(T) (digits, radix);
}

long toLong(T) (T[] digits, uint radix=0)
{
        uint consumed;

        auto value = parse (digits, radix, &consumed);

        // everything was eaten, so the text is a complete literal
        if (consumed >= digits.length)
            return value;

        throw new IllegalArgumentException ("Integer.toLong :: invalid literal");
}
111
/******************************************************************************

        Convenience wrapper around format(). Returns a char[] text
        version of the provided value, duplicated out of a local
        66-element scratch buffer.

        See format() for details

******************************************************************************/

char[] toString (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        char[66] scratch = void;

        // format() hands back a slice, which is copied before returning
        auto text = format (scratch, i, t, f);
        return text.dup;
}
127
/******************************************************************************

        Convenience wrapper around format(). Returns a wchar[] text
        version of the provided value, duplicated out of a local
        66-element scratch buffer.

        See format() for details

******************************************************************************/

wchar[] toString16 (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        wchar[66] scratch = void;

        // format() hands back a slice, which is copied before returning
        auto text = format (scratch, i, t, f);
        return text.dup;
}
143
/******************************************************************************

        Convenience wrapper around format(). Returns a dchar[] text
        version of the provided value, duplicated out of a local
        66-element scratch buffer.

        See format() for details

******************************************************************************/

dchar[] toString32 (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        dchar[66] scratch = void;

        // format() hands back a slice, which is copied before returning
        auto text = format (scratch, i, t, f);
        return text.dup;
}
159
/*******************************************************************************

        Style numeric values into the provided output buffer. The
        following types are supported:

        Unsigned        - unsigned decimal
        Signed          - signed decimal
        Octal           - octal
        Hex             - lowercase hexadecimal
        HexUpper        - uppercase hexadecimal
        Binary          - binary

        Modifiers supported include:

        Prefix          - prefix the conversion with a type identifier
        Plus            - prefix positive decimals with a '+'
        Space           - prefix positive decimals with one space
        Zero            - left-pad the number with zeros
        Throw           - throw an exception when output would be truncated

        The provided 'dst' buffer should be sufficiently large
        enough to house the output. A 64-element array is often
        the maximum required (for a padded binary 64-bit string)

        Digits are written from the tail of 'dst' towards its head,
        and the returned array is the slice of 'dst' holding the
        optional prefix, optional zero padding and the digits.

        Where the style is not recognised, or 'dst' is too small for
        the digits plus prefix, the outcome depends upon Flags.Throw:
        with the flag set an IllegalArgumentException is thrown, and
        without it a diagnostic string ("{unknown format '?'}" or
        "{output width too small}") is returned in place of the number.

*******************************************************************************/

T[] format(T, U=long) (T[] dst, U i, Style fmt=Style.Signed, Flags flags=Flags.None)
{return format!(T)(dst, i, fmt, flags);}

T[] format(T) (T[] dst, long i, Style fmt=Style.Signed, Flags flags=Flags.None)
{
        T[]  prefix;
        auto len = dst.length;

        // Deliberately a non-static nested function: it has to consult
        // the caller's 'flags'. Testing a constant against Flags.Throw
        // (0x10) can never succeed, which would disable the flag.
        T[] error (T[] msg)
        {
                if (flags & Flags.Throw)
                    throw new IllegalArgumentException ("Integer.format :: invalid arguments");
                return msg;
        }

        // must have some buffer space to operate within!
        if (len)
           {
           uint radix;
           T[]  numbers = "0123456789abcdef";

           // pre-conversion setup
           switch (cast(byte) fmt)
                  {
                  case 'd':
                  case 'D':
                       if (i < 0)
                          {
                          prefix = "-";
                          i = -i;
                          }
                       else
                          if (flags & Flags.Space)
                              prefix = " ";
                          else
                             if (flags & Flags.Plus)
                                 prefix = "+";
                       // fall through!
                  case 'u':
                  case 'U':
                       radix = 10;
                       break;

                  case 'b':
                  case 'B':
                       radix = 2;
                       if (flags & Flags.Prefix)
                           prefix = "0b";
                       break;

                  case 'o':
                  case 'O':
                       radix = 8;
                       if (flags & Flags.Prefix)
                           prefix = "0o";
                       break;

                  case 'x':
                       radix = 16;
                       if (flags & Flags.Prefix)
                           prefix = "0x";
                       break;

                  case 'X':
                       radix = 16;
                       numbers = "0123456789ABCDEF";
                       if (flags & Flags.Prefix)
                           prefix = "0X";
                       break;

                  default:
                        return error (cast(T[])"{unknown format '"~cast(T)fmt~"'}");
                  }

           // convert number to text, least significant digit first,
           // filling 'dst' backwards. 'len' counts the free elements
           // and reaches zero when digits remain but space does not
           T* p = dst.ptr + len;
           if (uint.max >= cast(ulong) i)
              {
              // value fits in 32 bits: use the narrower arithmetic
              uint v = cast (uint) i;
              do {
                 *--p = numbers[v % radix];
                 } while ((v /= radix) && --len);
              }
           else
              {
              ulong v = cast (ulong) i;
              do {
                 *--p = numbers[cast(uint) (v % radix)];
                 } while ((v /= radix) && --len);
              }
           }

        // are we about to overflow?
        if (len > prefix.length)
           {
           // 'len' becomes the index within 'dst' where the prefix starts
           len -= prefix.length + 1;

           // prefix number with zeros?
           if (flags & Flags.Zero)
              {
              dst [prefix.length .. len + prefix.length] = '0';
              len = 0;
              }

           // write optional prefix string ...
           dst [len .. len + prefix.length] = prefix[];
           }
        else
           return error ("{output width too small}");

        // return slice of provided output buffer
        return dst [len .. $];
}
299
300
/******************************************************************************

        Parse an integer value from the provided 'digits' string.

        trim() first strips leading whitespace, a sign and an
        optional radix prefix; convert() then turns the remaining
        text into a value. A radix may be provided as an argument
        instead, whereupon it must match the prefix (where present).
        When radix is set to zero, conversion will default to decimal.

        A non-null 'ate' will return the number of characters used
        to construct the returned value, including those consumed
        by trim().

******************************************************************************/

long parse(T, U=uint) (T[] digits, U radix=0, uint* ate=null)
{
        return parse!(T) (digits, radix, ate);
}

long parse(T) (T[] digits, uint radix=0, uint* ate=null)
{
        bool negative;

        auto skipped   = trim (digits, negative, radix);
        auto magnitude = convert (digits[skipped..$], radix, ate);

        // convert() counted the digits only; add what trim() consumed
        if (ate)
            *ate += skipped;

        if (negative)
            return cast(long) -magnitude;
        return cast(long) magnitude;
}
330
/******************************************************************************

        Convert the provided 'digits' into an integer value,
        without checking for a sign or radix. The radix defaults
        to decimal (10).

        Characters '0'..'9', 'a'..'f' and 'A'..'F' are recognised as
        digits. Conversion stops at the first character which is
        not one of those, or whose digit value is not below 'radix'.
        No overflow check is made on the accumulated value.

        Returns the value and updates 'ate' (where non-null) with
        the number of characters consumed.

******************************************************************************/

ulong convert(T, U=uint) (T[] digits, U radix=10, uint* ate=null)
{
        return convert!(T) (digits, radix, ate);
}

ulong convert(T) (T[] digits, uint radix=10, uint* ate=null)
{
        uint  consumed;
        ulong total;

        foreach (c; digits)
                {
                uint digit;

                // map the character onto its numeric digit value
                if (c >= '0' && c <= '9')
                    digit = c - '0';
                else
                   if (c >= 'a' && c <= 'f')
                       digit = c - 'a' + 10;
                   else
                      if (c >= 'A' && c <= 'F')
                          digit = c - 'A' + 10;
                      else
                         break;

                // a digit outside the radix terminates the number
                if (digit >= radix)
                    break;

                total = total * radix + digit;
                ++consumed;
                }

        if (ate)
            *ate = consumed;

        return total;
}
377
378
/******************************************************************************

        Strip leading whitespace, extract an optional +/- sign,
        and an optional radix prefix. If the radix value matches
        an optional prefix, or the radix is zero, the prefix will
        be consumed and assigned. Where the radix is non zero and
        does not match an explicit prefix, the latter will remain
        unconsumed. Otherwise, radix will default to 10.

        Spaces, tabs and sign characters may be interleaved; the
        last sign character seen determines 'sign', which is left
        untouched where no sign is present. A leading '0' followed
        by at least one more character is consumed, whether or not
        a radix prefix ("0x", "0b", "0o" in either case) follows.
        Empty input leaves both 'sign' and 'radix' unmodified.

        Returns the number of characters consumed.

******************************************************************************/

uint trim(T, U=uint) (T[] digits, inout bool sign, inout U radix)
{return trim!(T)(digits, sign, radix);}

uint trim(T) (T[] digits, inout bool sign, inout uint radix)
{
        T       c;
        T*      p = digits.ptr;
        int     len = digits.length;

        if (len)
           {
           // strip off whitespace and sign characters. Each character
           // is fetched only after 'len' confirms that it lies within
           // 'digits', so input made up entirely of whitespace and
           // signs is never read beyond its final element
           for (; len; ++p, --len)
               {
               c = *p;
               if (c is ' ' || c is '\t')
                  {}
               else
                  if (c is '-')
                      sign = true;
                  else
                     if (c is '+')
                         sign = false;
                     else
                        break;
               }

           // strip off a radix specifier also? 'len > 1' guarantees
           // there is a character after the '0' to inspect
           auto r = radix;
           if (len > 1 && c is '0')
               switch (*++p)
                      {
                      case 'x':
                      case 'X':
                           r = 16, ++p;
                           break;

                      case 'b':
                      case 'B':
                           r = 2, ++p;
                           break;

                      case 'o':
                      case 'O':
                           r = 8, ++p;
                           break;

                      default:
                           break;
                      }

           // default the radix to 10
           if (r is 0)
               radix = 10;
           else
              // explicit radix must match (optional) prefix
              if (radix != r)
                  if (radix)
                      // mismatch: hand the prefix letter back
                      --p;
                  else
                     radix = r;
           }

        // return number of characters eaten
        return (p - digits.ptr);
}
455
456
/******************************************************************************

        quick & dirty text-to-unsigned int converter. Use only when you
        know what the content is, or use parse() or convert() instead.

        Decimal digits are accumulated from the start of 's' until the
        first non-digit character; sign, whitespace and overflow are
        not handled.

        Return the parsed uint

******************************************************************************/

uint atoi(T) (T[] s)
{
        uint total;

        foreach (c; s)
                {
                // anything other than a decimal digit ends the number
                if (c < '0' || c > '9')
                    break;

                total = total * 10 + (c - '0');
                }

        return total;
}
477
478
/******************************************************************************

        quick & dirty unsigned to text converter, where the provided output
        must be large enough to house the result (10 digits in the largest
        case). For mainstream use, consider utilizing format() instead.

        Digits are written backwards from the end of 'output'; no check
        is made that they fit.

        Returns a populated slice of the provided output

******************************************************************************/

T[] itoa(T, U=uint) (T[] output, U value)
{
        return itoa!(T) (output, value);
}

T[] itoa(T) (T[] output, uint value)
{
        T* cursor = output.ptr + output.length;

        // emit at least one digit, so that zero renders as "0"
        for (;;)
            {
            *--cursor = value % 10 + '0';
            value /= 10;
            if (value is 0)
                break;
            }

        return output [cursor - output.ptr .. $];
}
501
502
/******************************************************************************

        Unit tests, compiled in for debug (UnitTest) builds only. They
        exercise toInt(), toLong(), atoi(), itoa(), parse() and format().

******************************************************************************/

debug (UnitTest)
{
        unittest
        {
        char[64] scratch;

        // strict conversions, with and without an explicit radix
        assert (toInt ("1") == 1);
        assert (toLong ("1") == 1);
        assert (toInt ("1", 10) == 1);
        assert (toLong ("1", 10) == 1);

        // quick & dirty converters
        assert (atoi ("12345") == 12345);
        assert (itoa (scratch, 12345) == "12345");

        // wide-character input, with and without a sign
        assert (parse ("0"w) == 0);
        assert (parse ("1"w) == 1);
        assert (parse ("-1"w) == -1);
        assert (parse ("+1"w) == 1);

        // numerical limits: 32 bit
        assert (parse ("-2147483648") == int.min);
        assert (parse ("2147483647") == int.max);
        assert (parse ("4294967295") == uint.max);

        // numerical limits: 64 bit
        assert (parse ("-9223372036854775808") == long.min);
        assert (parse ("9223372036854775807") == long.max);
        assert (parse ("18446744073709551615") == ulong.max);

        // hex digits, lowercase
        assert (parse ("a", 16) == 0x0A);
        assert (parse ("b", 16) == 0x0B);
        assert (parse ("c", 16) == 0x0C);
        assert (parse ("d", 16) == 0x0D);
        assert (parse ("e", 16) == 0x0E);
        assert (parse ("f", 16) == 0x0F);

        // hex digits, uppercase
        assert (parse ("A", 16) == 0x0A);
        assert (parse ("B", 16) == 0x0B);
        assert (parse ("C", 16) == 0x0C);
        assert (parse ("D", 16) == 0x0D);
        assert (parse ("E", 16) == 0x0E);
        assert (parse ("F", 16) == 0x0F);

        // hex, multiple digits
        assert (parse ("FFFF", 16) == ushort.max);
        assert (parse ("ffffFFFF", 16) == uint.max);
        assert (parse ("ffffFFFFffffFFFF", 16u) == ulong.max);

        // oct
        assert (parse ("55", 8) == 055);
        assert (parse ("100", 8) == 0100);

        // bin
        assert (parse ("10000", 2) == 0x10);

        // trim
        assert (parse (" \t20") == 20);
        assert (parse (" \t-20") == -20);
        assert (parse ("- \t 20") == -20);

        // recognise radix prefix
        assert (parse ("0xFFFF") == ushort.max);
        assert (parse ("0XffffFFFF") == uint.max);
        assert (parse ("0o55") == 055);
        assert (parse ("0O55") == 055);
        assert (parse ("0b10000") == 0x10);
        assert (parse ("0B10000") == 0x10);

        // prefix tests: a lone '0', then prefixes which do or do not
        // agree with the explicit radix
        char[] prefixOnly = "0x";
        assert (parse (prefixOnly[0..1]) == 0);
        assert (parse ("0x10", 10) == 0);
        assert (parse ("0b10", 10) == 0);
        assert (parse ("0o10", 10) == 0);
        assert (parse ("0b10") == 0b10);
        assert (parse ("0o10") == 010);
        assert (parse ("0b10", 2) == 0b10);
        assert (parse ("0o10", 8) == 010);

        // format tests
        assert (format (scratch, 12345L) == "12345");
        assert (format (scratch, 0) == "0");
        assert (format (scratch, 0x10101L, Style.Hex) == "10101");
        assert (format (scratch, 0xfafaL, Style.Hex) == "fafa");
        assert (format (scratch, 0xfafaL, Style.HexUpper, Flags.Prefix) == "0XFAFA");
        assert (format (scratch, -1L, Style.HexUpper, Flags.Prefix) == "0XFFFFFFFFFFFFFFFF");
        assert (format (scratch, -101L) == "-101");
        assert (format (scratch, 101L, Style.Signed, Flags.Plus) == "+101");
        assert (format (scratch, 101L, Style.Signed, Flags.Space) == " 101");
        assert (format (scratch[0..8], 0x5L, Style.Binary, Flags.Prefix | Flags.Zero) == "0b000101");

        // format tests against deliberately small buffers
        assert (format (scratch[0..8], -1, Style.Binary, Flags.Prefix | Flags.Zero) == "{output width too small}");
        assert (format (scratch[0..2], 0x3, Style.Binary, Flags.Throw) == "11");
        assert (format (scratch[0..4], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b11");
        assert (format (scratch[0..5], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b011");
        assert (format (scratch[0..5], 0x3, Style.Binary, Flags.Zero | Flags.Throw) == "00011");
        }
}
598