132
|
1 /*******************************************************************************
|
|
2
|
|
3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved
|
|
4
|
|
5 license: BSD style: $(LICENSE)
|
|
6
|
|
7 version: Initial release: Nov 2005
|
|
8
|
|
9 author: Kris
|
|
10
|
|
11 A set of functions for converting between string and integer
|
|
12 values.
|
|
13
|
|
14 Applying the D "import alias" mechanism to this module is highly
|
|
15 recommended, in order to limit namespace pollution:
|
|
16 ---
|
|
17 import Integer = tango.text.convert.Integer;
|
|
18
|
|
19 auto i = Integer.parse ("32767");
|
|
20 ---
|
|
21
|
|
22 *******************************************************************************/
|
|
23
|
|
24 module tango.text.convert.Integer;
|
|
25
|
|
26 private import tango.core.Exception;
|
|
27
|
|
/******************************************************************************

        Output styles understood by format(). Each member's value is
        the ASCII letter used to select that style.

******************************************************************************/

enum Style
{
        Signed      = 'd',      /// decimal, with a sign where negative
        Binary      = 'b',      /// base 2
        Octal       = 'o',      /// base 8
        Hex         = 'x',      /// base 16, digits a-f in lowercase
        HexUpper    = 'X',      /// base 16, digits A-F in uppercase
        Unsigned    = 'u',      /// decimal, treated as unsigned
}
|
|
43
|
|
/******************************************************************************

        Modifier flags for format(). Each flag occupies its own bit,
        so several may be combined with '|'.

******************************************************************************/

enum Flags
{
        None    = 0,            /// no modifiers
        Prefix  = 1 << 0,       /// emit a radix prefix before the value
        Zero    = 1 << 1,       /// pad the value on the left with zeroes
        Plus    = 1 << 2,       /// emit '+' before a positive decimal
        Space   = 1 << 3,       /// emit a space before a positive decimal
        Throw   = 1 << 4,       /// throw on output truncation
}
|
|
59
|
|
/******************************************************************************

        Parse an integer value from the provided 'digits' string.

        The string is inspected for a sign and an optional radix
        prefix. A radix may be provided as an argument instead,
        whereupon it must match the prefix (where present). When
        radix is set to zero, conversion will default to decimal.

        Throws an IllegalArgumentException where the input text is
        not parsable in its entirety (raised by toLong), or where
        the parsed value lies outside the range int.min .. int.max.

******************************************************************************/

int toInt(T, U=uint) (T[] digits, U radix=0)
{return toInt!(T)(digits, radix);}

int toInt(T) (T[] digits, uint radix=0)
{
        auto x = toLong (digits, radix);

        // reject both directions: a value below int.min would otherwise
        // be silently truncated by the narrowing cast
        if (x > int.max || x < int.min)
            throw new IllegalArgumentException ("Integer.toInt :: integer overflow");
        return cast(int) x;
}
|
|
84
|
|
/******************************************************************************

        Convert the whole of 'digits' into a long.

        A leading sign and an optional radix prefix are recognised
        by parse(). An explicit radix may be passed instead; where a
        prefix is present it has to agree with that radix. A radix
        of zero selects decimal unless a prefix says otherwise.

        Throws an IllegalArgumentException when parse() consumes
        fewer characters than 'digits' contains.

******************************************************************************/

long toLong(T, U=uint) (T[] digits, U radix=0)
{
        return toLong!(T) (digits, radix);
}

long toLong(T) (T[] digits, uint radix=0)
{
        uint consumed;
        auto result = parse (digits, radix, &consumed);

        // everything was eaten: the text is a complete literal
        if (consumed >= digits.length)
            return result;

        throw new IllegalArgumentException ("Integer.toLong :: invalid literal");
}
|
|
111
|
|
/******************************************************************************

        Convenience wrapper around format() which renders 'i' as
        char text and hands back a freshly allocated copy.

        See format() for the meaning of the style and flags.

******************************************************************************/

char[] toString (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        // 66 elements: room for 64 binary digits plus a 2-char prefix
        char[66] scratch = void;

        auto text = format (scratch, i, t, f);

        // the slice refers to stack storage, so copy it before returning
        return text.dup;
}
|
|
127
|
|
/******************************************************************************

        Convenience wrapper around format() which renders 'i' as
        wchar text and hands back a freshly allocated copy.

        See format() for the meaning of the style and flags.

******************************************************************************/

wchar[] toString16 (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        // 66 elements: room for 64 binary digits plus a 2-char prefix
        wchar[66] scratch = void;

        auto text = format (scratch, i, t, f);

        // the slice refers to stack storage, so copy it before returning
        return text.dup;
}
|
|
143
|
|
/******************************************************************************

        Convenience wrapper around format() which renders 'i' as
        dchar text and hands back a freshly allocated copy.

        See format() for the meaning of the style and flags.

******************************************************************************/

dchar[] toString32 (long i, Style t=Style.Signed, Flags f=Flags.None)
{
        // 66 elements: room for 64 binary digits plus a 2-char prefix
        dchar[66] scratch = void;

        auto text = format (scratch, i, t, f);

        // the slice refers to stack storage, so copy it before returning
        return text.dup;
}
|
|
159
|
|
/*******************************************************************************

        Style numeric values into the provided output buffer. The
        following types are supported:

        Unsigned        - unsigned decimal
        Signed          - signed decimal
        Octal           - octal
        Hex             - lowercase hexadecimal
        HexUpper        - uppercase hexadecimal
        Binary          - binary

        Modifiers supported include:

        Prefix          - prefix the conversion with a type identifier
        Plus            - prefix positive decimals with a '+'
        Space           - prefix positive decimals with one space
        Zero            - left-pad the number with zeros
        Throw           - throw an exception when output would be truncated

        The provided 'dst' buffer should be large enough to house
        the output: 64 elements for a 64-bit binary string, or 66
        when the "0b" prefix is requested as well.

        Returns a slice of 'dst' holding the text, right-aligned
        within the buffer. Where the style is not recognised, or the
        buffer is too small for the digits and prefix, the result is
        instead a short diagnostic message - or, when Flags.Throw is
        set, an IllegalArgumentException is thrown.

*******************************************************************************/

T[] format(T, U=long) (T[] dst, U i, Style fmt=Style.Signed, Flags flags=Flags.None)
{return format!(T)(dst, i, fmt, flags);}

T[] format(T) (T[] dst, long i, Style fmt=Style.Signed, Flags flags=Flags.None)
{
        T[]     prefix;
        auto    len = dst.length;

        // A nested (non-static) function, so that it can consult the
        // caller's flags: the failure is thrown only where Flags.Throw
        // was requested, otherwise the diagnostic text is returned
        T[] error (T[] msg)
        {
                if (flags & Flags.Throw)
                    throw new IllegalArgumentException ("Integer.format :: invalid arguments");
                return msg;
        }

        // must have some buffer space to operate within!
        if (len)
           {
           uint radix;
           T[]  numbers = "0123456789abcdef";

           // pre-conversion setup
           switch (cast(byte) fmt)
                  {
                  case 'd':
                  case 'D':
                       if (i < 0)
                          {
                          prefix = "-";
                          i = -i;
                          }
                       else
                          if (flags & Flags.Space)
                              prefix = " ";
                          else
                             if (flags & Flags.Plus)
                                 prefix = "+";
                       // fall through!
                  case 'u':
                  case 'U':
                       radix = 10;
                       break;

                  case 'b':
                  case 'B':
                       radix = 2;
                       if (flags & Flags.Prefix)
                           prefix = "0b";
                       break;

                  case 'o':
                  case 'O':
                       radix = 8;
                       if (flags & Flags.Prefix)
                           prefix = "0o";
                       break;

                  case 'x':
                       radix = 16;
                       if (flags & Flags.Prefix)
                           prefix = "0x";
                       break;

                  case 'X':
                       radix = 16;
                       numbers = "0123456789ABCDEF";
                       if (flags & Flags.Prefix)
                           prefix = "0X";
                       break;

                  default:
                        return error (cast(T[])"{unknown format '"~cast(T)fmt~"'}");
                  }

           // convert number to text, writing digits backwards from the
           // end of the buffer. 'len' is decremented once per additional
           // digit, and reaches zero where the buffer fills up while
           // digits still remain
           T* p = dst.ptr + len;
           if (uint.max >= cast(ulong) i)
              {
              // value fits in 32 bits: use the narrower arithmetic
              uint v = cast (uint) i;
              do {
                 *--p = numbers[v % radix];
                 } while ((v /= radix) && --len);
              }
           else
              {
              ulong v = cast (ulong) i;
              do {
                 *--p = numbers[cast(uint) (v % radix)];
                 } while ((v /= radix) && --len);
              }
           }

        // are we about to overflow?
        if (len > prefix.length)
           {
           // 'len' becomes the index at which the prefix starts
           len -= prefix.length + 1;

           // prefix number with zeros?
           if (flags & Flags.Zero)
              {
              // fill the gap between prefix and digits; the prefix
              // then sits at the very start of the buffer
              dst [prefix.length .. len + prefix.length] = '0';
              len = 0;
              }

           // write optional prefix string ...
           dst [len .. len + prefix.length] = prefix[];
           }
        else
           return error ("{output width too small}");

        // return slice of provided output buffer
        return dst [len .. $];
}
|
|
299
|
|
300
|
|
/******************************************************************************

        Read an integer from the front of 'digits'.

        trim() first skips leading whitespace and picks up a sign
        and an optional radix prefix; convert() then accumulates
        the digits which follow. An explicit radix may be supplied,
        in which case a prefix (where present) has to agree with
        it. A radix of zero selects decimal unless a prefix says
        otherwise.

        Where 'ate' is non-null it receives the total number of
        characters used: those skipped by trim() plus the digits
        taken by convert().

******************************************************************************/

long parse(T, U=uint) (T[] digits, U radix=0, uint* ate=null)
{
        return parse!(T) (digits, radix, ate);
}

long parse(T) (T[] digits, uint radix=0, uint* ate=null)
{
        bool negative;

        auto skipped   = trim (digits, negative, radix);
        auto magnitude = convert (digits[skipped..$], radix, ate);

        // convert() stored only its own count; add what trim() consumed
        if (ate)
            *ate += skipped;

        if (negative)
            return cast(long) -magnitude;
        return cast(long) magnitude;
}
|
|
330
|
|
/******************************************************************************

        Convert the provided 'digits' into an integer value,
        without checking for a sign or radix prefix. The radix
        defaults to decimal (10).

        Digits are '0'-'9' followed by the letters 'a'-'z' (either
        case) for the values 10 through 35, so any radix up to 36
        can be expressed. Conversion stops at the first character
        which is not a digit of the given radix.

        No overflow detection is performed on the accumulated value.

        Returns the value and, where 'ate' is non-null, updates it
        with the number of characters consumed.

******************************************************************************/

ulong convert(T, U=uint) (T[] digits, U radix=10, uint* ate=null)
{return convert!(T)(digits, radix, ate);}

ulong convert(T) (T[] digits, uint radix=10, uint* ate=null)
{
        uint  eaten;
        ulong value;

        foreach (c; digits)
                {
                uint digit;

                // map the character onto its numeric digit value
                if (c >= '0' && c <= '9')
                    digit = c - '0';
                else
                   if (c >= 'a' && c <= 'z')
                       digit = c - 'a' + 10;
                   else
                      if (c >= 'A' && c <= 'Z')
                          digit = c - 'A' + 10;
                      else
                         break;

                // a digit outside the radix terminates the number
                if (digit >= radix)
                    break;

                value = value * radix + digit;
                ++eaten;
                }

        if (ate)
            *ate = eaten;

        return value;
}
|
|
377
|
|
378
|
|
/******************************************************************************

        Strip leading whitespace, extract an optional +/- sign,
        and an optional radix prefix. If the radix value matches
        an optional prefix, or the radix is zero, the prefix will
        be consumed and assigned. Where the radix is non zero and
        does not match an explicit prefix, the prefix letter will
        remain unconsumed (its leading '0' is still consumed).
        Otherwise, radix will default to 10.

        'sign' is set to true for '-' and false for '+'; it is left
        untouched where no sign character is present. An empty
        'digits' leaves both 'sign' and 'radix' untouched.

        Returns the number of characters consumed.

******************************************************************************/

uint trim(T, U=uint) (T[] digits, inout bool sign, inout U radix)
{return trim!(T)(digits, sign, radix);}

uint trim(T) (T[] digits, inout bool sign, inout uint radix)
{
        uint pos;

        if (digits.length)
           {
           // strip off whitespace and sign characters. The index is
           // tested before every read, so input made up solely of
           // such characters never reads beyond the end of 'digits'
           while (pos < digits.length)
                 {
                 T c = digits[pos];

                 if (c is ' ' || c is '\t')
                    {}
                 else
                    if (c is '-')
                        sign = true;
                    else
                       if (c is '+')
                           sign = false;
                       else
                          break;
                 ++pos;
                 }

           // strip off a radix specifier also? This requires a '0'
           // with at least one further character behind it
           auto r = radix;
           if (digits.length - pos > 1 && digits[pos] is '0')
              {
              // the leading zero is consumed whether or not a
              // prefix letter follows
              ++pos;
              switch (digits[pos])
                     {
                     case 'x':
                     case 'X':
                          r = 16;
                          ++pos;
                          break;

                     case 'b':
                     case 'B':
                          r = 2;
                          ++pos;
                          break;

                     case 'o':
                     case 'O':
                          r = 8;
                          ++pos;
                          break;

                     default:
                          break;
                     }
              }

           // default the radix to 10
           if (r is 0)
               radix = 10;
           else
              // explicit radix must match (optional) prefix
              if (radix != r)
                 {
                 if (radix)
                     // mismatch: hand the prefix letter back
                     --pos;
                 else
                    radix = r;
                 }
           }

        // return number of characters eaten
        return pos;
}
|
|
455
|
|
456
|
|
/******************************************************************************

        Minimal text-to-uint conversion: accumulates leading decimal
        digits and stops at the first character which is not one.
        There is no handling of sign, whitespace, radix or overflow;
        use parse() or convert() where the content is not known to
        be a plain decimal number.

        Returns the accumulated value, which is zero where 's' does
        not begin with a digit.

******************************************************************************/

uint atoi(T) (T[] s)
{
        uint total;

        for (uint k = 0; k < s.length; ++k)
            {
            auto ch = s[k];

            // anything other than a decimal digit ends the number
            if (ch < '0' || ch > '9')
                break;

            total = total * 10 + (ch - '0');
            }
        return total;
}
|
|
477
|
|
478
|
|
/******************************************************************************

        Minimal uint-to-text conversion in decimal. Digits are
        written backwards from the end of 'output', with no check
        against the start of the buffer: the caller must supply
        enough room for the result (10 digits in the largest case).
        For mainstream use, consider utilizing format() instead.

        Returns the slice of 'output' which holds the digits.

******************************************************************************/

T[] itoa(T, U=uint) (T[] output, U value)
{
        return itoa!(T) (output, value);
}

T[] itoa(T) (T[] output, uint value)
{
        // begin one past the final element and walk towards the front
        T* cursor = output.ptr + output.length;

        for (;;)
            {
            --cursor;
            *cursor = value % 10 + '0';

            value /= 10;
            if (value is 0)
                break;
            }

        auto first = cursor - output.ptr;
        return output[first .. $];
}
|
|
501
|
|
502
|
|
503 /******************************************************************************
|
|
504
|
|
505 ******************************************************************************/
|
|
506
|
|
debug (UnitTest)
{
        // Exercises the conversion routines of this module; compiled
        // only where the UnitTest debug identifier is enabled
        unittest
        {
        char[64] scratch;

        // whole-string conversions, with and without an explicit radix
        assert (toInt("1") is 1);
        assert (toLong("1") is 1);
        assert (toInt("1", 10) is 1);
        assert (toLong("1", 10) is 1);

        // the quick & dirty converters
        assert (atoi ("12345") is 12345);
        assert (itoa (scratch, 12345) == "12345");

        // wchar input, with and without a sign
        assert (parse ("0"w) == 0);
        assert (parse ("1"w) == 1);
        assert (parse ("-1"w) == -1);
        assert (parse ("+1"w) == 1);

        // numerical limits: 32 bit
        assert (parse ("-2147483648") == int.min);
        assert (parse ("2147483647") == int.max);
        assert (parse ("4294967295") == uint.max);

        // numerical limits: 64 bit
        assert (parse ("-9223372036854775808") == long.min);
        assert (parse ("9223372036854775807") == long.max);
        assert (parse ("18446744073709551615") == ulong.max);

        // hex digits in lowercase
        assert (parse ("a", 16) == 0x0A);
        assert (parse ("b", 16) == 0x0B);
        assert (parse ("c", 16) == 0x0C);
        assert (parse ("d", 16) == 0x0D);
        assert (parse ("e", 16) == 0x0E);
        assert (parse ("f", 16) == 0x0F);

        // hex digits in uppercase
        assert (parse ("A", 16) == 0x0A);
        assert (parse ("B", 16) == 0x0B);
        assert (parse ("C", 16) == 0x0C);
        assert (parse ("D", 16) == 0x0D);
        assert (parse ("E", 16) == 0x0E);
        assert (parse ("F", 16) == 0x0F);

        // hex values of increasing width
        assert (parse ("FFFF", 16) == ushort.max);
        assert (parse ("ffffFFFF", 16) == uint.max);
        assert (parse ("ffffFFFFffffFFFF", 16u) == ulong.max);

        // octal
        assert (parse ("55", 8) == 055);
        assert (parse ("100", 8) == 0100);

        // binary
        assert (parse ("10000", 2) == 0x10);

        // leading whitespace and sign handling
        assert (parse (" \t20") == 20);
        assert (parse (" \t-20") == -20);
        assert (parse ("- \t 20") == -20);

        // a radix prefix is recognised where no radix is given
        assert (parse ("0xFFFF") == ushort.max);
        assert (parse ("0XffffFFFF") == uint.max);
        assert (parse ("0o55") == 055);
        assert (parse ("0O55") == 055);
        assert (parse ("0b10000") == 0x10);
        assert (parse ("0B10000") == 0x10);

        // a lone '0' sliced from a prefix must not look past the slice
        char[] text = "0x";
        assert (parse (text[0..1]) == 0);

        // a prefix which conflicts with the explicit radix
        assert (parse ("0x10", 10) == 0);
        assert (parse ("0b10", 10) == 0);
        assert (parse ("0o10", 10) == 0);

        // a prefix alone, or in agreement with the explicit radix
        assert (parse ("0b10") == 0b10);
        assert (parse ("0o10") == 010);
        assert (parse ("0b10", 2) == 0b10);
        assert (parse ("0o10", 8) == 010);

        // formatting: styles and prefixes
        assert (format (scratch, 12345L) == "12345");
        assert (format (scratch, 0) == "0");
        assert (format (scratch, 0x10101L, Style.Hex) == "10101");
        assert (format (scratch, 0xfafaL, Style.Hex) == "fafa");
        assert (format (scratch, 0xfafaL, Style.HexUpper, Flags.Prefix) == "0XFAFA");
        assert (format (scratch, -1L, Style.HexUpper, Flags.Prefix) == "0XFFFFFFFFFFFFFFFF");

        // formatting: sign, plus, space and zero padding
        assert (format (scratch, -101L) == "-101");
        assert (format (scratch, 101L, Style.Signed, Flags.Plus) == "+101");
        assert (format (scratch, 101L, Style.Signed, Flags.Space) == " 101");
        assert (format (scratch[0..8], 0x5L, Style.Binary, Flags.Prefix | Flags.Zero) == "0b000101");

        // formatting: buffers which are too small, or only just big enough
        assert (format (scratch[0..8], -1, Style.Binary, Flags.Prefix | Flags.Zero) == "{output width too small}");
        assert (format (scratch[0..2], 0x3, Style.Binary, Flags.Throw) == "11");
        assert (format (scratch[0..4], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b11");
        assert (format (scratch[0..5], 0x3, Style.Binary, Flags.Prefix | Flags.Zero | Flags.Throw) == "0b011");
        assert (format (scratch[0..5], 0x3, Style.Binary, Flags.Zero | Flags.Throw) == "00011");
        }
}
|
|
598
|