comparison lphobos/std/dateparse.d @ 473:373489eeaf90

Applied downs' lphobos update
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Mon, 04 Aug 2008 19:28:49 +0200
parents
children 88e23f8c2354
comparison
equal deleted inserted replaced
472:15c804b6ce77 473:373489eeaf90
1
2 /*
3 * Copyright (C) 1999-2004 by Digital Mars, www.digitalmars.com
4 * Written by Walter Bright
5 *
6 * This software is provided 'as-is', without any express or implied
7 * warranty. In no event will the authors be held liable for any damages
8 * arising from the use of this software.
9 *
10 * Permission is granted to anyone to use this software for any purpose,
11 * including commercial applications, and to alter it and redistribute it
12 * freely, subject to the following restrictions:
13 *
14 * o The origin of this software must not be misrepresented; you must not
15 * claim that you wrote the original software. If you use this software
16 * in a product, an acknowledgment in the product documentation would be
17 * appreciated but is not required.
18 * o Altered source versions must be plainly marked as such, and must not
19 * be misrepresented as being the original software.
20 * o This notice may not be removed or altered from any source
21 * distribution.
22 */
23
24
25 module std.dateparse;
26
27 private
28 {
29 import std.string;
30 import std.c.stdlib;
31 import std.date;
32 }
33
34 //debug=dateparse;
35
/**
 * Error thrown by DateParse.parse() when the supplied text cannot be
 * turned into a valid date.
 *
 * The message is the fixed prefix "Invalid date string: " followed by the
 * text that failed to parse.
 */
class DateParseError : Error
{
    /// s is the date text that was rejected.
    this(char[] s)
    {
        super("Invalid date string: " ~ s);
    }
}
43
44 struct DateParse
45 {
/**
 * Parses the date/time text in s and stores the result in date.
 *
 * All parser state is reset first, so one DateParse value can be reused
 * for any number of calls.
 *
 * Params:
 *  s    = text to parse
 *  date = receives year, month, day, hour, minute, second, ms, weekday
 *         and tzcorrection. tzcorrection is int.min when the text names
 *         no time zone; otherwise it is left in the HHMM-style form the
 *         tokenizer produced (for example 800 or -530).
 *
 * Throws: DateParseError if the text cannot be tokenized, if no year was
 * found, or if any field is out of range.
 */
void parse(char[] s, out Date date)
{
    // Inputs up to this many bytes get their scratch buffer from the stack;
    // longer ones use the heap so that the length of caller-supplied text
    // can never exhaust the stack.
    const size_t maxStackScratch = 256;

    *this = DateParse.init;

    char* stackScratch = null;
    if (s.length <= maxStackScratch)
        stackScratch = cast(char*) alloca(s.length);
    if (stackScratch)
        buffer = stackScratch[0 .. s.length];
    else
        buffer = new char[s.length];    // long input, or alloca returned null

    debug(dateparse) printf("DateParse.parse('%.*s')\n",
        cast(int) s.length, s.ptr);

    int parsed = parseString(s);

    // Tokenizing is finished. Drop the scratch slice now so this struct
    // never holds a reference to stack memory after parse() has returned.
    buffer = null;

    if (!parsed)
        throw new DateParseError(s);

    debug(dateparse)
        printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
            year, month, day,
            hours, minutes, seconds, ms,
            weekday, tzcorrection);

    // A time zone is optional (int.min = absent). When present it must lie
    // within +-2300 and be a multiple of 10.
    bool badZone = tzcorrection != int.min &&
                   (tzcorrection < -2300 || tzcorrection > 2300 ||
                    tzcorrection % 10 != 0);

    if (year == year.init ||            // a year is mandatory
        month < 1 || month > 12 ||
        day < 1 || day > 31 ||
        hours < 0 || hours > 23 ||
        minutes < 0 || minutes > 59 ||
        seconds < 0 || seconds > 59 ||
        badZone)
    {
        throw new DateParseError(s);
    }

    if (ampm)
    {
        // Convert 12-hour clock readings to 24-hour form.
        if (hours > 12)
            throw new DateParseError(s);
        if (hours < 12)
        {
            if (ampm == 2)              // P.M.
                hours += 12;
        }
        else if (ampm == 1)             // 12 A.M. is midnight
        {
            hours = 0;
        }
    }

    // Two-digit years are taken to be in the 1900s.
    if (year >= 0 && year <= 99)
        year += 1900;

    date.year = year;
    date.month = month;
    date.day = day;
    date.hour = hours;
    date.minute = minutes;
    date.second = seconds;
    date.ms = ms;
    date.weekday = weekday;
    date.tzcorrection = tzcorrection;
}
118
119
120 private: // parsed field values first, then the tokenizer's working state
121 int year = int.min; // int.min means "no year parsed"; parse() rejects the text if it is still unset
122 int month; // 1..12
123 int day; // 1..31
124 int hours; // 0..23
125 int minutes; // 0..59
126 int seconds; // 0..59
127 int ms; // 0..999; no code in this struct assigns it, so it stays 0
128 int weekday; // 1..7 with Sunday = 1; stays 0 when the text names no weekday
129 int ampm; // 0: not specified
130 // 1: AM
131 // 2: PM
132 int tzcorrection = int.min; // int.min means "no zone given"; otherwise a signed HHMM-style value (e.g. 800, -530) that parse() accepts within -2300..2300 in multiples of 10
133
134 char[] s; // text being scanned; set by parseString()
135 int si; // index into s of the next character nextToken() will read
136 int number; // value of the latest number token, or the table value classify() found for a word
137 char[] buffer; // scratch space where nextToken() collects a lower-cased word
138
139 enum DP : byte // token kinds returned by nextToken() and classify()
140 {
141 err, // unrecognised word or character, or an unterminated '(' comment
142 weekday, // weekday name; number holds 1..7
143 month, // month name; number holds 1..12
144 number, // run of decimal digits; number holds its value
145 end, // end of the input text
146 colon, // ':'
147 minus, // '-'
148 slash, // '/' (and '.' when version DATE_DOT_DELIM is set)
149 ampm, // "am" or "pm"; number holds 1 or 2
150 plus, // '+'
151 tz, // time zone name; number holds its correction
152 dst, // the word "dst"
153 dsttz, // daylight-saving zone name such as "edt"; number holds its table value
154 }
155
/**
 * Scans the next token from s, starting at index si, and advances si
 * past it.
 *
 * Blanks, tabs, newlines, carriage returns, commas and parenthesised
 * (possibly nested) comments are skipped. A '.' is skipped as well unless
 * version DATE_DOT_DELIM is set, in which case it is reported as DP.slash.
 *
 * Returns:
 *  DP.end at the end of the text; DP.colon, DP.plus, DP.minus or DP.slash
 *  for the matching punctuation; DP.number for a run of digits, with the
 *  value left in number; otherwise whatever classify() makes of the next
 *  run of letters (periods embedded in the word are ignored).
 *  DP.err is returned for an unterminated comment, for a digit run whose
 *  value does not fit in an int, and for anything classify() rejects.
 */
DP nextToken()
{
    for (;;)
    {
        assert(si <= s.length);
        if (si == s.length)
            return DP.end;

        switch (s[si])
        {
            case ':': si++; return DP.colon;
            case '+': si++; return DP.plus;
            case '-': si++; return DP.minus;
            case '/': si++; return DP.slash;

            case '.':
                si++;
                version (DATE_DOT_DELIM)
                {
                    return DP.slash;
                }
                else
                {
                    break;              // a stray '.' is just skipped
                }

            case ' ':
            case '\n':
            case '\r':
            case '\t':
            case ',':
                si++;
                break;

            case '(':                   // comment, may nest
            {
                int depth = 1;
                while (depth != 0)
                {
                    si++;
                    if (si == s.length)
                        return DP.err;  // comment never closed
                    if (s[si] == '(')
                        depth++;
                    else if (s[si] == ')')
                        depth--;
                }
                si++;                   // step over the closing ')'
                break;
            }

            default:
            {
                number = 0;
                bool sawDigit = false;
                while (si < s.length && s[si] >= '0' && s[si] <= '9')
                {
                    int digit = s[si] - '0';
                    // Integer arithmetic wraps silently, which would let an
                    // over-long digit run masquerade as a small valid
                    // value. Treat it as an error instead.
                    if (number > (int.max - digit) / 10)
                        return DP.err;
                    number = number * 10 + digit;
                    sawDigit = true;
                    si++;
                }
                if (sawDigit)
                    return DP.number;

                // Not a number: si still indexes the character that
                // selected this case. Collect a lower-cased word.
                uint c = s[si];
                int len = 0;
            word:
                while (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z')
                {
                    if (c < 'a')        // upper case -> lower case
                        c += cast(uint)'a' - cast(uint)'A';
                    buffer[len] = cast(char)c;
                    len++;
                    do
                    {
                        si++;
                        if (si == s.length)
                            break word;
                        c = s[si];
                    } while (c == '.'); // ignore embedded '.'s, as in "G.M.T."
                }
                return classify(buffer[0 .. len]);
            }
        }
    }
    assert(0);
}
268
/**
 * Looks up a lower-cased word in the table of month names, weekday names,
 * AM/PM markers and time zone names.
 *
 * Params:
 *  buf = the word to look up; matched exactly against the table names
 *
 * Returns: the token kind recorded for the word, after storing the word's
 * table value in number. Returns DP.err, leaving number untouched, when
 * the word is not in the table.
 */
DP classify(char[] buf)
{
    struct DateID
    {
        char[] name;    // word as it appears after lower-casing
        DP tok;         // token kind reported for the word
        short value;    // value copied into number on a match
    }

    static DateID[] table =
    [
        // months, full names then abbreviations
        { "january",   DP.month,  1 },
        { "february",  DP.month,  2 },
        { "march",     DP.month,  3 },
        { "april",     DP.month,  4 },
        { "may",       DP.month,  5 },
        { "june",      DP.month,  6 },
        { "july",      DP.month,  7 },
        { "august",    DP.month,  8 },
        { "september", DP.month,  9 },
        { "october",   DP.month, 10 },
        { "november",  DP.month, 11 },
        { "december",  DP.month, 12 },
        { "jan",       DP.month,  1 },
        { "feb",       DP.month,  2 },
        { "mar",       DP.month,  3 },
        { "apr",       DP.month,  4 },
        { "jun",       DP.month,  6 },
        { "jul",       DP.month,  7 },
        { "aug",       DP.month,  8 },
        { "sep",       DP.month,  9 },
        { "sept",      DP.month,  9 },
        { "oct",       DP.month, 10 },
        { "nov",       DP.month, 11 },
        { "dec",       DP.month, 12 },

        // weekdays, Sunday = 1
        { "sunday",    DP.weekday, 1 },
        { "monday",    DP.weekday, 2 },
        { "tuesday",   DP.weekday, 3 },
        { "tues",      DP.weekday, 3 },
        { "wednesday", DP.weekday, 4 },
        { "wednes",    DP.weekday, 4 },
        { "thursday",  DP.weekday, 5 },
        { "thur",      DP.weekday, 5 },
        { "thurs",     DP.weekday, 5 },
        { "friday",    DP.weekday, 6 },
        { "saturday",  DP.weekday, 7 },

        { "sun",       DP.weekday, 1 },
        { "mon",       DP.weekday, 2 },
        { "tue",       DP.weekday, 3 },
        { "wed",       DP.weekday, 4 },
        { "thu",       DP.weekday, 5 },
        { "fri",       DP.weekday, 6 },
        { "sat",       DP.weekday, 7 },

        // 12-hour clock markers
        { "am",        DP.ampm, 1 },
        { "pm",        DP.ampm, 2 },

        // time zones with zero or positive table values
        { "gmt",       DP.tz,    +000 },
        { "ut",        DP.tz,    +000 },
        { "utc",       DP.tz,    +000 },
        { "wet",       DP.tz,    +000 },
        { "z",         DP.tz,    +000 },
        { "wat",       DP.tz,    +100 },
        { "a",         DP.tz,    +100 },
        { "at",        DP.tz,    +200 },
        { "b",         DP.tz,    +200 },
        { "c",         DP.tz,    +300 },
        { "ast",       DP.tz,    +400 },
        { "d",         DP.tz,    +400 },
        { "est",       DP.tz,    +500 },
        { "e",         DP.tz,    +500 },
        { "cst",       DP.tz,    +600 },
        { "f",         DP.tz,    +600 },
        { "mst",       DP.tz,    +700 },
        { "g",         DP.tz,    +700 },
        { "pst",       DP.tz,    +800 },
        { "h",         DP.tz,    +800 },
        { "yst",       DP.tz,    +900 },
        { "i",         DP.tz,    +900 },
        { "ahst",      DP.tz,   +1000 },
        { "cat",       DP.tz,   +1000 },
        { "hst",       DP.tz,   +1000 },
        { "k",         DP.tz,   +1000 },
        { "nt",        DP.tz,   +1100 },
        { "l",         DP.tz,   +1100 },
        { "idlw",      DP.tz,   +1200 },
        { "m",         DP.tz,   +1200 },

        // time zones with negative table values
        { "cet",       DP.tz,    -100 },
        { "fwt",       DP.tz,    -100 },
        { "met",       DP.tz,    -100 },
        { "mewt",      DP.tz,    -100 },
        { "swt",       DP.tz,    -100 },
        { "n",         DP.tz,    -100 },
        { "eet",       DP.tz,    -200 },
        { "o",         DP.tz,    -200 },
        { "bt",        DP.tz,    -300 },
        { "p",         DP.tz,    -300 },
        { "zp4",       DP.tz,    -400 },
        { "q",         DP.tz,    -400 },
        { "zp5",       DP.tz,    -500 },
        { "r",         DP.tz,    -500 },
        { "zp6",       DP.tz,    -600 },
        { "s",         DP.tz,    -600 },
        { "wast",      DP.tz,    -700 },
        { "t",         DP.tz,    -700 },
        { "cct",       DP.tz,    -800 },
        { "u",         DP.tz,    -800 },
        { "jst",       DP.tz,    -900 },
        { "v",         DP.tz,    -900 },
        { "east",      DP.tz,   -1000 },
        { "gst",       DP.tz,   -1000 },
        { "w",         DP.tz,   -1000 },
        { "x",         DP.tz,   -1100 },
        { "idle",      DP.tz,   -1200 },
        { "nzst",      DP.tz,   -1200 },
        { "nzt",       DP.tz,   -1200 },
        { "y",         DP.tz,   -1200 },

        // daylight-saving zone names
        { "bst",       DP.dsttz,  000 },
        { "adt",       DP.dsttz, +400 },
        { "edt",       DP.dsttz, +500 },
        { "cdt",       DP.dsttz, +600 },
        { "mdt",       DP.dsttz, +700 },
        { "pdt",       DP.dsttz, +800 },
        { "ydt",       DP.dsttz, +900 },
        { "hdt",       DP.dsttz, +1000 },
        { "mest",      DP.dsttz, -100 },
        { "mesz",      DP.dsttz, -100 },
        { "sst",       DP.dsttz, -100 },
        { "fst",       DP.dsttz, -100 },
        { "wadt",      DP.dsttz, -700 },
        { "eadt",      DP.dsttz, -1000 },
        { "nzdt",      DP.dsttz, -1200 },

        // generic daylight-saving suffix, as in "est dst"
        { "dst",       DP.dst, 0 },
    ];

    // The table is short, so it is simply scanned front to back.
    foreach (entry; table)
    {
        if (entry.name == buf)
        {
            number = entry.value;
            return entry.tok;
        }
    }
    return DP.err;
}
423
/**
 * Tokenizes s and fills in the date and time fields of this struct.
 *
 * The loop is driven by the current token kind in tok. Branches that have
 * already fetched the token following their construct use `continue`;
 * branches that have not use `break` and fall through to the nextToken()
 * call at the bottom of the loop.
 *
 * Params:
 *  s = text to parse; also stored in this.s for nextToken()
 *
 * Returns: 1 if the whole text was consumed, 0 on the first token that
 * does not fit. Range checking of the fields is left to the caller.
 */
int parseString(char[] s)
{
    int n1;         // number token awaiting interpretation
    int resume;     // scan position to return to when look-ahead is undone

    this.s = s;
    si = 0;

    int tok = nextToken();
    for (;;)
    {
        switch (tok)
        {
            case DP.end:
                return 1;

            case DP.minus:
                break;                  // ignore spurious '-'

            case DP.weekday:
                weekday = number;
                break;

            case DP.month:              // month day, [year]
                month = number;
                tok = nextToken();
                if (tok != DP.number)
                    continue;
                day = number;
                resume = si;
                tok = nextToken();
                if (tok != DP.number)
                    continue;
                n1 = number;
                tok = nextToken();
                if (tok != DP.colon)
                {
                    year = n1;
                    continue;
                }
                // "n1:" starts a time of day, not a year. Rewind so the
                // number is scanned again as the hour.
                si = resume;
                break;

            case DP.number:
                n1 = number;
                tok = nextToken();
                switch (tok)
                {
                    case DP.end:
                        year = n1;
                        break;

                    case DP.minus:
                    case DP.slash:      // n1/ ? ? ?
                        tok = parseCalendarDate(n1);
                        if (tok == DP.err)
                            return 0;
                        break;

                    case DP.colon:      // hh:mm [:ss] [am | pm]
                        tok = parseTimeOfDay(n1);
                        if (tok == DP.err)
                            return 0;
                        break;

                    case DP.ampm:       // hh am | pm
                        hours = n1;
                        minutes = 0;
                        seconds = 0;
                        ampm = number;
                        // Step past the am/pm token. Without this the outer
                        // switch sees DP.ampm, has no case for it, and
                        // rejects every "<hour> am/pm" input.
                        tok = nextToken();
                        break;

                    case DP.month:      // day month [year]
                        day = n1;
                        month = number;
                        tok = nextToken();
                        if (tok == DP.number)
                        {
                            year = number;
                            tok = nextToken();
                        }
                        break;

                    default:            // a lone number is taken as the year
                        year = n1;
                        break;
                }
                continue;

            default:                    // DP.err and tokens that cannot start a field
                return 0;
        }
        tok = nextToken();
    }
    assert(0);
}
534
/**
 * Parses the rest of a calendar date whose first number, n1, and the '-'
 * or '/' after it have already been consumed.
 *
 * Accepted shapes (the separator may be '-' or '/'):
 *  day/monthname [year], day/monthname/year,
 *  month/day, year/month/day and month/day/year.
 *
 * Params:
 *  n1 = the number that preceded the first separator
 *
 * Returns: the token that follows the date, or DP.err if the date is
 * malformed or the three-number form cannot be classified unambiguously.
 */
int parseCalendarDate(int n1)
{
    debug(dateparse) printf("DateParse.parseCalendarDate(%d)\n", n1);

    int tok = nextToken();

    if (tok == DP.month)                // day/month ...
    {
        day = n1;
        month = number;
        tok = nextToken();
        if (tok == DP.minus || tok == DP.slash)
        {
            // day/month/year: a separator must be followed by the year
            tok = nextToken();
            if (tok != DP.number)
                return DP.err;
        }
        if (tok == DP.number)
        {
            year = number;
            tok = nextToken();
        }
        return tok;
    }

    if (tok != DP.number)
        return DP.err;
    int second = number;

    tok = nextToken();
    if (tok != DP.minus && tok != DP.slash)
    {
        // only two numbers: must be month/day
        month = n1;
        day = second;
        return tok;
    }

    tok = nextToken();
    if (tok != DP.number)
        return DP.err;
    int third = number;
    tok = nextToken();

    // Decide between year/month/day and month/day/year from the ranges of
    // the three numbers.
    bool yearFirst = n1 > 12 ||
                     ((second >= 1 && second <= 12) &&
                      (third >= 1 && third <= 31));
    bool yearLast = ((n1 >= 1 && n1 <= 12) &&
                     (second >= 1 && second <= 31)) ||
                    third > 31;

    // Both readings plausible, or neither: refuse to guess.
    if (yearFirst == yearLast)
        return DP.err;

    if (yearFirst)
    {
        year = n1;
        month = second;
        day = third;
    }
    else
    {
        month = n1;
        day = second;
        year = third;
    }
    return tok;
}
612
/**
 * Parses the rest of a time of day whose hour, n1, and the ':' after it
 * have already been consumed:
 *
 *  hh:mm [:ss] [am | pm | +offset | -offset | zone [dst] | dstzone]
 *
 * A zone whose table value is 0 (for example "GMT") may be followed
 * directly by a signed numeric offset, as in "GMT-0800".
 * 12am is midnight and 12pm is noon; that conversion is done by parse().
 *
 * Params:
 *  n1 = the hour
 *
 * Returns: the token that follows the time, or DP.err if a number is
 * missing where one is required.
 */
int parseTimeOfDay(int n1)
{
    hours = n1;

    int tok = nextToken();
    if (tok != DP.number)
        return DP.err;
    minutes = number;

    tok = nextToken();
    if (tok == DP.colon)
    {
        tok = nextToken();
        if (tok != DP.number)
            return DP.err;
        seconds = number;
        tok = nextToken();
    }
    else
    {
        seconds = 0;
    }

    if (tok == DP.ampm)
    {
        ampm = number;
        tok = nextToken();
    }
    else if (tok == DP.dsttz)
    {
        tzcorrection = number;
        tok = nextToken();
    }
    else
    {
        bool signedOffset = (tok == DP.plus || tok == DP.minus);

        if (tok == DP.tz)
        {
            tzcorrection = number;
            tok = nextToken();
            // number still holds the zone's table value here, because
            // nextToken() leaves it alone when it returns '+' or '-'.
            if (number == 0 && (tok == DP.plus || tok == DP.minus))
            {
                signedOffset = true;
            }
            else if (tok == DP.dst)
            {
                // NOTE(review): the zone table in classify() gives zones
                // west of Greenwich positive values, so adding 100 moves
                // the correction one hour further west - confirm this is
                // the intended daylight-saving adjustment.
                tzcorrection += 100;
                tok = nextToken();
            }
        }

        if (signedOffset)
        {
            // The stored correction has the opposite sign to the written
            // offset: "-800" yields 800.
            int sign = (tok == DP.minus) ? -1 : 1;
            tok = nextToken();
            if (tok != DP.number)
                return DP.err;
            tzcorrection = -sign * number;
            tok = nextToken();
        }
    }

    return tok;
}
676
677 }
678
unittest
{
    DateParse dp;
    Date d;

    // Parses text into d and checks every field of the result. All cases
    // expect ms == 0, so that check is not parameterised.
    void check(char[] text,
               int year, int month, int day,
               int hour, int minute, int second,
               int weekday, int tzcorrection)
    {
        dp.parse(text, d);
        assert(d.year == year);
        assert(d.month == month);
        assert(d.day == day);
        assert(d.hour == hour);
        assert(d.minute == minute);
        assert(d.second == second);
        assert(d.ms == 0);
        assert(d.weekday == weekday);
        assert(d.tzcorrection == tzcorrection);
    }

    //    text                                 year  mon day  hh  mm  ss  wd  tzcorrection
    check("March 10, 1959 12:00 -800",         1959,  3, 10, 12,  0,  0,  0, 800);
    check("Tue Apr 02 02:04:57 GMT-0800 1996", 1996,  4,  2,  2,  4, 57,  3, 800);
    check("March 14, -1980 21:14:50",          1980,  3, 14, 21, 14, 50,  0, int.min);
    check("Tue Apr 02 02:04:57 1996",          1996,  4,  2,  2,  4, 57,  3, int.min);
    check("Tue, 02 Apr 1996 02:04:57 G.M.T.",  1996,  4,  2,  2,  4, 57,  3, 0);
    check("December 31, 3000",                 3000, 12, 31,  0,  0,  0,  0, int.min);
    check("Wed, 31 Dec 1969 16:00:00 GMT",     1969, 12, 31, 16,  0,  0,  4, 0);
    check("1/1/1999 12:30 AM",                 1999,  1,  1,  0, 30,  0,  0, int.min);
    check("Tue, 20 May 2003 15:38:58 +0530",   2003,  5, 20, 15, 38, 58,  3, -530);

    // d still holds the result of the last case above.
    debug(dateparse) printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
        d.year, d.month, d.day,
        d.hour, d.minute, d.second, d.ms,
        d.weekday, d.tzcorrection);
}
788