diff lphobos/std/dateparse.d @ 473:373489eeaf90

Applied downs' lphobos update
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Mon, 04 Aug 2008 19:28:49 +0200
parents
children 88e23f8c2354
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lphobos/std/dateparse.d	Mon Aug 04 19:28:49 2008 +0200
@@ -0,0 +1,788 @@
+
+/*
+ *  Copyright (C) 1999-2004 by Digital Mars, www.digitalmars.com
+ *  Written by Walter Bright
+ *
+ *  This software is provided 'as-is', without any express or implied
+ *  warranty. In no event will the authors be held liable for any damages
+ *  arising from the use of this software.
+ *
+ *  Permission is granted to anyone to use this software for any purpose,
+ *  including commercial applications, and to alter it and redistribute it
+ *  freely, subject to the following restrictions:
+ *
+ *  o  The origin of this software must not be misrepresented; you must not
+ *     claim that you wrote the original software. If you use this software
+ *     in a product, an acknowledgment in the product documentation would be
+ *     appreciated but is not required.
+ *  o  Altered source versions must be plainly marked as such, and must not
+ *     be misrepresented as being the original software.
+ *  o  This notice may not be removed or altered from any source
+ *     distribution.
+ */
+
+
+module std.dateparse;
+
+private
+{
+    import std.string;
+    import std.c.stdlib;
+    import std.date;
+}
+
+//debug=dateparse;
+
+/**
+ * Error thrown by DateParse.parse() when the text handed to it cannot be
+ * parsed, or when one of the parsed fields fails validation.
+ */
+class DateParseError : Error
+{
+    /// The message is "Invalid date string: " followed by the rejected text s.
+    this(char[] s)
+    {
+	super("Invalid date string: " ~ s);
+    }
+}
+
+struct DateParse
+{
+    /**
+     * Parses the date/time text s and stores the extracted fields in date.
+     *
+     * All parser state is reset on entry, so one DateParse value can be
+     * reused for any number of calls.
+     *
+     * Params:
+     *	s    = text to interpret
+     *	date = receives year, month, day, hour, minute, second, ms, weekday
+     *	       and tzcorrection; tzcorrection is int.min when the text
+     *	       carried no time zone information
+     *
+     * Throws: DateParseError if parseString() rejects s, if year still has
+     * its initial value afterwards, or if a field fails the range checks.
+     */
+    void parse(char[] s, out Date date)
+    {
+	*this = DateParse.init;
+
+	// Scratch space in which nextToken() collects lower-cased words; it
+	// stores at most one character per input character, so s.length
+	// bytes are always enough.
+	buffer = (cast(char *)alloca(s.length))[0 .. s.length];
+
+	debug(dateparse) printf("DateParse.parse('%.*s')\n",
+	    cast(int) s.length, s.ptr);
+
+	if (!parseString(s))
+	    throw new DateParseError(s);
+
+	debug(dateparse)
+	    printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
+		year, month, day,
+		hours, minutes, seconds, ms,
+		weekday, tzcorrection);
+
+	// A year is mandatory.
+	if (year == year.init)
+	    throw new DateParseError(s);
+
+	// Calendar fields.
+	if (month < 1 || month > 12 || day < 1 || day > 31)
+	    throw new DateParseError(s);
+
+	// Clock fields.
+	if (hours < 0 || hours > 23 ||
+	    minutes < 0 || minutes > 59 ||
+	    seconds < 0 || seconds > 59)
+	    throw new DateParseError(s);
+
+	// A time zone correction, when present, must lie within +-2300 and
+	// be a multiple of 10.
+	if (tzcorrection != int.min)
+	{
+	    if (tzcorrection < -2300 || tzcorrection > 2300)
+		throw new DateParseError(s);
+	    if (tzcorrection % 10)
+		throw new DateParseError(s);
+	}
+
+	// Convert a 12-hour clock reading to 24-hour form.
+	if (ampm)
+	{
+	    if (hours > 12)
+		throw new DateParseError(s);
+	    if (hours == 12)
+	    {
+		if (ampm == 1)		// 12 AM is midnight
+		    hours = 0;
+	    }
+	    else if (ampm == 2)		// before 12 and P.M.
+	    {
+		hours += 12;
+	    }
+	}
+
+	// Two-digit years are taken to be in the 1900s.
+	if (year >= 0 && year <= 99)
+	    year += 1900;
+
+	date.year = year;
+	date.month = month;
+	date.day = day;
+	date.hour = hours;
+	date.minute = minutes;
+	date.second = seconds;
+	date.ms = ms;
+	date.weekday = weekday;
+	date.tzcorrection = tzcorrection;
+    }
+
+
+private:
+    // Fields collected while parsing; parse() validates them and copies
+    // them into the caller's Date.
+    int year = int.min;	// our "nan" Date value
+    int month;		// 1..12
+    int day;		// 1..31
+    int hours;		// 0..23
+    int minutes;	// 0..59
+    int seconds;	// 0..59
+    int ms;		// 0..999 (nothing in this struct assigns it)
+    int weekday;	// 1..7, Sunday = 1; 0 when no weekday name was seen
+    int ampm;		// 0: not specified
+			// 1: AM
+			// 2: PM
+    int tzcorrection = int.min;	// time zone correction written as +-hhmm
+				// (e.g. 800, -530); parse() accepts
+				// -2300..2300; int.min means none was given
+
+    // Tokenizer state shared by nextToken() and the parse*() functions.
+    char[] s;		// text being parsed
+    int si;		// index in s of the next unread character
+    int number;		// value attached to the most recent token
+    char[] buffer;	// scratch space for lower-cased words, set by parse()
+
+    // Token kinds returned by nextToken() and classify().
+    enum DP : byte
+    {
+	err,		// unrecognized input
+	weekday,	// weekday name; number holds 1..7 (Sunday = 1)
+	month,		// month name; number holds 1..12
+	number,		// run of decimal digits; number holds its value
+	end,		// end of the input text
+	colon,		// ':'
+	minus,		// '-'
+	slash,		// '/' (and '.' under version DATE_DOT_DELIM)
+	ampm,		// "am" or "pm"; number holds 1 or 2
+	plus,		// '+'
+	tz,		// time zone name; number holds its correction
+	dst,		// the word "dst"
+	dsttz,		// daylight-saving zone name; number holds its correction
+    }
+
+    /**
+     * Scans the next token from s, starting at index si, and advances si
+     * past it.
+     *
+     * Blanks, tabs, line breaks and commas are skipped.  '.' is skipped too,
+     * unless version DATE_DOT_DELIM is set, in which case it is returned as
+     * DP.slash.  Text in parentheses is a comment and is skipped; comments
+     * nest.  A run of digits yields DP.number with its value in number.
+     * A run of letters is lower-cased into buffer (a '.' following a letter
+     * is dropped, so "G.M.T." reads as "gmt") and handed to classify().
+     *
+     * Returns: the token kind; DP.end at the end of the text; DP.err for an
+     * unterminated comment, for a word or character classify() does not
+     * know, and for a digit run whose value does not fit in an int.
+     */
+    DP nextToken()
+    {   int nest;
+	uint c;
+	int bi;
+	int overflow;
+	DP result = DP.err;
+
+	for (;;)
+	{
+	    assert(si <= s.length);
+	    if (si == s.length)
+	    {	result = DP.end;
+		goto Lret;
+	    }
+	    switch (s[si])
+	    {
+		case ':':	result = DP.colon; goto ret_inc;
+		case '+':	result = DP.plus;  goto ret_inc;
+		case '-':	result = DP.minus; goto ret_inc;
+		case '/':	result = DP.slash; goto ret_inc;
+		case '.':
+		    version(DATE_DOT_DELIM)
+		    {
+			result = DP.slash;
+			goto ret_inc;
+		    }
+		    else
+		    {
+			si++;
+			break;
+		    }
+
+		ret_inc:		// single-character token: consume it
+		    si++;
+		    goto Lret;
+
+		case ' ':
+		case '\n':
+		case '\r':
+		case '\t':
+		case ',':
+		    si++;		// separators carry no meaning
+		    break;
+
+		case '(':		// comment
+		    nest = 1;
+		    for (;;)
+		    {
+			si++;
+			if (si == s.length)
+			    goto Lret;		// error: result is still DP.err
+			switch (s[si])
+			{
+			    case '(':
+				nest++;
+				break;
+
+			    case ')':
+				if (--nest == 0)
+				    goto Lendofcomment;
+				break;
+
+			    default:
+				break;
+			}
+		    }
+		Lendofcomment:
+		    si++;		// step over the closing ')'
+		    break;
+
+		default:
+		    number = 0;
+		    overflow = 0;
+		    for (;;)
+		    {
+			if (si == s.length)
+			    // c has been assigned by now: the first pass
+			    // always has si < s.length
+			    break;
+			c = s[si];
+			if (!(c >= '0' && c <= '9'))
+			    break;
+			result = DP.number;
+			int digit = cast(int)(c - '0');
+			// Appending the digit must not wrap around: a
+			// wrapped value could pass for a valid field.
+			if (number > (int.max - digit) / 10)
+			    overflow = 1;
+			else
+			    number = number * 10 + digit;
+			si++;
+		    }
+		    if (result == DP.number)
+		    {
+			// The whole digit run has been consumed; report an
+			// out-of-range value as an error.
+			if (overflow)
+			    result = DP.err;
+			goto Lret;
+		    }
+
+		    // Not a number: collect a word.  c holds s[si] here.
+		    bi = 0;
+		bufloop:
+		    while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+		    {
+			if (c < 'a')		// if upper case
+			    c += cast(uint)'a' - cast(uint)'A';	// to lower case
+			buffer[bi] = cast(char)c;
+			bi++;
+			do
+			{
+			    si++;
+			    if (si == s.length)
+				break bufloop;
+			    c = s[si];
+			} while (c == '.');	// ignore embedded '.'s
+		    }
+		    // An empty word (unknown character) matches no table
+		    // entry, so classify() reports DP.err for it.
+		    result = classify(buffer[0 .. bi]);
+		    goto Lret;
+	    }
+	}
+    Lret:
+	return result;
+    }
+
+    /**
+     * Looks up the word buf in the table of month names, weekday names,
+     * AM/PM markers and time zone names.
+     *
+     * The comparison is exact, and every table entry is lower case.
+     *
+     * Returns: the token kind of the matching entry, after storing the
+     * entry's value in number; DP.err (number untouched) if buf matches no
+     * entry.
+     */
+    DP classify(char[] buf)
+    {
+	struct DateID
+	{
+	    char[] name;	// word as it appears after lower-casing
+	    DP tok;		// token kind reported for the word
+	    short value;	// stored in number when the word matches
+	}
+
+	// Zone values are written as +-hhmm.
+	// NOTE(review): the daylight-saving (dsttz) entries carry the same
+	// value as the matching standard-time zone (e.g. "edt" and "est"
+	// are both +500) -- confirm whether the one-hour shift is meant to
+	// be applied by the consumer of tzcorrection.
+	static DateID[] dateidtab =
+	[
+	    // months, full names
+	    {   "january",	DP.month,	1},
+	    {   "february",	DP.month,	2},
+	    {   "march",	DP.month,	3},
+	    {   "april",	DP.month,	4},
+	    {   "may",		DP.month,	5},
+	    {   "june",		DP.month,	6},
+	    {   "july",		DP.month,	7},
+	    {   "august",	DP.month,	8},
+	    {   "september",	DP.month,	9},
+	    {   "october",	DP.month,	10},
+	    {   "november",	DP.month,	11},
+	    {   "december",	DP.month,	12},
+	    // months, abbreviations
+	    {   "jan",		DP.month,	1},
+	    {   "feb",		DP.month,	2},
+	    {   "mar",		DP.month,	3},
+	    {   "apr",		DP.month,	4},
+	    {   "jun",		DP.month,	6},
+	    {   "jul",		DP.month,	7},
+	    {   "aug",		DP.month,	8},
+	    {   "sep",		DP.month,	9},
+	    {   "sept",		DP.month,	9},
+	    {   "oct",		DP.month,	10},
+	    {   "nov",		DP.month,	11},
+	    {   "dec",		DP.month,	12},
+
+	    // weekdays, full and long forms
+	    {   "sunday",	DP.weekday,	1},
+	    {   "monday",	DP.weekday,	2},
+	    {   "tuesday",	DP.weekday,	3},
+	    {   "tues",		DP.weekday,	3},
+	    {   "wednesday",	DP.weekday,	4},
+	    {   "wednes",	DP.weekday,	4},
+	    {   "thursday",	DP.weekday,	5},
+	    {   "thur",		DP.weekday,	5},
+	    {   "thurs",	DP.weekday,	5},
+	    {   "friday",	DP.weekday,	6},
+	    {   "saturday",	DP.weekday,	7},
+
+	    // weekdays, three-letter forms
+	    {   "sun",		DP.weekday,	1},
+	    {   "mon",		DP.weekday,	2},
+	    {   "tue",		DP.weekday,	3},
+	    {   "wed",		DP.weekday,	4},
+	    {   "thu",		DP.weekday,	5},
+	    {   "fri",		DP.weekday,	6},
+	    {   "sat",		DP.weekday,	7},
+
+	    // 12-hour clock markers
+	    {   "am",		DP.ampm,		1},
+	    {   "pm",		DP.ampm,		2},
+
+	    // time zones with zero or positive values
+	    {   "gmt",		DP.tz,		+000},
+	    {   "ut",		DP.tz,		+000},
+	    {   "utc",		DP.tz,		+000},
+	    {   "wet",		DP.tz,		+000},
+	    {   "z",		DP.tz,		+000},
+	    {   "wat",		DP.tz,		+100},
+	    {   "a",		DP.tz,		+100},
+	    {   "at",		DP.tz,		+200},
+	    {   "b",		DP.tz,		+200},
+	    {   "c",		DP.tz,		+300},
+	    {   "ast",		DP.tz,		+400},
+	    {   "d",		DP.tz,		+400},
+	    {   "est",		DP.tz,		+500},
+	    {   "e",		DP.tz,		+500},
+	    {   "cst",		DP.tz,		+600},
+	    {   "f",		DP.tz,		+600},
+	    {   "mst",		DP.tz,		+700},
+	    {   "g",		DP.tz,		+700},
+	    {   "pst",		DP.tz,		+800},
+	    {   "h",		DP.tz,		+800},
+	    {   "yst",		DP.tz,		+900},
+	    {   "i",		DP.tz,		+900},
+	    {   "ahst",		DP.tz,		+1000},
+	    {   "cat",		DP.tz,		+1000},
+	    {   "hst",		DP.tz,		+1000},
+	    {   "k",		DP.tz,		+1000},
+	    {   "nt",		DP.tz,		+1100},
+	    {   "l",		DP.tz,		+1100},
+	    {   "idlw",		DP.tz,		+1200},
+	    {   "m",		DP.tz,		+1200},
+
+	    // time zones with negative values
+	    {   "cet",		DP.tz,		-100},
+	    {   "fwt",		DP.tz,		-100},
+	    {   "met",		DP.tz,		-100},
+	    {   "mewt",		DP.tz,		-100},
+	    {   "swt",		DP.tz,		-100},
+	    {   "n",		DP.tz,		-100},
+	    {   "eet",		DP.tz,		-200},
+	    {   "o",		DP.tz,		-200},
+	    {   "bt",		DP.tz,		-300},
+	    {   "p",		DP.tz,		-300},
+	    {   "zp4",		DP.tz,		-400},
+	    {   "q",		DP.tz,		-400},
+	    {   "zp5",		DP.tz,		-500},
+	    {   "r",		DP.tz,		-500},
+	    {   "zp6",		DP.tz,		-600},
+	    {   "s",		DP.tz,		-600},
+	    {   "wast",		DP.tz,		-700},
+	    {   "t",		DP.tz,		-700},
+	    {   "cct",		DP.tz,		-800},
+	    {   "u",		DP.tz,		-800},
+	    {   "jst",		DP.tz,		-900},
+	    {   "v",		DP.tz,		-900},
+	    {   "east",		DP.tz,		-1000},
+	    {   "gst",		DP.tz,		-1000},
+	    {   "w",		DP.tz,		-1000},
+	    {   "x",		DP.tz,		-1100},
+	    {   "idle",		DP.tz,		-1200},
+	    {   "nzst",		DP.tz,		-1200},
+	    {   "nzt",		DP.tz,		-1200},
+	    {   "y",		DP.tz,		-1200},
+
+	    // daylight-saving zone names
+	    {   "bst",		DP.dsttz,	000},
+	    {   "adt",		DP.dsttz,	+400},
+	    {   "edt",		DP.dsttz,	+500},
+	    {   "cdt",		DP.dsttz,	+600},
+	    {   "mdt",		DP.dsttz,	+700},
+	    {   "pdt",		DP.dsttz,	+800},
+	    {   "ydt",		DP.dsttz,	+900},
+	    {   "hdt",		DP.dsttz,	+1000},
+	    {   "mest",		DP.dsttz,	-100},
+	    {   "mesz",		DP.dsttz,	-100},
+	    {   "sst",		DP.dsttz,	-100},
+	    {   "fst",		DP.dsttz,	-100},
+	    {   "wadt",		DP.dsttz,	-700},
+	    {   "eadt",		DP.dsttz,	-1000},
+	    {   "nzdt",		DP.dsttz,	-1200},
+
+	    // generic daylight-saving suffix
+	    {   "dst",		DP.dst,		0},
+	];
+
+	// Plain linear scan in table order; the first match wins.
+	foreach (DateID entry; dateidtab)
+	{
+	    if (std.string.cmp(entry.name, buf) == 0)
+	    {
+		number = entry.value;
+		return entry.tok;
+	    }
+	}
+	return DP.err;
+    }
+
+    /**
+     * Tokenizes s from the start and fills in the date/time fields it
+     * describes.
+     *
+     * Handled at the top level:
+     *	- a weekday name;
+     *	- "month day [year]";
+     *	- a phrase starting with a number: a bare year, a calendar date
+     *	  (number followed by '-' or '/', see parseCalendarDate), a time of
+     *	  day (number followed by ':', see parseTimeOfDay), "n am|pm", or
+     *	  "day month [year]";
+     *	- a stray '-', which is ignored.
+     * Any other token is an error.
+     *
+     * Each case either leaves the next unprocessed token in dp and uses
+     * `continue`, or uses `break` to fetch a fresh token at the bottom of
+     * the loop.
+     *
+     * Returns: 1 if the end of s was reached without error, 0 otherwise.
+     */
+    int parseString(char[] s)
+    {
+	int n1;
+	int dp;
+	int sisave;
+	int result;
+
+	this.s = s;
+	si = 0;
+	dp = nextToken();
+	for (;;)
+	{
+	    switch (dp)
+	    {
+		case DP.end:
+		    result = 1;
+		Lret:
+		    return result;
+
+		case DP.err:
+		case_error:
+		default:
+		    result = 0;
+		    goto Lret;
+
+		case DP.minus:
+		    break;			// ignore spurious '-'
+
+		case DP.weekday:
+		    weekday = number;
+		    break;
+
+		case DP.month:		// month day, [year]
+		    month = number;
+		    dp = nextToken();
+		    if (dp == DP.number)
+		    {
+			day = number;
+			sisave = si;	// position right after the day
+			dp = nextToken();
+			if (dp == DP.number)
+			{
+			    n1 = number;
+			    dp = nextToken();
+			    if (dp == DP.colon)
+			    {   // back up, not a year: the number starts a
+				// time of day and is rescanned below
+				si = sisave;
+			    }
+			    else
+			    {   year = n1;
+				continue;
+			    }
+			    break;
+			}
+		    }
+		    continue;
+
+		case DP.number:
+		    n1 = number;
+		    dp = nextToken();
+		    switch (dp)
+		    {
+			case DP.end:
+			    year = n1;
+			    break;
+
+			case DP.minus:
+			case DP.slash:	// n1/ ? ? ?
+			    dp = parseCalendarDate(n1);
+			    if (dp == DP.err)
+				goto case_error;
+			    break;
+
+		       case DP.colon:	// hh:mm [:ss] [am | pm]
+			    dp = parseTimeOfDay(n1);
+			    if (dp == DP.err)
+				goto case_error;
+			    break;
+
+		       case DP.ampm:	// hh am | pm
+			    hours = n1;
+			    minutes = 0;
+			    seconds = 0;
+			    ampm = number;
+			    // The am/pm token has been used up.  Without
+			    // fetching the next one, the `continue` below
+			    // would hand DP.ampm to the outer switch, which
+			    // rejects it, so "3 PM" could never parse.
+			    dp = nextToken();
+			    break;
+
+			case DP.month:
+			    day = n1;
+			    month = number;
+			    dp = nextToken();
+			    if (dp == DP.number)
+			    {   // day month year
+				year = number;
+				dp = nextToken();
+			    }
+			    break;
+
+			default:
+			    // A number followed by anything else is a year;
+			    // the following token is processed next.
+			    year = n1;
+			    break;
+		    }
+		    continue;
+	    }
+	    dp = nextToken();
+	}
+	assert(0);
+    }
+
+    /**
+     * Parses the rest of a calendar date whose first number, n1, and the
+     * '-' or '/' after it have already been consumed.
+     *
+     * Accepted continuations ('-' and '/' are interchangeable):
+     *	month-name [year]	n1 is the day
+     *	month-name / year	n1 is the day
+     *	n2			month/day
+     *	n2 / n3			year/month/day or month/day/year, decided
+     *				from the value ranges; rejected when both or
+     *				neither reading fits
+     *
+     * Returns: the token that follows the date, or DP.err on a syntax error
+     * or an ambiguous three-number date.
+     */
+    int parseCalendarDate(int n1)
+    {
+	debug(dateparse) printf("DateParse.parseCalendarDate(%d)\n", n1);
+
+	int tok = nextToken();
+
+	if (tok == DP.month)
+	{
+	    // day/month, optionally followed by a year
+	    day = n1;
+	    month = number;
+	    tok = nextToken();
+	    if (tok == DP.number)
+	    {
+		// day/month year
+		year = number;
+		tok = nextToken();
+	    }
+	    else if (tok == DP.minus || tok == DP.slash)
+	    {
+		// day/month/year: the separator must be followed by a number
+		tok = nextToken();
+		if (tok != DP.number)
+		    return DP.err;
+		year = number;
+		tok = nextToken();
+	    }
+	    return tok;
+	}
+
+	if (tok != DP.number)
+	    return DP.err;
+	int mid = number;
+
+	tok = nextToken();
+	if (tok != DP.minus && tok != DP.slash)
+	{
+	    // Only two numbers: must be month/day.
+	    month = n1;
+	    day = mid;
+	    return tok;
+	}
+
+	tok = nextToken();
+	if (tok != DP.number)
+	    return DP.err;
+	int last = number;
+	tok = nextToken();
+
+	// Two candidate readings of n1/mid/last.
+	int asYearMonthDay = (n1 > 12 ||
+			      ((mid >= 1 && mid <= 12) &&
+			       (last >= 1 && last <= 31)));
+	int asMonthDayYear = (((n1 >= 1 && n1 <= 12) &&
+			       (mid >= 1 && mid <= 31)) ||
+			      last > 31);
+
+	// Exactly one reading must fit.
+	if (asYearMonthDay == asMonthDayYear)
+	    return DP.err;
+
+	if (asYearMonthDay)
+	{
+	    year = n1;
+	    month = mid;
+	    day = last;
+	}
+	else
+	{
+	    month = n1;
+	    day = mid;
+	    year = last;
+	}
+	return tok;
+    }
+
+    /**
+     * Parses the rest of a time of day whose hour, n1, and the ':' after it
+     * have already been consumed:
+     *
+     *	hh:mm[:ss] [am|pm | +-offset | zone [+-offset | dst] | dstzone]
+     *
+     * 12am is midnight and 12pm is noon; that conversion is done later from
+     * the stored ampm value.  A signed numeric offset is stored negated in
+     * tzcorrection ("-800" gives 800).  After a zone name, a signed offset
+     * is only read when the zone's own value is 0 (as in "GMT-0800"); the
+     * word "dst" after a zone name adds 100 to the zone's value.
+     *
+     * Returns: the token that follows the time, or DP.err on a syntax error.
+     */
+    int parseTimeOfDay(int n1)
+    {
+	hours = n1;
+
+	int tok = nextToken();
+	if (tok != DP.number)
+	    return DP.err;
+	minutes = number;
+
+	tok = nextToken();
+	if (tok != DP.colon)
+	{
+	    seconds = 0;
+	}
+	else
+	{
+	    tok = nextToken();
+	    if (tok != DP.number)
+		return DP.err;
+	    seconds = number;
+	    tok = nextToken();
+	}
+
+	// Non-zero when tok is the sign of a numeric offset still to be read.
+	int offsetFollows = (tok == DP.plus || tok == DP.minus);
+
+	if (tok == DP.ampm)
+	{
+	    ampm = number;
+	    tok = nextToken();
+	}
+	else if (tok == DP.tz)
+	{
+	    tzcorrection = number;
+	    tok = nextToken();
+	    // number still holds the zone's value when tok is '+' or '-'.
+	    if (number == 0 && (tok == DP.plus || tok == DP.minus))
+	    {
+		offsetFollows = 1;
+	    }
+	    else if (tok == DP.dst)
+	    {
+		tzcorrection += 100;
+		tok = nextToken();
+	    }
+	}
+	else if (tok == DP.dsttz)
+	{
+	    tzcorrection = number;
+	    tok = nextToken();
+	}
+
+	if (offsetFollows)
+	{
+	    int sign = (tok == DP.minus) ? -1 : 1;
+	    tok = nextToken();
+	    if (tok != DP.number)
+		return DP.err;
+	    tzcorrection = -sign * number;
+	    tok = nextToken();
+	}
+
+	return tok;
+    }
+
+}
+
+// Exercises DateParse.parse() on a range of date formats and checks every
+// field of the resulting Date.
+unittest
+{
+    DateParse dp;
+    Date d;
+
+    // Parses text into d and compares each field with the expected value;
+    // ms is expected to be 0 in every case.
+    void check(char[] text,
+	       int year, int month, int day,
+	       int hour, int minute, int second,
+	       int weekday, int tzcorrection)
+    {
+	dp.parse(text, d);
+	assert(d.year         == year);
+	assert(d.month        == month);
+	assert(d.day          == day);
+	assert(d.hour         == hour);
+	assert(d.minute       == minute);
+	assert(d.second       == second);
+	assert(d.ms           == 0);
+	assert(d.weekday      == weekday);
+	assert(d.tzcorrection == tzcorrection);
+    }
+
+    // month day, year, time and a numeric offset (stored negated)
+    check("March 10, 1959 12:00 -800",
+	  1959, 3, 10, 12, 0, 0, 0, 800);
+
+    // zone name with value 0 followed by a numeric offset
+    check("Tue Apr 02 02:04:57 GMT-0800 1996",
+	  1996, 4, 2, 2, 4, 57, 3, 800);
+
+    // a stray '-' before the year is ignored
+    check("March 14, -1980 21:14:50",
+	  1980, 3, 14, 21, 14, 50, 0, int.min);
+
+    // year after the time, no zone
+    check("Tue Apr 02 02:04:57 1996",
+	  1996, 4, 2, 2, 4, 57, 3, int.min);
+
+    // day month year, zone name written with dots
+    check("Tue, 02 Apr 1996 02:04:57 G.M.T.",
+	  1996, 4, 2, 2, 4, 57, 3, 0);
+
+    // date only
+    check("December 31, 3000",
+	  3000, 12, 31, 0, 0, 0, 0, int.min);
+
+    // plain zone name
+    check("Wed, 31 Dec 1969 16:00:00 GMT",
+	  1969, 12, 31, 16, 0, 0, 4, 0);
+
+    // numeric calendar date, 12 AM is hour 0
+    check("1/1/1999 12:30 AM",
+	  1999, 1, 1, 0, 30, 0, 0, int.min);
+
+    // positive offset with minutes
+    check("Tue, 20 May 2003 15:38:58 +0530",
+	  2003, 5, 20, 15, 38, 58, 3, -530);
+
+    debug(dateparse) printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
+	d.year, d.month, d.day,
+	d.hour, d.minute, d.second, d.ms,
+	d.weekday, d.tzcorrection);
+}
+