view lphobos/std/dateparse.d @ 883:b52d5de7783f

GC defines and linkage changes.
author Christian Kamm <kamm incasoftware de>
date Thu, 08 Jan 2009 18:20:02 +0100
parents 88e23f8c2354
children
line wrap: on
line source


/*
 *  Copyright (C) 1999-2004 by Digital Mars, www.digitalmars.com
 *  Written by Walter Bright
 *
 *  This software is provided 'as-is', without any express or implied
 *  warranty. In no event will the authors be held liable for any damages
 *  arising from the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, subject to the following restrictions:
 *
 *  o  The origin of this software must not be misrepresented; you must not
 *     claim that you wrote the original software. If you use this software
 *     in a product, an acknowledgment in the product documentation would be
 *     appreciated but is not required.
 *  o  Altered source versions must be plainly marked as such, and must not
 *     be misrepresented as being the original software.
 *  o  This notice may not be removed or altered from any source
 *     distribution.
 */


module std.dateparse;

private
{
    import std.string;
    import std.c.stdlib;
    import std.date;
}

//debug=dateparse;

class DateParseError : Error
{
    this(char[] s)
    {
	super("Invalid date string: " ~ s);
    }
}

struct DateParse
{
    void parse(char[] s, out Date date)
    {
	*this = DateParse.init;

	//version (Win32)
	    buffer = (cast(char *)/*alloca*/malloc(s.length))[0 .. s.length];
	//else
	    //buffer = new char[s.length];

	debug(dateparse) printf("DateParse.parse('%.*s')\n",
	    cast(int) s.length, s.ptr);
	if (!parseString(s))
	{
	    goto Lerror;
	}

    /+
	if (year == year.init)
	    year = 0;
	else
    +/
	debug(dateparse)
	    printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
		year, month, day,
		hours, minutes, seconds, ms,
		weekday, tzcorrection);
	if (
	    year == year.init ||
	    (month < 1 || month > 12) ||
	    (day < 1 || day > 31) ||
	    (hours < 0 || hours > 23) ||
	    (minutes < 0 || minutes > 59) ||
	    (seconds < 0 || seconds > 59) ||
	    (tzcorrection != int.min &&
	     ((tzcorrection < -2300 || tzcorrection > 2300) ||
	      (tzcorrection % 10)))
	    )
	{
	 Lerror:
	    throw new DateParseError(s);
	}

	if (ampm)
	{   if (hours > 12)
		goto Lerror;
	    if (hours < 12)
	    {
		if (ampm == 2)	// if P.M.
		    hours += 12;
	    }
	    else if (ampm == 1)	// if 12am
	    {
		hours = 0;		// which is midnight
	    }
	}

//	if (tzcorrection != tzcorrection.init)
//	    tzcorrection /= 100;

	if (year >= 0 && year <= 99)
	    year += 1900;

	date.year = year;
	date.month = month;
	date.day = day;
	date.hour = hours;
	date.minute = minutes;
	date.second = seconds;
	date.ms = ms;
	date.weekday = weekday;
	date.tzcorrection = tzcorrection;
    }


private:
    int year = int.min;	// our "nan" Date value
    int month;		// 1..12
    int day;		// 1..31
    int hours;		// 0..23
    int minutes;	// 0..59
    int seconds;	// 0..59
    int ms;		// 0..999
    int weekday;	// 1..7
    int ampm;		// 0: not specified
			// 1: AM
			// 2: PM
    int tzcorrection = int.min;	// -1200..1200 correction in hours

    char[] s;
    int si;
    int number;
    char[] buffer;

    enum DP : byte
    {
	err,
	weekday,
	month,
	number,
	end,
	colon,
	minus,
	slash,
	ampm,
	plus,
	tz,
	dst,
	dsttz,
    }

    DP nextToken()
    {   int nest;
	uint c;
	int bi;
	DP result = DP.err;

	//printf("DateParse::nextToken()\n");
	for (;;)
	{
	    assert(si <= s.length);
	    if (si == s.length)
	    {	result = DP.end;
		goto Lret;
	    }
	    //printf("\ts[%d] = '%c'\n", si, s[si]);
	    switch (s[si])
	    {
		case ':':	result = DP.colon; goto ret_inc;
		case '+':	result = DP.plus;  goto ret_inc;
		case '-':	result = DP.minus; goto ret_inc;
		case '/':	result = DP.slash; goto ret_inc;
		case '.':
		    version(DATE_DOT_DELIM)
		    {
			result = DP.slash;
			goto ret_inc;
		    }
		    else
		    {
			si++;
			break;
		    }

		ret_inc:
		    si++;
		    goto Lret;

		case ' ':
		case '\n':
		case '\r':
		case '\t':
		case ',':
		    si++;
		    break;

		case '(':		// comment
		    nest = 1;
		    for (;;)
		    {
			si++;
			if (si == s.length)
			    goto Lret;		// error
			switch (s[si])
			{
			    case '(':
				nest++;
				break;

			    case ')':
				if (--nest == 0)
				    goto Lendofcomment;
				break;

			    default:
				break;
			}
		    }
		Lendofcomment:
		    si++;
		    break;

		default:
		    number = 0;
		    for (;;)
		    {
			if (si == s.length)
			    // c cannot be undefined here
			    break;
			c = s[si];
			if (!(c >= '0' && c <= '9'))
			    break;
			result = DP.number;
			number = number * 10 + (c - '0');
			si++;
		    }
		    if (result == DP.number)
			goto Lret;

		    bi = 0;
		bufloop:
		    while (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z')
		    {
			if (c < 'a')		// if upper case
			    c += cast(uint)'a' - cast(uint)'A';	// to lower case
			buffer[bi] = cast(char)c;
			bi++;
			do
			{
			    si++;
			    if (si == s.length)
				break bufloop;
			    c = s[si];
			} while (c == '.');	// ignore embedded '.'s
		    }
		    result = classify(buffer[0 .. bi]);
		    goto Lret;
	    }
	}
    Lret:
	//printf("-DateParse::nextToken()\n");
	return result;
    }

    DP classify(char[] buf)
    {
	struct DateID
	{
	    char[] name;
	    DP tok;
	    short value;
	}

	static DateID dateidtab[] =
	[
	    {   "january",	DP.month,	1},
	    {   "february",	DP.month,	2},
	    {   "march",	DP.month,	3},
	    {   "april",	DP.month,	4},
	    {   "may",		DP.month,	5},
	    {   "june",		DP.month,	6},
	    {   "july",		DP.month,	7},
	    {   "august",	DP.month,	8},
	    {   "september",	DP.month,	9},
	    {   "october",	DP.month,	10},
	    {   "november",	DP.month,	11},
	    {   "december",	DP.month,	12},
	    {   "jan",		DP.month,	1},
	    {   "feb",		DP.month,	2},
	    {   "mar",		DP.month,	3},
	    {   "apr",		DP.month,	4},
	    {   "jun",		DP.month,	6},
	    {   "jul",		DP.month,	7},
	    {   "aug",		DP.month,	8},
	    {   "sep",		DP.month,	9},
	    {   "sept",		DP.month,	9},
	    {   "oct",		DP.month,	10},
	    {   "nov",		DP.month,	11},
	    {   "dec",		DP.month,	12},

	    {   "sunday",	DP.weekday,	1},
	    {   "monday",	DP.weekday,	2},
	    {   "tuesday",	DP.weekday,	3},
	    {   "tues",		DP.weekday,	3},
	    {   "wednesday",	DP.weekday,	4},
	    {   "wednes",	DP.weekday,	4},
	    {   "thursday",	DP.weekday,	5},
	    {   "thur",		DP.weekday,	5},
	    {   "thurs",	DP.weekday,	5},
	    {   "friday",	DP.weekday,	6},
	    {   "saturday",	DP.weekday,	7},

	    {   "sun",		DP.weekday,	1},
	    {   "mon",		DP.weekday,	2},
	    {   "tue",		DP.weekday,	3},
	    {   "wed",		DP.weekday,	4},
	    {   "thu",		DP.weekday,	5},
	    {   "fri",		DP.weekday,	6},
	    {   "sat",		DP.weekday,	7},

	    {   "am",		DP.ampm,		1},
	    {   "pm",		DP.ampm,		2},

	    {   "gmt",		DP.tz,		+000},
	    {   "ut",		DP.tz,		+000},
	    {   "utc",		DP.tz,		+000},
	    {   "wet",		DP.tz,		+000},
	    {   "z",		DP.tz,		+000},
	    {   "wat",		DP.tz,		+100},
	    {   "a",		DP.tz,		+100},
	    {   "at",		DP.tz,		+200},
	    {   "b",		DP.tz,		+200},
	    {   "c",		DP.tz,		+300},
	    {   "ast",		DP.tz,		+400},
	    {   "d",		DP.tz,		+400},
	    {   "est",		DP.tz,		+500},
	    {   "e",		DP.tz,		+500},
	    {   "cst",		DP.tz,		+600},
	    {   "f",		DP.tz,		+600},
	    {   "mst",		DP.tz,		+700},
	    {   "g",		DP.tz,		+700},
	    {   "pst",		DP.tz,		+800},
	    {   "h",		DP.tz,		+800},
	    {   "yst",		DP.tz,		+900},
	    {   "i",		DP.tz,		+900},
	    {   "ahst",		DP.tz,		+1000},
	    {   "cat",		DP.tz,		+1000},
	    {   "hst",		DP.tz,		+1000},
	    {   "k",		DP.tz,		+1000},
	    {   "nt",		DP.tz,		+1100},
	    {   "l",		DP.tz,		+1100},
	    {   "idlw",		DP.tz,		+1200},
	    {   "m",		DP.tz,		+1200},

	    {   "cet",		DP.tz,		-100},
	    {   "fwt",		DP.tz,		-100},
	    {   "met",		DP.tz,		-100},
	    {   "mewt",		DP.tz,		-100},
	    {   "swt",		DP.tz,		-100},
	    {   "n",		DP.tz,		-100},
	    {   "eet",		DP.tz,		-200},
	    {   "o",		DP.tz,		-200},
	    {   "bt",		DP.tz,		-300},
	    {   "p",		DP.tz,		-300},
	    {   "zp4",		DP.tz,		-400},
	    {   "q",		DP.tz,		-400},
	    {   "zp5",		DP.tz,		-500},
	    {   "r",		DP.tz,		-500},
	    {   "zp6",		DP.tz,		-600},
	    {   "s",		DP.tz,		-600},
	    {   "wast",		DP.tz,		-700},
	    {   "t",		DP.tz,		-700},
	    {   "cct",		DP.tz,		-800},
	    {   "u",		DP.tz,		-800},
	    {   "jst",		DP.tz,		-900},
	    {   "v",		DP.tz,		-900},
	    {   "east",		DP.tz,		-1000},
	    {   "gst",		DP.tz,		-1000},
	    {   "w",		DP.tz,		-1000},
	    {   "x",		DP.tz,		-1100},
	    {   "idle",		DP.tz,		-1200},
	    {   "nzst",		DP.tz,		-1200},
	    {   "nzt",		DP.tz,		-1200},
	    {   "y",		DP.tz,		-1200},

	    {   "bst",		DP.dsttz,	000},
	    {   "adt",		DP.dsttz,	+400},
	    {   "edt",		DP.dsttz,	+500},
	    {   "cdt",		DP.dsttz,	+600},
	    {   "mdt",		DP.dsttz,	+700},
	    {   "pdt",		DP.dsttz,	+800},
	    {   "ydt",		DP.dsttz,	+900},
	    {   "hdt",		DP.dsttz,	+1000},
	    {   "mest",		DP.dsttz,	-100},
	    {   "mesz",		DP.dsttz,	-100},
	    {   "sst",		DP.dsttz,	-100},
	    {   "fst",		DP.dsttz,	-100},
	    {   "wadt",		DP.dsttz,	-700},
	    {   "eadt",		DP.dsttz,	-1000},
	    {   "nzdt",		DP.dsttz,	-1200},

	    {   "dst",		DP.dst,		0},
	];

	//message(DTEXT("DateParse::classify('%s')\n"), buf);

	// Do a linear search. Yes, it would be faster with a binary
	// one.
	for (uint i = 0; i < dateidtab.length; i++)
	{
	    if (std.string.cmp(dateidtab[i].name, buf) == 0)
	    {
		number = dateidtab[i].value;
		return dateidtab[i].tok;
	    }
	}
	return DP.err;
    }

    int parseString(char[] s)
    {
	int n1;
	int dp;
	int sisave;
	int result;

	//message(DTEXT("DateParse::parseString('%ls')\n"), s);
	this.s = s;
	si = 0;
	dp = nextToken();
	for (;;)
	{
	    //message(DTEXT("\tdp = %d\n"), dp);
	    switch (dp)
	    {
		case DP.end:
		    result = 1;
		Lret:
		    return result;

		case DP.err:
		case_error:
		    //message(DTEXT("\terror\n"));
		default:
		    result = 0;
		    goto Lret;

		case DP.minus:
		    break;			// ignore spurious '-'

		case DP.weekday:
		    weekday = number;
		    break;

		case DP.month:		// month day, [year]
		    month = number;
		    dp = nextToken();
		    if (dp == DP.number)
		    {
			day = number;
			sisave = si;
			dp = nextToken();
			if (dp == DP.number)
			{
			    n1 = number;
			    dp = nextToken();
			    if (dp == DP.colon)
			    {   // back up, not a year
				si = sisave;
			    }
			    else
			    {   year = n1;
				continue;
			    }
			    break;
			}
		    }
		    continue;

		case DP.number:
		    n1 = number;
		    dp = nextToken();
		    switch (dp)
		    {
			case DP.end:
			    year = n1;
			    break;

			case DP.minus:
			case DP.slash:	// n1/ ? ? ?
			    dp = parseCalendarDate(n1);
			    if (dp == DP.err)
				goto case_error;
			    break;

		       case DP.colon:	// hh:mm [:ss] [am | pm]
			    dp = parseTimeOfDay(n1);
			    if (dp == DP.err)
				goto case_error;
			    break;

		       case DP.ampm:
			    hours = n1;
			    minutes = 0;
			    seconds = 0;
			    ampm = number;
			    break;

			case DP.month:
			    day = n1;
			    month = number;
			    dp = nextToken();
			    if (dp == DP.number)
			    {   // day month year
				year = number;
				dp = nextToken();
			    }
			    break;

			default:
			    year = n1;
			    break;
		    }
		    continue;
	    }
	    dp = nextToken();
	}
	assert(0);
    }

    int parseCalendarDate(int n1)
    {
	int n2;
	int n3;
	int dp;

	debug(dateparse) printf("DateParse.parseCalendarDate(%d)\n", n1);
	dp = nextToken();
	if (dp == DP.month)	// day/month
	{
	    day = n1;
	    month = number;
	    dp = nextToken();
	    if (dp == DP.number)
	    {   // day/month year
		year = number;
		dp = nextToken();
	    }
	    else if (dp == DP.minus || dp == DP.slash)
	    {   // day/month/year
		dp = nextToken();
		if (dp != DP.number)
		    goto case_error;
		year = number;
		dp = nextToken();
	    }
	    return dp;
	}
	if (dp != DP.number)
	    goto case_error;
	n2 = number;
	//message(DTEXT("\tn2 = %d\n"), n2);
	dp = nextToken();
	if (dp == DP.minus || dp == DP.slash)
	{
	    dp = nextToken();
	    if (dp != DP.number)
		goto case_error;
	    n3 = number;
	    //message(DTEXT("\tn3 = %d\n"), n3);
	    dp = nextToken();

	    // case1: year/month/day
	    // case2: month/day/year
	    int case1, case2;

	    case1 = (n1 > 12 ||
		     (n2 >= 1 && n2 <= 12) &&
		     (n3 >= 1 && n3 <= 31));
	    case2 = ((n1 >= 1 && n1 <= 12) &&
		     (n2 >= 1 && n2 <= 31) ||
		     n3 > 31);
	    if (case1 == case2)
		goto case_error;
	    if (case1)
	    {
		year = n1;
		month = n2;
		day = n3;
	    }
	    else
	    {
		month = n1;
		day = n2;
		year = n3;
	    }
	}
	else
	{   // must be month/day
	    month = n1;
	    day = n2;
	}
	return dp;

    case_error:
	return DP.err;
    }

    int parseTimeOfDay(int n1)
    {
	int dp;
	int sign;

	// 12am is midnight
	// 12pm is noon

	//message(DTEXT("DateParse::parseTimeOfDay(%d)\n"), n1);
	hours = n1;
	dp = nextToken();
	if (dp != DP.number)
	    goto case_error;
	minutes = number;
	dp = nextToken();
	if (dp == DP.colon)
	{
	    dp = nextToken();
	    if (dp != DP.number)
		goto case_error;
	    seconds = number;
	    dp = nextToken();
	}
	else
	    seconds = 0;

	if (dp == DP.ampm)
	{
	    ampm = number;
	    dp = nextToken();
	}
	else if (dp == DP.plus || dp == DP.minus)
	{
	Loffset:
	    sign = (dp == DP.minus) ? -1 : 1;
	    dp = nextToken();
	    if (dp != DP.number)
		goto case_error;
	    tzcorrection = -sign * number;
	    dp = nextToken();
	}
	else if (dp == DP.tz)
	{
	    tzcorrection = number;
	    dp = nextToken();
	    if (number == 0 && (dp == DP.plus || dp == DP.minus))
		goto Loffset;
	    if (dp == DP.dst)
	    {   tzcorrection += 100;
		dp = nextToken();
	    }
	}
	else if (dp == DP.dsttz)
	{
	    tzcorrection = number;
	    dp = nextToken();
	}

	return dp;

    case_error:
	return DP.err;
    }

}

unittest
{
    DateParse dp;
    Date d;

    dp.parse("March 10, 1959 12:00 -800", d);
    assert(d.year         == 1959);
    assert(d.month        == 3);
    assert(d.day          == 10);
    assert(d.hour         == 12);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == 800);

    dp.parse("Tue Apr 02 02:04:57 GMT-0800 1996", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == 800);

    dp.parse("March 14, -1980 21:14:50", d);
    assert(d.year         == 1980);
    assert(d.month        == 3);
    assert(d.day          == 14);
    assert(d.hour         == 21);
    assert(d.minute       == 14);
    assert(d.second       == 50);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue Apr 02 02:04:57 1996", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue, 02 Apr 1996 02:04:57 G.M.T.", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == 0);

    dp.parse("December 31, 3000", d);
    assert(d.year         == 3000);
    assert(d.month        == 12);
    assert(d.day          == 31);
    assert(d.hour         == 0);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Wed, 31 Dec 1969 16:00:00 GMT", d);
    assert(d.year         == 1969);
    assert(d.month        == 12);
    assert(d.day          == 31);
    assert(d.hour         == 16);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 4);
    assert(d.tzcorrection == 0);

    dp.parse("1/1/1999 12:30 AM", d);
    assert(d.year         == 1999);
    assert(d.month        == 1);
    assert(d.day          == 1);
    assert(d.hour         == 0);
    assert(d.minute       == 30);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue, 20 May 2003 15:38:58 +0530", d);
    assert(d.year         == 2003);
    assert(d.month        == 5);
    assert(d.day          == 20);
    assert(d.hour         == 15);
    assert(d.minute       == 38);
    assert(d.second       == 58);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == -530);

    debug(dateparse) printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
	d.year, d.month, d.day,
	d.hour, d.minute, d.second, d.ms,
	d.weekday, d.tzcorrection);
}