view lphobos/std/regexp.d @ 650:aa6a0b7968f7

Added test case for bug #100 Removed dubious check for not emitting static private global in other modules without access. This should be handled properly somewhere else, it's causing unresolved global errors for stuff that should work (in MiniD)
author Tomas Lindquist Olsen <tomas.l.olsen@gmail.com>
date Sun, 05 Oct 2008 17:28:15 +0200
parents 373489eeaf90
children 88e23f8c2354
line wrap: on
line source


// Regular Expressions

/*
 *  Copyright (C) 2000-2005 by Digital Mars, www.digitalmars.com
 *  Written by Walter Bright
 *
 *  This software is provided 'as-is', without any express or implied
 *  warranty. In no event will the authors be held liable for any damages
 *  arising from the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, subject to the following restrictions:
 *
 *  o  The origin of this software must not be misrepresented; you must not
 *     claim that you wrote the original software. If you use this software
 *     in a product, an acknowledgment in the product documentation would be
 *     appreciated but is not required.
 *  o  Altered source versions must be plainly marked as such, and must not
 *     be misrepresented as being the original software.
 *  o  This notice may not be removed or altered from any source
 *     distribution.
 */

/* NOTE: This file has been patched from the original DMD distribution to
   work with the GDC compiler.

   Modified by David Friedman, September 2004
*/

/**********************************************
 * $(LINK2 http://www.digitalmars.com/ctg/regular.html, Regular expressions)
 * are a powerful method of string pattern matching.
 * The regular expression
 * language used is the same as that commonly used, however, some of the very
 * advanced forms may behave slightly differently.
 *
 * std.regexp is designed to work only with valid UTF strings as input.
 * To validate untrusted input, use std.utf.validate().
 *
 * In the following guide, $(I pattern)[] refers to a
 * $(LINK2 http://www.digitalmars.com/ctg/regular.html, regular expression).
 * The $(I attributes)[] refers to
	a string controlling the interpretation
	of the regular expression.
	It consists of a sequence of one or more
	of the following characters:

	<table border=1 cellspacing=0 cellpadding=5>
	<caption>Attribute Characters</caption>
	$(TR $(TH Attribute) $(TH Action))
	<tr>
	$(TD $(B g))
	$(TD global; repeat over the whole input string)
	</tr>
	<tr>
	$(TD $(B i))
	$(TD case insensitive)
	</tr>
	<tr>
	$(TD $(B m))
	$(TD treat as multiple lines separated by newlines)
	</tr>
	</table>
 *
 * The $(I format)[] string has the formatting characters:
 *
 *	<table border=1 cellspacing=0 cellpadding=5>
	<caption>Formatting Characters</caption>
	$(TR $(TH Format) $(TH Replaced With))
	$(TR
	$(TD $(B $$))	$(TD $)
	)
	$(TR
	$(TD $(B $&))	$(TD The matched substring.)
	)
	$(TR
	$(TD $(B $`))	$(TD The portion of string that precedes the matched substring.)
	)
	$(TR
	$(TD $(B $'))	$(TD The portion of string that follows the matched substring.)
	)
	$(TR
	$(TD $(B $(DOLLAR))$(I n)) $(TD The $(I n)th capture, where $(I n)
			is a single digit 1-9
			and $$(I n) is not followed by a decimal digit.)
	)
	$(TR
	$(TD $(B $(DOLLAR))$(I nn)) $(TD The $(I nn)th capture, where $(I nn)
			is a two-digit decimal
			number 01-99.
			If $(I nn)th capture is undefined or more than the number
			of parenthesized subexpressions, use the empty
			string instead.)
	)
	</table>

 *	Any other $ are left as is.
 *
 * References:
 *	$(LINK2 http://en.wikipedia.org/wiki/Regular_expressions, Wikipedia)
 * Macros:
 *	WIKI = StdRegexp
 *	DOLLAR = $
 */

/*
	Escape sequences:

	\nnn starts out a 1, 2 or 3 digit octal sequence,
	where n is an octal digit. If nnn is larger than
	0377, then the 3rd digit is not part of the sequence
	and is not consumed.
	For maximal portability, use exactly 3 digits.

	\xXX starts out a 1 or 2 digit hex sequence. X
	is a hex character. If the first character after the \x
	is not a hex character, the value of the sequence is 'x'
	and the XX are not consumed.
	For maximal portability, use exactly 2 digits.

	\uUUUU is a unicode sequence. There are exactly
	4 hex characters after the \u, if any are not, then
	the value of the sequence is 'u', and the UUUU are not
	consumed.

	Character classes:

	[a-b], where a is greater than b, will produce
	an error.

	References:

	http://www.unicode.org/unicode/reports/tr18/
 */

module std.regexp;

//debug = regexp;		// uncomment to turn on debugging printf's

private
{
    import std.c.stdio;
    import std.c.stdlib;
    import std.c.string;
    import std.stdio;
    import std.string;
    import std.ctype;
    import std.outbuffer;
    import std.bitarray;
    import std.utf;
    import std.intrinsic;
}

/** Regular expression to extract an _email address */
const char[] email =
    r"[a-zA-Z]([.]?([[a-zA-Z0-9_]-]+)*)?@([[a-zA-Z0-9_]\-_]+\.)+[a-zA-Z]{2,6}";

/** Regular expression to extract a _url */
const char[] url = r"(([h|H][t|T]|[f|F])[t|T][p|P]([s|S]?)\:\/\/|~/|/)?([\w]+:\w+@)?(([a-zA-Z]{1}([\w\-]+\.)+([\w]{2,5}))(:[\d]{1,5})?)?((/?\w+/)+|/?)(\w+\.[\w]{3,4})?([,]\w+)*((\?\w+=\w+)?(&\w+=\w+)*([,]\w*)*)?";

/************************************
 * One of these gets thrown on compilation errors
 */

class RegExpException : Exception
{
    this(char[] msg)
    {
	super(msg);
    }
}

struct regmatch_t
{
    int rm_so;			// index of start of match
    int rm_eo;			// index past end of match
}

private alias char rchar;	// so we can make a wchar version

/******************************************************
 * Search string for matches with regular expression
 * pattern with attributes.
 * Replace each match with string generated from format.
 * Params:
 *	string = String to search.
 *	pattern = Regular expression pattern.
 *	format = Replacement string format.
 *	attributes = Regular expression attributes.
 * Returns:
 *	the resulting string
 * Example:
 *	Replace the letters 'a' with the letters 'ZZ'.
 * ---
 * s = "Strap a rocket engine on a chicken."
 * sub(s, "a", "ZZ")        // result: StrZZp a rocket engine on a chicken.
 * sub(s, "a", "ZZ", "g")   // result: StrZZp ZZ rocket engine on ZZ chicken.
 * ---
 *	The replacement format can reference the matches using
 *	the $&amp;, $$, $', $`, $0 .. $99 notation:
 * ---
 * sub(s, "[ar]", "[$&]", "g") // result: St[r][a]p [a] [r]ocket engine on [a] chi
 * ---
 */

char[] sub(char[] string, char[] pattern, char[] format, char[] attributes = null)
{
    auto r = new RegExp(pattern, attributes);
    auto result = r.replace(string, format);
    delete r;
    return result;
}

unittest
{
    debug(regexp) printf("regexp.sub.unittest\n");

    char[] r = sub("hello", "ll", "ss");
    assert(r == "hesso");
}

/*******************************************************
 * Search string for matches with regular expression
 * pattern with attributes.
 * Pass each match to delegate dg.
 * Replace each match with the return value from dg.
 * Params:
 *	string = String to search.
 *	pattern = Regular expression pattern.
 *	dg = Delegate
 *	attributes = Regular expression attributes.
 * Returns: the resulting string.
 * Example:
 * Capitalize the letters 'a' and 'r':
 * ---
 * s = "Strap a rocket engine on a chicken.";
 * sub(s, "[ar]",
 *    delegate char[] (RegExp m)
 *    {
 *         return toupper(m.match(0));
 *    },
 *    "g");    // result: StRAp A Rocket engine on A chicken.
 * ---
 */

char[] sub(char[] string, char[] pattern, char[] delegate(RegExp) dg, char[] attributes = null)
{
    auto r = new RegExp(pattern, attributes);
    rchar[] result;
    int lastindex;
    int offset;

    result = string;
    lastindex = 0;
    offset = 0;
    while (r.test(string, lastindex))
    {
	int so = r.pmatch[0].rm_so;
	int eo = r.pmatch[0].rm_eo;

	rchar[] replacement = dg(r);

	// Optimize by using std.string.replace if possible - Dave Fladebo
	rchar[] slice = result[offset + so .. offset + eo];
	if (r.attributes & RegExp.REA.global &&		// global, so replace all
	    !(r.attributes & RegExp.REA.ignoreCase) &&	// not ignoring case
	    !(r.attributes & RegExp.REA.multiline) &&	// not multiline
	    pattern == slice)				// simple pattern (exact match, no special characters) 
	{
	    debug(regexp)
		printf("pattern: %.*s, slice: %.*s, replacement: %.*s\n",
		    cast(int) pattern.length, pattern.ptr,
		    cast(int) (eo-so), result.ptr + offset,
		    cast(int) replacement.length, replacement.ptr);
	    result = std.string.replace(result,slice,replacement);
	    break;
	}

	result = replaceSlice(result, result[offset + so .. offset + eo], replacement);

	if (r.attributes & RegExp.REA.global)
	{
	    offset += replacement.length - (eo - so);

	    if (lastindex == eo)
		lastindex++;		// always consume some source
	    else
		lastindex = eo;
	}
	else
	    break;
    }
    delete r;

    return result;
}

unittest
{
    debug(regexp) printf("regexp.sub.unittest\n");

    char[] foo(RegExp r) { return "ss"; }

    char[] r = sub("hello", "ll", delegate char[](RegExp r) { return "ss"; });
    assert(r == "hesso");

    r = sub("hello", "l", delegate char[](RegExp r) { return "l"; }, "g");
    assert(r == "hello");

    auto s = sub("Strap a rocket engine on a chicken.",
		 "[ar]",
	         delegate char[] (RegExp m)
	         {
		    return std.string.toupper(m.match(0));
	         },
	         "g");
    assert(s == "StRAp A Rocket engine on A chicken.");
}


/*************************************************
 * Search string[] for first match with pattern[] with attributes[].
 * Params:
 *	string = String to search.
 *	pattern = Regular expression pattern.
 *	attributes = Regular expression attributes.
 * Returns:
 *	index into string[] of match if found, -1 if no match.
 * Example:
 * ---
 * auto s = "abcabcabab";
 * std.regexp.find(s, "b");    // match, returns 1
 * std.regexp.find(s, "f");    // no match, returns -1
 * ---
 */

int find(rchar[] string, char[] pattern, char[] attributes = null)
{
    int i = -1;

    auto r = new RegExp(pattern, attributes);
    if (r.test(string))
    {
	i = r.pmatch[0].rm_so;
    }
    delete r;
    return i;
}

unittest
{
    debug(regexp) printf("regexp.find.unittest\n");

    int i;
    i = find("xabcy", "abc");
    assert(i == 1);
    i = find("cba", "abc");
    assert(i == -1);
}



/*************************************************
 * Search string[] for last match with pattern[] with attributes[].
 * Params:
 *	string = String to search.
 *	pattern = Regular expression pattern.
 *	attributes = Regular expression attributes.
 * Returns:
 *	index into string[] of match if found, -1 if no match.
 * Example:
 * ---
 * auto s = "abcabcabab";
 * std.regexp.find(s, "b");    // match, returns 9
 * std.regexp.find(s, "f");    // no match, returns -1
 * ---
 */

int rfind(rchar[] string, char[] pattern, char[] attributes = null)
{
    int i = -1;
    int lastindex = 0;

    auto r = new RegExp(pattern, attributes);
    while (r.test(string, lastindex))
    {   int eo = r.pmatch[0].rm_eo;
	i = r.pmatch[0].rm_so;
	if (lastindex == eo)
	    lastindex++;		// always consume some source
	else
	    lastindex = eo;
    }
    delete r;
    return i;
}

unittest
{
    int i;

    debug(regexp) printf("regexp.rfind.unittest\n");
    i = rfind("abcdefcdef", "c");
    assert(i == 6);
    i = rfind("abcdefcdef", "cd");
    assert(i == 6);
    i = rfind("abcdefcdef", "x");
    assert(i == -1);
    i = rfind("abcdefcdef", "xy");
    assert(i == -1);
    i = rfind("abcdefcdef", "");
    assert(i == 10);
}


/********************************************
 * Split string[] into an array of strings, using the regular
 * expression pattern[] with attributes[] as the separator.
 * Params:
 *	string = String to search.
 *	pattern = Regular expression pattern.
 *	attributes = Regular expression attributes.
 * Returns:
 * 	array of slices into string[]
 * Example:
 * ---
 * foreach (s; split("abcabcabab", "C.", "i"))
 * {
 *     writefln("s = '%s'", s);
 * }
 * // Prints:
 * // s = 'ab'
 * // s = 'b'
 * // s = 'bab'
 * ---
 */

char[][] split(char[] string, char[] pattern, char[] attributes = null)
{
    auto r = new RegExp(pattern, attributes);
    auto result = r.split(string);
    delete r;
    return result;
}

unittest
{
    debug(regexp) printf("regexp.split.unittest()\n");
    char[][] result;

    result = split("ab", "a*");
    assert(result.length == 2);
    assert(result[0] == "");
    assert(result[1] == "b");

    foreach (i, s; split("abcabcabab", "C.", "i"))
    {
	writefln("s[%d] = '%s'", i, s);
	if (i == 0) assert(s == "ab");
	else if (i == 1) assert(s == "b");
	else if (i == 2) assert(s == "bab");
	else assert(0);
    }
}

/****************************************************
 * Search string[] for first match with pattern[] with attributes[].
 * Params:
 *	string = String to search.
 *	pattern = Regular expression pattern.
 *	attributes = Regular expression attributes.
 * Returns:
 *	corresponding RegExp if found, null if not.
 * Example:
 * ---
 * import std.stdio;
 * import std.regexp;
 *
 * void main()
 * {
 *     if (auto m = std.regexp.search("abcdef", "c"))
 *     {
 *         writefln("%s[%s]%s", m.pre, m.match(0), m.post);
 *     }
 * }
 * // Prints:
 * // ab[c]def
 * ---
 */

RegExp search(char[] string, char[] pattern, char[] attributes = null)
{
    auto r = new RegExp(pattern, attributes);

    if (r.test(string))
    {
    }
    else
    {	delete r;
	r = null;
    }
    return r;
}

unittest
{
    debug(regexp) printf("regexp.string.unittest()\n");

    if (auto m = std.regexp.search("abcdef", "c()"))
    {
	auto result = std.string.format("%s[%s]%s", m.pre, m.match(0), m.post);
	assert(result == "ab[c]def");
	assert(m.match(1) == null);
	assert(m.match(2) == null);
    }
    else
	assert(0);

    if (auto n = std.regexp.search("abcdef", "g"))
    {
	assert(0);
    }
}


/* ********************************* RegExp ******************************** */

/*****************************
 * RegExp is a class to handle regular expressions.
 *
 * It is the core foundation for adding powerful string pattern matching
 * capabilities to programs like grep, text editors, awk, sed, etc.
 */
class RegExp
{
    /*****
     * Construct a RegExp object. Compile pattern
     * with <i>attributes</i> into
     * an internal form for fast execution.
     * Params:
     *	pattern = regular expression
     *  attributes = _attributes
     * Throws: RegExpException if there are any compilation errors.
     * Example:
     *  Declare two variables and assign to them a RegExp object:
     * ---
     * auto r = new RegExp("pattern");
     * auto s = new RegExp(r"p[1-5]\s*");
     * ---
     */
    public this(rchar[] pattern, rchar[] attributes = null)
    {
	pmatch = (&gmatch)[0 .. 1];
	compile(pattern, attributes);
    }

    /*****
     * Generate instance of RegExp.
     * Params:
     *	pattern = regular expression
     *  attributes = _attributes
     * Throws: RegExpException if there are any compilation errors.
     * Example:
     *  Declare two variables and assign to them a RegExp object:
     * ---
     * auto r = RegExp("pattern");
     * auto s = RegExp(r"p[1-5]\s*");
     * ---
     */
    public static RegExp opCall(rchar[] pattern, rchar[] attributes = null)
    {
	return new RegExp(pattern, attributes);
    }

    unittest
    {
	debug(regexp) printf("regexp.opCall.unittest()\n");
	auto r1 = RegExp("hello", "m");
	char[] msg;
	try
	{
	    auto r2 = RegExp("hello", "q");
	    assert(0);
	}
	catch (RegExpException ree)
	{
	    msg = ree.toString();
	    //writefln("message: %s", ree);
	}
	assert(msg == "unrecognized attribute");
    }

    /************************************
     * Set up for start of foreach loop.
     * Returns:
     *	search() returns instance of RegExp set up to _search string[].
     * Example:
     * ---
     * import std.stdio;
     * import std.regexp;
     *
     * void main()
     * {
     *     foreach(m; RegExp("ab").search("abcabcabab"))
     *     {
     *         writefln("%s[%s]%s", m.pre, m.match(0), m.post);
     *     }
     * }
     * // Prints:
     * // [ab]cabcabab
     * // abc[ab]cabab
     * // abcabc[ab]ab
     * // abcabcab[ab]
     * ---
     */

    public RegExp search(rchar[] string)
    {
	input = string;
	pmatch[0].rm_eo = 0;
	return this;
    }

    /** ditto */
    public int opApply(int delegate(inout RegExp) dg)
    {
	int result;
	RegExp r = this;

	while (test())
	{
	    result = dg(r);
	    if (result)
		break;
	}

	return result;
    }

    unittest
    {
	debug(regexp) printf("regexp.search.unittest()\n");

	int i;
	foreach(m; RegExp("ab").search("abcabcabab"))
	{
	    auto s = std.string.format("%s[%s]%s", m.pre, m.match(0), m.post);
	    if (i == 0) assert(s == "[ab]cabcabab");
	    else if (i == 1) assert(s == "abc[ab]cabab");
	    else if (i == 2) assert(s == "abcabc[ab]ab");
	    else if (i == 3) assert(s == "abcabcab[ab]");
	    else assert(0);
	    i++;
	}
    }

    /******************
     * Retrieve match n.
     *
     * n==0 means the matched substring, n>0 means the
     * n'th parenthesized subexpression.
     * if n is larger than the number of parenthesized subexpressions,
     * null is returned.
     */
    public char[] match(size_t n)
    {
	if (n >= pmatch.length)
	    return null;
	else
	{   size_t rm_so, rm_eo;
	    rm_so = pmatch[n].rm_so;
	    rm_eo = pmatch[n].rm_eo;
	    if (rm_so == rm_eo)
		return null;
	    return input[rm_so .. rm_eo];
	}
    }

    /*******************
     * Return the slice of the input that precedes the matched substring.
     */
    public char[] pre()
    {
	return input[0 .. pmatch[0].rm_so];
    }

    /*******************
     * Return the slice of the input that follows the matched substring.
     */
    public char[] post()
    {
	return input[pmatch[0].rm_eo .. $];
    }

    uint re_nsub;		// number of parenthesized subexpression matches
    regmatch_t[] pmatch;	// array [re_nsub + 1]

    rchar[] input;		// the string to search

    // per instance:

    rchar[] pattern;		// source text of the regular expression

    rchar[] flags;		// source text of the attributes parameter

    int errors;

    uint attributes;

    enum REA
    {
	global		= 1,	// has the g attribute
	ignoreCase	= 2,	// has the i attribute
	multiline	= 4,	// if treat as multiple lines separated
				// by newlines, or as a single line
	dotmatchlf	= 8,	// if . matches \n
    }


private:
    size_t src;			// current source index in input[]
    size_t src_start;		// starting index for match in input[]
    size_t p;			// position of parser in pattern[]
    regmatch_t gmatch;		// match for the entire regular expression
				// (serves as storage for pmatch[0])

    ubyte[] program;		// pattern[] compiled into regular expression program
    OutBuffer buf;




/******************************************/

// Opcodes

enum : ubyte
{
    REend,		// end of program
    REchar,		// single character
    REichar,		// single character, case insensitive
    REdchar,		// single UCS character
    REidchar,		// single wide character, case insensitive
    REanychar,		// any character
    REanystar,		// ".*"
    REstring,		// string of characters
    REistring,		// string of characters, case insensitive
    REtestbit,		// any in bitmap, non-consuming
    REbit,		// any in the bit map
    REnotbit,		// any not in the bit map
    RErange,		// any in the string
    REnotrange,		// any not in the string
    REor,		// a | b
    REplus,		// 1 or more
    REstar,		// 0 or more
    REquest,		// 0 or 1
    REnm,		// n..m
    REnmq,		// n..m, non-greedy version
    REbol,		// beginning of line
    REeol,		// end of line
    REparen,		// parenthesized subexpression
    REgoto,		// goto offset

    REwordboundary,
    REnotwordboundary,
    REdigit,
    REnotdigit,
    REspace,
    REnotspace,
    REword,
    REnotword,
    REbackref,
};

// BUG: should this include '$'?
private int isword(dchar c) { return isalnum(c) || c == '_'; }

private uint inf = ~0u;

/* ********************************
 * Throws RegExpException on error
 */

public void compile(rchar[] pattern, rchar[] attributes)
{
    //printf("RegExp.compile('%.*s', '%.*s')\n", pattern, attributes);

    this.attributes = 0;
    foreach (rchar c; attributes)
    {   REA att;

	switch (c)
	{
	    case 'g': att = REA.global;		break;
	    case 'i': att = REA.ignoreCase;	break;
	    case 'm': att = REA.multiline;	break;
	    default:
		error("unrecognized attribute");
		return;
	}
	if (this.attributes & att)
	{   error("redundant attribute");
	    return;
	}
	this.attributes |= att;
    }

    input = null;

    this.pattern = pattern;
    this.flags = attributes;

    uint oldre_nsub = re_nsub;
    re_nsub = 0;
    errors = 0;

    buf = new OutBuffer();
    buf.reserve(pattern.length * 8);
    p = 0;
    parseRegexp();
    if (p < pattern.length)
    {	error("unmatched ')'");
    }
    optimize();
    program = buf.data;
    buf.data = null;
    delete buf;

    if (re_nsub > oldre_nsub)
    {
	if (pmatch.ptr is &gmatch)
	    pmatch = null;
	pmatch.length = re_nsub + 1;
    }
    pmatch[0].rm_so = 0;
    pmatch[0].rm_eo = 0;
}

/********************************************
 * Split string[] into an array of strings, using the regular
 * expression as the separator.
 * Returns:
 * 	array of slices into string[]
 */

public rchar[][] split(rchar[] string)
{
    debug(regexp) printf("regexp.split()\n");

    rchar[][] result;

    if (string.length)
    {
	int p = 0;
	int q;
	for (q = p; q != string.length;)
	{
	    if (test(string, q))
	    {	int e;

		q = pmatch[0].rm_so;
		e = pmatch[0].rm_eo;
		if (e != p)
		{
		    result ~= string[p .. q];
		    for (int i = 1; i < pmatch.length; i++)
		    {
			int so = pmatch[i].rm_so;
			int eo = pmatch[i].rm_eo;
			if (so == eo)
			{   so = 0;	// -1 gives array bounds error
			    eo = 0;
			}
			result ~= string[so .. eo];
		    }
		    q = p = e;
		    continue;
		}
	    }
	    q++;
	}
	result ~= string[p .. string.length];
    }
    else if (!test(string))
	result ~= string;
    return result;
}

unittest
{
    debug(regexp) printf("regexp.split.unittest()\n");

    auto r = new RegExp("a*?", null);
    rchar[][] result;
    rchar[] j;
    int i;

    result = r.split("ab");

    assert(result.length == 2);
    i = std.string.cmp(result[0], "a");
    assert(i == 0);
    i = std.string.cmp(result[1], "b");
    assert(i == 0);

    r = new RegExp("a*", null);
    result = r.split("ab");
    assert(result.length == 2);
    i = std.string.cmp(result[0], "");
    assert(i == 0);
    i = std.string.cmp(result[1], "b");
    assert(i == 0);

    r = new RegExp("<(\\/)?([^<>]+)>", null);
    result = r.split("a<b>font</b>bar<TAG>hello</TAG>");

    for (i = 0; i < result.length; i++)
    {
	//debug(regexp) printf("result[%d] = '%.*s'\n", i, result[i]);
    }

    j = join(result, ",");
    //printf("j = '%.*s'\n", j);
    i = std.string.cmp(j, "a,,b,font,/,b,bar,,TAG,hello,/,TAG,");
    assert(i == 0);

    r = new RegExp("a[bc]", null);
    result = r.match("123ab");
    j = join(result, ",");
    i = std.string.cmp(j, "ab");
    assert(i == 0);
    
    result = r.match("ac");
    j = join(result, ",");
    i = std.string.cmp(j, "ac");
    assert(i == 0);
}

/*************************************************
 * Search string[] for match with regular expression.
 * Returns:
 *	index of match if successful, -1 if not found
 */

public int find(rchar[] string)
{
    int i;

    i = test(string);
    if (i)
	i = pmatch[0].rm_so;
    else
	i = -1;			// no match
    return i;
}

//deprecated alias find search;

unittest
{
    debug(regexp) printf("regexp.find.unittest()\n");

    int i;
    RegExp r = new RegExp("abc", null);
    i = r.find("xabcy");
    assert(i == 1);
    i = r.find("cba");
    assert(i == -1);
}


/*************************************************
 * Search string[] for match.
 * Returns:
 *	If global attribute, return same value as exec(string).
 *	If not global attribute, return array of all matches.
 */

public rchar[][] match(rchar[] string)
{
    rchar[][] result;

    if (attributes & REA.global)
    {
	int lastindex = 0;

	while (test(string, lastindex))
	{   int eo = pmatch[0].rm_eo;

	    result ~= input[pmatch[0].rm_so .. eo];
	    if (lastindex == eo)
		lastindex++;		// always consume some source
	    else
		lastindex = eo;
	}
    }
    else
    {
	result = exec(string);
    }
    return result;
}

unittest
{
    debug(regexp) printf("regexp.match.unittest()\n");

    int i;
    rchar[][] result;
    rchar[] j;
    RegExp r;

    r = new RegExp("a[bc]", null);
    result = r.match("1ab2ac3");
    j = join(result, ",");
    i = std.string.cmp(j, "ab");
    assert(i == 0);

    r = new RegExp("a[bc]", "g");
    result = r.match("1ab2ac3");
    j = join(result, ",");
    i = std.string.cmp(j, "ab,ac");
    assert(i == 0);
}


/*************************************************
 * Find regular expression matches in string[]. Replace those matches
 * with a new _string composed of format[] merged with the result of the
 * matches.
 * If global, replace all matches. Otherwise, replace first match.
 * Returns: the new _string
 */

public rchar[] replace(rchar[] string, rchar[] format)
{
    rchar[] result;
    int lastindex;
    int offset;

    result = string;
    lastindex = 0;
    offset = 0;
    for (;;)
    {
	if (!test(string, lastindex))
	    break;

	int so = pmatch[0].rm_so;
	int eo = pmatch[0].rm_eo;

	rchar[] replacement = replace(format);

	// Optimize by using std.string.replace if possible - Dave Fladebo
	rchar[] slice = result[offset + so .. offset + eo];
	if (attributes & REA.global &&		// global, so replace all
	   !(attributes & REA.ignoreCase) &&	// not ignoring case
	   !(attributes & REA.multiline) &&	// not multiline
	   pattern == slice &&			// simple pattern (exact match, no special characters) 
	   format == replacement)		// simple format, not $ formats
	{
	    debug(regexp)
		printf("pattern: %.*s, slice: %.*s, format: %.*s, replacement: %.*s\n",
		    cast(int) pattern.length, pattern.ptr,
		    cast(int) (eo-so), result.ptr + offset,
		    cast(int) format.length, format.ptr,
		    cast(int) replacement.length, replacement.ptr);
	    result = std.string.replace(result,slice,replacement);
	    break;
	}

	result = replaceSlice(result, result[offset + so .. offset + eo], replacement);

	if (attributes & REA.global)
	{
	    offset += replacement.length - (eo - so);

	    if (lastindex == eo)
		lastindex++;		// always consume some source
	    else
		lastindex = eo;
	}
	else
	    break;
    }

    return result;
}

unittest
{
    debug(regexp) printf("regexp.replace.unittest()\n");

    int i;
    rchar[] result;
    RegExp r;

    r = new RegExp("a[bc]", "g");
    result = r.replace("1ab2ac3", "x$&y");
    i = std.string.cmp(result, "1xaby2xacy3");
    assert(i == 0);

    r = new RegExp("ab", "g");
    result = r.replace("1ab2ac3", "xy");
    i = std.string.cmp(result, "1xy2ac3");
    assert(i == 0);
}


/*************************************************
 * Search string[] for match.
 * Returns:
 *	array of slices into string[] representing matches
 */

public rchar[][] exec(rchar[] string)
{
    debug(regexp) printf("regexp.exec(string = '%.*s')\n",
	cast(int) string.length, string.ptr);
    input = string;
    pmatch[0].rm_so = 0;
    pmatch[0].rm_eo = 0;
    return exec();
}

/*************************************************
 * Pick up where last exec(string) or exec() left off,
 * searching string[] for next match.
 * Returns:
 *	array of slices into string[] representing matches
 */

public rchar[][] exec()
{
    if (!test())
	return null;

    auto result = new rchar[][pmatch.length];
    for (int i = 0; i < pmatch.length; i++)
    {
	if (pmatch[i].rm_so == pmatch[i].rm_eo)
	    result[i] = null;
	else
	    result[i] = input[pmatch[i].rm_so .. pmatch[i].rm_eo];
    }

    return result;
}

/************************************************
 * Search string[] for match.
 * Returns: 0 for no match, !=0 for match
 * Example:
---
import std.stdio;
import std.regexp;
import std.string;

int grep(int delegate(char[]) pred, char[][] list)
{
  int count;
  foreach (s; list)
  {  if (pred(s))
       ++count;
  }
  return count;
}

void main()
{
  auto x = grep(&RegExp("[Ff]oo").test,
                std.string.split("mary had a foo lamb"));
  writefln(x);
}
---
 * which prints: 1
 */

public int test(rchar[] string)
{
    return test(string, 0 /*pmatch[0].rm_eo*/);
}

/************************************************
 * Pick up where last test(string) or test() left off, and search again.
 * Returns: 0 for no match, !=0 for match
 */

public int test()
{
    return test(input, pmatch[0].rm_eo);
}

/************************************************
 * Test string[] starting at startindex against regular expression.
 * Returns: 0 for no match, !=0 for match
 */

public int test(char[] string, int startindex)
{
    char firstc;
    uint si;

    input = string;
    debug (regexp) printf("RegExp.test(input[] = '%.*s', startindex = %d)\n",
	cast(int) input.length, input.ptr, startindex);
    pmatch[0].rm_so = 0;
    pmatch[0].rm_eo = 0;
    if (startindex < 0 || startindex > input.length)
    {
	return 0;			// fail
    }
    //debug(regexp) printProgram(program);

    // First character optimization
    firstc = 0;
    if (program[0] == REchar)
    {
	firstc = program[1];
	if (attributes & REA.ignoreCase && isalpha(firstc))
	    firstc = 0;
    }

    for (si = startindex; ; si++)
    {
	if (firstc)
	{
	    if (si == input.length)
		break;			// no match
	    if (input[si] != firstc)
	    {
		si++;
		if (!chr(si, firstc))	// if first character not found
		    break;		// no match
	    }
	}
	for (int i = 0; i < re_nsub + 1; i++)
	{
	    pmatch[i].rm_so = -1;
	    pmatch[i].rm_eo = -1;
	}
	src_start = src = si;
	if (trymatch(0, program.length))
	{
	    pmatch[0].rm_so = si;
	    pmatch[0].rm_eo = src;
	    //debug(regexp) printf("start = %d, end = %d\n", gmatch.rm_so, gmatch.rm_eo);
	    return 1;
	}
	// If possible match must start at beginning, we are done
	if (program[0] == REbol || program[0] == REanystar)
	{
	    if (attributes & REA.multiline)
	    {
		// Scan for the next \n
		if (!chr(si, '\n'))
		    break;		// no match if '\n' not found
	    }
	    else
		break;
	}
	if (si == input.length)
	    break;
	//debug(regexp) printf("Starting new try: '%.*s'\n", input[si + 1 .. input.length]);
    }
    return 0;		// no match
}

int chr(inout uint si, rchar c)
{
    for (; si < input.length; si++)
    {
	if (input[si] == c)
	    return 1;
    }
    return 0;
}


void printProgram(ubyte[] prog)
{
  //debug(regexp)
  {
    uint pc;
    uint len;
    uint n;
    uint m;
    ushort *pu;
    uint *puint;
    ubyte[] s;

    printf("printProgram()\n");
    for (pc = 0; pc < prog.length; )
    {
	printf("%3d: ", pc);

	//printf("prog[pc] = %d, REchar = %d, REnmq = %d\n", prog[pc], REchar, REnmq);
	switch (prog[pc])
	{
	    case REchar:
		printf("\tREchar '%c'\n", prog[pc + 1]);
		pc += 1 + char.sizeof;
		break;

	    case REichar:
		printf("\tREichar '%c'\n", prog[pc + 1]);
		pc += 1 + char.sizeof;
		break;

	    case REdchar:
		printf("\tREdchar '%c'\n", *cast(dchar *)&prog[pc + 1]);
		pc += 1 + dchar.sizeof;
		break;

	    case REidchar:
		printf("\tREidchar '%c'\n", *cast(dchar *)&prog[pc + 1]);
		pc += 1 + dchar.sizeof;
		break;

	    case REanychar:
		printf("\tREanychar\n");
		pc++;
		break;

	    case REstring:
		len = *cast(uint *)&prog[pc + 1];
		s = (&prog[pc + 1 + uint.sizeof])[0 .. len];
		printf("\tREstring x%x, '%.*s'\n", len,
		    cast(int) s.length, s.ptr);
		pc += 1 + uint.sizeof + len * rchar.sizeof;
		break;

	    case REistring:
		len = *cast(uint *)&prog[pc + 1];
		s = (&prog[pc + 1 + uint.sizeof])[0 .. len];
		printf("\tREistring x%x, '%.*s'\n", len,
		    cast(int) s.length, s.ptr);
		pc += 1 + uint.sizeof + len * rchar.sizeof;
		break;

	    case REtestbit:
		pu = cast(ushort *)&prog[pc + 1];
		printf("\tREtestbit %d, %d\n", pu[0], pu[1]);
		len = pu[1];
		pc += 1 + 2 * ushort.sizeof + len;
		break;

	    case REbit:
		pu = cast(ushort *)&prog[pc + 1];
		len = pu[1];
		printf("\tREbit cmax=%02x, len=%d:", pu[0], len);
		for (n = 0; n < len; n++)
		    printf(" %02x", prog[pc + 1 + 2 * ushort.sizeof + n]);
		printf("\n");
		pc += 1 + 2 * ushort.sizeof + len;
		break;

	    case REnotbit:
		pu = cast(ushort *)&prog[pc + 1];
		printf("\tREnotbit %d, %d\n", pu[0], pu[1]);
		len = pu[1];
		pc += 1 + 2 * ushort.sizeof + len;
		break;

	    case RErange:
		len = *cast(uint *)&prog[pc + 1];
		printf("\tRErange %d\n", len);
		// BUG: REAignoreCase?
		pc += 1 + uint.sizeof + len;
		break;

	    case REnotrange:
		len = *cast(uint *)&prog[pc + 1];
		printf("\tREnotrange %d\n", len);
		// BUG: REAignoreCase?
		pc += 1 + uint.sizeof + len;
		break;

	    case REbol:
		printf("\tREbol\n");
		pc++;
		break;

	    case REeol:
		printf("\tREeol\n");
		pc++;
		break;

	    case REor:
		len = *cast(uint *)&prog[pc + 1];
		printf("\tREor %d, pc=>%d\n", len, pc + 1 + uint.sizeof + len);
		pc += 1 + uint.sizeof;
		break;

	    case REgoto:
		len = *cast(uint *)&prog[pc + 1];
		printf("\tREgoto %d, pc=>%d\n", len, pc + 1 + uint.sizeof + len);
		pc += 1 + uint.sizeof;
		break;

	    case REanystar:
		printf("\tREanystar\n");
		pc++;
		break;

	    case REnm:
	    case REnmq:
		// len, n, m, ()
		puint = cast(uint *)&prog[pc + 1];
		len = puint[0];
		n = puint[1];
		m = puint[2];
		printf("\tREnm%s len=%d, n=%u, m=%u, pc=>%d\n",
		    (prog[pc] == REnmq) ? cast(char*)"q" : cast(char*)" ",
		    len, n, m, pc + 1 + uint.sizeof * 3 + len);
		pc += 1 + uint.sizeof * 3;
		break;

	    case REparen:
		// len, n, ()
		puint = cast(uint *)&prog[pc + 1];
		len = puint[0];
		n = puint[1];
		printf("\tREparen len=%d n=%d, pc=>%d\n", len, n, pc + 1 + uint.sizeof * 2 + len);
		pc += 1 + uint.sizeof * 2;
		break;

	    case REend:
		printf("\tREend\n");
		return;

	    case REwordboundary:
		printf("\tREwordboundary\n");
		pc++;
		break;

	    case REnotwordboundary:
		printf("\tREnotwordboundary\n");
		pc++;
		break;

	    case REdigit:
		printf("\tREdigit\n");
		pc++;
		break;

	    case REnotdigit:
		printf("\tREnotdigit\n");
		pc++;
		break;

	    case REspace:
		printf("\tREspace\n");
		pc++;
		break;

	    case REnotspace:
		printf("\tREnotspace\n");
		pc++;
		break;

	    case REword:
		printf("\tREword\n");
		pc++;
		break;

	    case REnotword:
		printf("\tREnotword\n");
		pc++;
		break;

	    case REbackref:
		printf("\tREbackref %d\n", prog[1]);
		pc += 2;
		break;

	    default:
		assert(0);
	}
    }
  }
}


/**************************************************
 * Match input against a section of the program[].
 * Returns:
 *	1 if successful match
 *	0 no match
 */

int trymatch(int pc, int pcend)
{   int srcsave;
    uint len;
    uint n;
    uint m;
    uint count;
    uint pop;
    uint ss;
    regmatch_t *psave;
    uint c1;
    uint c2;
    ushort* pu;
    uint* puint;

    debug(regexp)
    {
	char[] s = input[src .. input.length];
	printf("RegExp.trymatch(pc = %d, src = '%.*s', pcend = %d)\n",
	    pc, cast(int) s.length, s.ptr, pcend);
    }
    srcsave = src;
    psave = null;
    for (;;)
    {
	if (pc == pcend)		// if done matching
	{   debug(regex) printf("\tprogend\n");
	    return 1;
	}

	//printf("\top = %d\n", program[pc]);
	switch (program[pc])
	{
	    case REchar:
		if (src == input.length)
		    goto Lnomatch;
		debug(regexp) printf("\tREchar '%c', src = '%c'\n", program[pc + 1], input[src]);
		if (program[pc + 1] != input[src])
		    goto Lnomatch;
		src++;
		pc += 1 + char.sizeof;
		break;

	    case REichar:
		if (src == input.length)
		    goto Lnomatch;
		debug(regexp) printf("\tREichar '%c', src = '%c'\n", program[pc + 1], input[src]);
		c1 = program[pc + 1];
		c2 = input[src];
		if (c1 != c2)
		{
		    if (islower(cast(rchar)c2))
			c2 = std.ctype.toupper(cast(rchar)c2);
		    else
			goto Lnomatch;
		    if (c1 != c2)
			goto Lnomatch;
		}
		src++;
		pc += 1 + char.sizeof;
		break;

	    case REdchar:
		debug(regexp) printf("\tREdchar '%c', src = '%c'\n", *(cast(dchar *)&program[pc + 1]), input[src]);
		if (src == input.length)
		    goto Lnomatch;
		if (*(cast(dchar *)&program[pc + 1]) != input[src])
		    goto Lnomatch;
		src++;
		pc += 1 + dchar.sizeof;
		break;

	    case REidchar:
		debug(regexp) printf("\tREidchar '%c', src = '%c'\n", *(cast(dchar *)&program[pc + 1]), input[src]);
		if (src == input.length)
		    goto Lnomatch;
		c1 = *(cast(dchar *)&program[pc + 1]);
		c2 = input[src];
		if (c1 != c2)
		{
		    if (islower(cast(rchar)c2))
			c2 = std.ctype.toupper(cast(rchar)c2);
		    else
			goto Lnomatch;
		    if (c1 != c2)
			goto Lnomatch;
		}
		src++;
		pc += 1 + dchar.sizeof;
		break;

	    case REanychar:
		debug(regexp) printf("\tREanychar\n");
		if (src == input.length)
		    goto Lnomatch;
		if (!(attributes & REA.dotmatchlf) && input[src] == cast(rchar)'\n')
		    goto Lnomatch;
		src += std.utf.stride(input, src);
		//src++;
		pc++;
		break;

	    case REstring:
		len = *cast(uint *)&program[pc + 1];
		debug(regexp)
		{
		    char[] s = (&program[pc + 1 + uint.sizeof])[0 .. len];
		    printf("\tREstring x%x, '%.*s'\n", len,
			cast(int) s.length, s.ptr);
		}
		if (src + len > input.length)
		    goto Lnomatch;
		if (memcmp(&program[pc + 1 + uint.sizeof], &input[src], len * rchar.sizeof))
		    goto Lnomatch;
		src += len;
		pc += 1 + uint.sizeof + len * rchar.sizeof;
		break;

	    case REistring:
		len = *cast(uint *)&program[pc + 1];
		debug(regexp)
		{
		    char[] s = (&program[pc + 1 + uint.sizeof])[0 .. len];
		    printf("\tREistring x%x, '%.*s'\n", len,
			cast(int) s.length, s.ptr);
		}
		if (src + len > input.length)
		    goto Lnomatch;
		version (Win32)
		{
		    if (memicmp(cast(char*)&program[pc + 1 + uint.sizeof], &input[src], len * rchar.sizeof))
			goto Lnomatch;
		}
		else
		{
		    if (icmp((cast(char*)&program[pc + 1 + uint.sizeof])[0..len],
			     input[src .. src + len]))
			goto Lnomatch;
		}
		src += len;
		pc += 1 + uint.sizeof + len * rchar.sizeof;
		break;

	    case REtestbit:
		pu = (cast(ushort *)&program[pc + 1]);
		debug(regexp) printf("\tREtestbit %d, %d, '%c', x%02x\n",
		    pu[0], pu[1], input[src], input[src]);
		if (src == input.length)
		    goto Lnomatch;
		len = pu[1];
		c1 = input[src];
		//printf("[x%02x]=x%02x, x%02x\n", c1 >> 3, ((&program[pc + 1 + 4])[c1 >> 3] ), (1 << (c1 & 7)));
		if (c1 <= pu[0] &&
		    !bt(cast(uint*)&(program[pc + 1 + 4]), c1)) // assumes BitArray implementation
		    goto Lnomatch;
		pc += 1 + 2 * ushort.sizeof + len;
		break;

	    case REbit:
		pu = (cast(ushort *)&program[pc + 1]);
		debug(regexp) printf("\tREbit %d, %d, '%c'\n",
		    pu[0], pu[1], input[src]);
		if (src == input.length)
		    goto Lnomatch;
		len = pu[1];
		c1 = input[src];
		if (c1 > pu[0])
		    goto Lnomatch;
		if (!bt(cast(uint*)&(program[pc + 1 + 4]), c1)) // assumes BitArray implementation
		    goto Lnomatch;
		src++;
		pc += 1 + 2 * ushort.sizeof + len;
		break;

	    case REnotbit:
		pu = (cast(ushort *)&program[pc + 1]);
		debug(regexp) printf("\tREnotbit %d, %d, '%c'\n",
		    pu[0], pu[1], input[src]);
		if (src == input.length)
		    goto Lnomatch;
		len = pu[1];
		c1 = input[src];
		if (c1 <= pu[0] &&
		    bt(cast(uint*)&(program[pc + 1 + 4]), c1)) // assumes BitArray implementation
		    goto Lnomatch;
		src++;
		pc += 1 + 2 * ushort.sizeof + len;
		break;

	    case RErange:
		len = *cast(uint *)&program[pc + 1];
		debug(regexp) printf("\tRErange %d\n", len);
		if (src == input.length)
		    goto Lnomatch;
		// BUG: REA.ignoreCase?
		if (memchr(cast(char*)&program[pc + 1 + uint.sizeof], input[src], len) == null)
		    goto Lnomatch;
		src++;
		pc += 1 + uint.sizeof + len;
		break;

	    case REnotrange:
		len = *cast(uint *)&program[pc + 1];
		debug(regexp) printf("\tREnotrange %d\n", len);
		if (src == input.length)
		    goto Lnomatch;
		// BUG: REA.ignoreCase?
		if (memchr(cast(char*)&program[pc + 1 + uint.sizeof], input[src], len) != null)
		    goto Lnomatch;
		src++;
		pc += 1 + uint.sizeof + len;
		break;

	    case REbol:
		debug(regexp) printf("\tREbol\n");
		if (src == 0)
		{
		}
		else if (attributes & REA.multiline)
		{
		    if (input[src - 1] != '\n')
			goto Lnomatch;
		}
		else
		    goto Lnomatch;
		pc++;
		break;

	    case REeol:
		debug(regexp) printf("\tREeol\n");
		if (src == input.length)
		{
		}
		else if (attributes & REA.multiline && input[src] == '\n')
		    src++;
		else
		    goto Lnomatch;
		pc++;
		break;

	    case REor:
		len = (cast(uint *)&program[pc + 1])[0];
		debug(regexp) printf("\tREor %d\n", len);
		pop = pc + 1 + uint.sizeof;
		ss = src;
		if (trymatch(pop, pcend))
		{
		    if (pcend != program.length)
		    {	int s;

			s = src;
			if (trymatch(pcend, program.length))
			{   debug(regexp) printf("\tfirst operand matched\n");
			    src = s;
			    return 1;
			}
			else
			{
			    // If second branch doesn't match to end, take first anyway
			    src = ss;
			    if (!trymatch(pop + len, program.length))
			    {
				debug(regexp) printf("\tfirst operand matched\n");
				src = s;
				return 1;
			    }
			}
			src = ss;
		    }
		    else
		    {	debug(regexp) printf("\tfirst operand matched\n");
			return 1;
		    }
		}
		pc = pop + len;		// proceed with 2nd branch
		break;

	    case REgoto:
		debug(regexp) printf("\tREgoto\n");
		len = (cast(uint *)&program[pc + 1])[0];
		pc += 1 + uint.sizeof + len;
		break;

	    case REanystar:
		debug(regexp) printf("\tREanystar\n");
		pc++;
		for (;;)
		{   int s1;
		    int s2;

		    s1 = src;
		    if (src == input.length)
			break;
		    if (!(attributes & REA.dotmatchlf) && input[src] == '\n')
			break;
		    src++;
		    s2 = src;

		    // If no match after consumption, but it
		    // did match before, then no match
		    if (!trymatch(pc, program.length))
		    {
			src = s1;
			// BUG: should we save/restore pmatch[]?
			if (trymatch(pc, program.length))
			{
			    src = s1;		// no match
			    break;
			}
		    }
		    src = s2;
		}
		break;

	    case REnm:
	    case REnmq:
		// len, n, m, ()
		puint = cast(uint *)&program[pc + 1];
		len = puint[0];
		n = puint[1];
		m = puint[2];
		debug(regexp) printf("\tREnm%s len=%d, n=%u, m=%u\n", (program[pc] == REnmq) ? cast(char*)"q" : cast(char*)"", len, n, m);
		pop = pc + 1 + uint.sizeof * 3;
		for (count = 0; count < n; count++)
		{
		    if (!trymatch(pop, pop + len))
			goto Lnomatch;
		}
		if (!psave && count < m)
		{
		    //version (Win32)
			psave = cast(regmatch_t *)alloca((re_nsub + 1) * regmatch_t.sizeof);
		    //else
			//psave = new regmatch_t[re_nsub + 1];
		}
		if (program[pc] == REnmq)	// if minimal munch
		{
		    for (; count < m; count++)
		    {   int s1;

			memcpy(psave, pmatch.ptr, (re_nsub + 1) * regmatch_t.sizeof);
			s1 = src;

			if (trymatch(pop + len, program.length))
			{
			    src = s1;
			    memcpy(pmatch.ptr, psave, (re_nsub + 1) * regmatch_t.sizeof);
			    break;
			}

			if (!trymatch(pop, pop + len))
			{   debug(regexp) printf("\tdoesn't match subexpression\n");
			    break;
			}

			// If source is not consumed, don't
			// infinite loop on the match
			if (s1 == src)
			{   debug(regexp) printf("\tsource is not consumed\n");
			    break;
			}
		    }
		}
		else	// maximal munch
		{
		    for (; count < m; count++)
		    {   int s1;
			int s2;

			memcpy(psave, pmatch.ptr, (re_nsub + 1) * regmatch_t.sizeof);
			s1 = src;
			if (!trymatch(pop, pop + len))
			{   debug(regexp) printf("\tdoesn't match subexpression\n");
			    break;
			}
			s2 = src;

			// If source is not consumed, don't
			// infinite loop on the match
			if (s1 == s2)
			{   debug(regexp) printf("\tsource is not consumed\n");
			    break;
			}

			// If no match after consumption, but it
			// did match before, then no match
			if (!trymatch(pop + len, program.length))
			{
			    src = s1;
			    if (trymatch(pop + len, program.length))
			    {
				src = s1;		// no match
				memcpy(pmatch.ptr, psave, (re_nsub + 1) * regmatch_t.sizeof);
				break;
			    }
			}
			src = s2;
		    }
		}
		debug(regexp) printf("\tREnm len=%d, n=%u, m=%u, DONE count=%d\n", len, n, m, count);
		pc = pop + len;
		break;

	    case REparen:
		// len, ()
		debug(regexp) printf("\tREparen\n");
		puint = cast(uint *)&program[pc + 1];
		len = puint[0];
		n = puint[1];
		pop = pc + 1 + uint.sizeof * 2;
		ss = src;
		if (!trymatch(pop, pop + len))
		    goto Lnomatch;
		pmatch[n + 1].rm_so = ss;
		pmatch[n + 1].rm_eo = src;
		pc = pop + len;
		break;

	    case REend:
		debug(regexp) printf("\tREend\n");
		return 1;		// successful match

	    case REwordboundary:
		debug(regexp) printf("\tREwordboundary\n");
		if (src > 0 && src < input.length)
		{
		    c1 = input[src - 1];
		    c2 = input[src];
		    if (!(
			  (isword(cast(rchar)c1) && !isword(cast(rchar)c2)) ||
			  (!isword(cast(rchar)c1) && isword(cast(rchar)c2))
			 )
		       )
			goto Lnomatch;
		}
		pc++;
		break;

	    case REnotwordboundary:
		debug(regexp) printf("\tREnotwordboundary\n");
		if (src == 0 || src == input.length)
		    goto Lnomatch;
		c1 = input[src - 1];
		c2 = input[src];
		if (
		    (isword(cast(rchar)c1) && !isword(cast(rchar)c2)) ||
		    (!isword(cast(rchar)c1) && isword(cast(rchar)c2))
		   )
		    goto Lnomatch;
		pc++;
		break;

	    case REdigit:
		debug(regexp) printf("\tREdigit\n");
		if (src == input.length)
		    goto Lnomatch;
		if (!isdigit(input[src]))
		    goto Lnomatch;
		src++;
		pc++;
		break;

	    case REnotdigit:
		debug(regexp) printf("\tREnotdigit\n");
		if (src == input.length)
		    goto Lnomatch;
		if (isdigit(input[src]))
		    goto Lnomatch;
		src++;
		pc++;
		break;

	    case REspace:
		debug(regexp) printf("\tREspace\n");
		if (src == input.length)
		    goto Lnomatch;
		if (!isspace(input[src]))
		    goto Lnomatch;
		src++;
		pc++;
		break;

	    case REnotspace:
		debug(regexp) printf("\tREnotspace\n");
		if (src == input.length)
		    goto Lnomatch;
		if (isspace(input[src]))
		    goto Lnomatch;
		src++;
		pc++;
		break;

	    case REword:
		debug(regexp) printf("\tREword\n");
		if (src == input.length)
		    goto Lnomatch;
		if (!isword(input[src]))
		    goto Lnomatch;
		src++;
		pc++;
		break;

	    case REnotword:
		debug(regexp) printf("\tREnotword\n");
		if (src == input.length)
		    goto Lnomatch;
		if (isword(input[src]))
		    goto Lnomatch;
		src++;
		pc++;
		break;

	    case REbackref:
	    {
		n = program[pc + 1];
		debug(regexp) printf("\tREbackref %d\n", n);

		int so = pmatch[n + 1].rm_so;
		int eo = pmatch[n + 1].rm_eo;
		len = eo - so;
		if (src + len > input.length)
		    goto Lnomatch;
		else if (attributes & REA.ignoreCase)
		{
		    if (icmp(input[src .. src + len], input[so .. eo]))
			goto Lnomatch;
		}
		else if (memcmp(&input[src], &input[so], len * rchar.sizeof))
		    goto Lnomatch;
		src += len;
		pc += 2;
		break;
	    }

	    default:
		assert(0);
	}
    }

Lnomatch:
    debug(regexp) printf("\tnomatch pc=%d\n", pc);
    src = srcsave;
    return 0;
}

/* =================== Compiler ================== */

int parseRegexp()
{   uint offset;
    uint gotooffset;
    uint len1;
    uint len2;

    //printf("parseRegexp() '%.*s'\n", pattern[p .. pattern.length]);
    offset = buf.offset;
    for (;;)
    {
	assert(p <= pattern.length);
	if (p == pattern.length)
	{   buf.write(REend);
	    return 1;
	}
	switch (pattern[p])
	{
	    case ')':
		return 1;

	    case '|':
		p++;
		gotooffset = buf.offset;
		buf.write(REgoto);
		buf.write(cast(uint)0);
		len1 = buf.offset - offset;
		buf.spread(offset, 1 + uint.sizeof);
		gotooffset += 1 + uint.sizeof;
		parseRegexp();
		len2 = buf.offset - (gotooffset + 1 + uint.sizeof);
		buf.data[offset] = REor;
		(cast(uint *)&buf.data[offset + 1])[0] = len1;
		(cast(uint *)&buf.data[gotooffset + 1])[0] = len2;
		break;

	    default:
		parsePiece();
		break;
	}
    }
    assert(0);
}

int parsePiece()
{   uint offset;
    uint len;
    uint n;
    uint m;
    ubyte op;
    int plength = pattern.length;

    //printf("parsePiece() '%.*s'\n", pattern[p .. pattern.length]);
    offset = buf.offset;
    parseAtom();
    if (p == plength)
	return 1;
    switch (pattern[p])
    {
	case '*':
	    // Special optimization: replace .* with REanystar
	    if (buf.offset - offset == 1 &&
		buf.data[offset] == REanychar &&
		p + 1 < plength &&
		pattern[p + 1] != '?')
	    {
		buf.data[offset] = REanystar;
		p++;
		break;
	    }

	    n = 0;
	    m = inf;
	    goto Lnm;

	case '+':
	    n = 1;
	    m = inf;
	    goto Lnm;

	case '?':
	    n = 0;
	    m = 1;
	    goto Lnm;

	case '{':	// {n} {n,} {n,m}
	    p++;
	    if (p == plength || !isdigit(pattern[p]))
		goto Lerr;
	    n = 0;
	    do
	    {
		// BUG: handle overflow
		n = n * 10 + pattern[p] - '0';
		p++;
		if (p == plength)
		    goto Lerr;
	    } while (isdigit(pattern[p]));
	    if (pattern[p] == '}')		// {n}
	    {	m = n;
		goto Lnm;
	    }
	    if (pattern[p] != ',')
		goto Lerr;
	    p++;
	    if (p == plength)
		goto Lerr;
	    if (pattern[p] == /*{*/ '}')	// {n,}
	    {	m = inf;
		goto Lnm;
	    }
	    if (!isdigit(pattern[p]))
		goto Lerr;
	    m = 0;			// {n,m}
	    do
	    {
		// BUG: handle overflow
		m = m * 10 + pattern[p] - '0';
		p++;
		if (p == plength)
		    goto Lerr;
	    } while (isdigit(pattern[p]));
	    if (pattern[p] != /*{*/ '}')
		goto Lerr;
	    goto Lnm;

	Lnm:
	    p++;
	    op = REnm;
	    if (p < plength && pattern[p] == '?')
	    {	op = REnmq;	// minimal munch version
		p++;
	    }
	    len = buf.offset - offset;
	    buf.spread(offset, 1 + uint.sizeof * 3);
	    buf.data[offset] = op;
	    uint* puint = cast(uint *)&buf.data[offset + 1];
	    puint[0] = len;
	    puint[1] = n;
	    puint[2] = m;
	    break;

	default:
	    break;
    }
    return 1;

Lerr:
    error("badly formed {n,m}");
    assert(0);
}

int parseAtom()
{   ubyte op;
    uint offset;
    rchar c;

    //printf("parseAtom() '%.*s'\n", pattern[p .. pattern.length]);
    if (p < pattern.length)
    {
	c = pattern[p];
	switch (c)
	{
	    case '*':
	    case '+':
	    case '?':
		error("*+? not allowed in atom");
		p++;
		return 0;

	    case '(':
		p++;
		buf.write(REparen);
		offset = buf.offset;
		buf.write(cast(uint)0);		// reserve space for length
		buf.write(re_nsub);
		re_nsub++;
		parseRegexp();
		*cast(uint *)&buf.data[offset] =
		    buf.offset - (offset + uint.sizeof * 2);
		if (p == pattern.length || pattern[p] != ')')
		{
		    error("')' expected");
		    return 0;
		}
		p++;
		break;

	    case '[':
		if (!parseRange())
		    return 0;
		break;

	    case '.':
		p++;
		buf.write(REanychar);
		break;

	    case '^':
		p++;
		buf.write(REbol);
		break;

	    case '$':
		p++;
		buf.write(REeol);
		break;

	    case '\\':
		p++;
		if (p == pattern.length)
		{   error("no character past '\\'");
		    return 0;
		}
		c = pattern[p];
		switch (c)
		{
		    case 'b':    op = REwordboundary;	 goto Lop;
		    case 'B':    op = REnotwordboundary; goto Lop;
		    case 'd':    op = REdigit;		 goto Lop;
		    case 'D':    op = REnotdigit;	 goto Lop;
		    case 's':    op = REspace;		 goto Lop;
		    case 'S':    op = REnotspace;	 goto Lop;
		    case 'w':    op = REword;		 goto Lop;
		    case 'W':    op = REnotword;	 goto Lop;

		    Lop:
			buf.write(op);
			p++;
			break;

		    case 'f':
		    case 'n':
		    case 'r':
		    case 't':
		    case 'v':
		    case 'c':
		    case 'x':
		    case 'u':
		    case '0':
			c = cast(char)escape();
			goto Lbyte;

		    case '1': case '2': case '3':
		    case '4': case '5': case '6':
		    case '7': case '8': case '9':
			c -= '1';
			if (c < re_nsub)
			{   buf.write(REbackref);
			    buf.write(cast(ubyte)c);
			}
			else
			{   error("no matching back reference");
			    return 0;
			}
			p++;
			break;

		    default:
			p++;
			goto Lbyte;
		}
		break;

	    default:
		p++;
	    Lbyte:
		op = REchar;
		if (attributes & REA.ignoreCase)
		{
		    if (isalpha(c))
		    {
			op = REichar;
			c = cast(char)std.ctype.toupper(c);
		    }
		}
		if (op == REchar && c <= 0xFF)
		{
		    // Look ahead and see if we can make this into
		    // an REstring
		    int q;
		    int len;

		    for (q = p; q < pattern.length; ++q)
		    {	rchar qc = pattern[q];

			switch (qc)
			{
			    case '{':
			    case '*':
			    case '+':
			    case '?':
				if (q == p)
				    goto Lchar;
				q--;
				break;

			    case '(':	case ')':
			    case '|':
			    case '[':	case ']':
			    case '.':	case '^':
			    case '$':	case '\\':
			    case '}':
				break;

			    default:
				continue;
			}
			break;
		    }
		    len = q - p;
		    if (len > 0)
		    {
			debug(regexp) printf("writing string len %d, c = '%c', pattern[p] = '%c'\n", len+1, c, pattern[p]);
			buf.reserve(5 + (1 + len) * rchar.sizeof);
			buf.write((attributes & REA.ignoreCase) ? REistring : REstring);
			buf.write(len + 1);
			buf.write(c);
			buf.write(pattern[p .. p + len]);
			p = q;
			break;
		    }
		}
		if (c >= 0x80)
		{
		    // Convert to dchar opcode
		    op = (op == REchar) ? REdchar : REidchar;
		    buf.write(op);
		    buf.write(c);
		}
		else
		{
		 Lchar:
		    debug(regexp) printf("It's an REchar '%c'\n", c);
		    buf.write(op);
		    buf.write(cast(char)c);
		}
		break;
	}
    }
    return 1;
}

private:
class Range
{
    uint maxc;
    uint maxb;
    OutBuffer buf;
    ubyte* base;
    BitArray bits;

    this(OutBuffer buf)
    {
	this.buf = buf;
	if (buf.data.length)
	    this.base = &buf.data[buf.offset];
    }

    void setbitmax(uint u)
    {   uint b;

	//printf("setbitmax(x%x), maxc = x%x\n", u, maxc);
	if (u > maxc)
	{
	    maxc = u;
	    b = u / 8;
	    if (b >= maxb)
	    {	uint u2;

		u2 = base ? base - &buf.data[0] : 0;
		++b;
		version (BigEndian)
		{
		    while (b & (uint.sizeof-1))
			++b;
		}
		
		buf.fill0(b - maxb);
		base = &buf.data[u2];
		maxb = b;
		// %% moved array recreate out of this condition
		bits.ptr = cast(uint*)this.base;
 	    }
	    //bits = (cast(bit*)this.base)[0 .. maxc + 1];
	    bits.len = maxc + 1;
	}
    }

    void setbit2(uint u)
    {
	setbitmax(u + 1);
	//printf("setbit2 [x%02x] |= x%02x\n", u >> 3, 1 << (u & 7));
	bits[u] = 1;
    }

};

int parseRange()
{   ubyte op;
    int c;
    int c2;
    uint i;
    uint cmax;
    uint offset;

    cmax = 0x7F;
    p++;
    op = REbit;
    if (p == pattern.length)
	goto Lerr;
    if (pattern[p] == '^')
    {   p++;
	op = REnotbit;
	if (p == pattern.length)
	    goto Lerr;
    }
    buf.write(op);
    offset = buf.offset;
    buf.write(cast(uint)0);		// reserve space for length
    buf.reserve(128 / 8);
    auto r = new Range(buf);
    if (op == REnotbit)
	r.setbit2(0);
    switch (pattern[p])
    {
	case ']':
	case '-':
	    c = pattern[p];
	    p++;
	    r.setbit2(c);
	    break;

	default:
	    break;
    }

    enum RS { start, rliteral, dash };
    RS rs;

    rs = RS.start;
    for (;;)
    {
	if (p == pattern.length)
	    goto Lerr;
	switch (pattern[p])
	{
	    case ']':
		switch (rs)
		{   case RS.dash:
			r.setbit2('-');
		    case RS.rliteral:
			r.setbit2(c);
			break;
		    case RS.start:
			break;
		    default:
			assert(0);
		}
		p++;
		break;

	    case '\\':
		p++;
		r.setbitmax(cmax);
		if (p == pattern.length)
		    goto Lerr;
		switch (pattern[p])
		{
		    case 'd':
			for (i = '0'; i <= '9'; i++)
			    r.bits[i] = 1;
			goto Lrs;

		    case 'D':
			for (i = 1; i < '0'; i++)
			    r.bits[i] = 1;
			for (i = '9' + 1; i <= cmax; i++)
			    r.bits[i] = 1;
			goto Lrs;

		    case 's':
			for (i = 0; i <= cmax; i++)
			    if (isspace(i))
				r.bits[i] = 1;
			goto Lrs;

		    case 'S':
			for (i = 1; i <= cmax; i++)
			    if (!isspace(i))
				r.bits[i] = 1;
			goto Lrs;

		    case 'w':
			for (i = 0; i <= cmax; i++)
			    if (isword(cast(rchar)i))
				r.bits[i] = 1;
			goto Lrs;

		    case 'W':
			for (i = 1; i <= cmax; i++)
			    if (!isword(cast(rchar)i))
				r.bits[i] = 1;
			goto Lrs;

		    Lrs:
			switch (rs)
			{   case RS.dash:
				r.setbit2('-');
			    case RS.rliteral:
				r.setbit2(c);
				break;
			    default:
				break;
			}
			rs = RS.start;
			continue;

		    default:
			break;
		}
		c2 = escape();
		goto Lrange;

	    case '-':
		p++;
		if (rs == RS.start)
		    goto Lrange;
		else if (rs == RS.rliteral)
		    rs = RS.dash;
		else if (rs == RS.dash)
		{
		    r.setbit2(c);
		    r.setbit2('-');
		    rs = RS.start;
		}
		continue;

	    default:
		c2 = pattern[p];
		p++;
	    Lrange:
		switch (rs)
		{   case RS.rliteral:
			r.setbit2(c);
		    case RS.start:
			c = c2;
			rs = RS.rliteral;
			break;

		    case RS.dash:
			if (c > c2)
			{   error("inverted range in character class");
			    return 0;
			}
			r.setbitmax(c2);
			//printf("c = %x, c2 = %x\n",c,c2);
			for (; c <= c2; c++)
			    r.bits[c] = 1;
			rs = RS.start;
			break;

		    default:
			assert(0);
		}
		continue;
	}
	break;
    }
    if (attributes & REA.ignoreCase)
    {
	// BUG: what about dchar?
	r.setbitmax(0x7F);
	for (c = 'a'; c <= 'z'; c++)
	{
	    if (r.bits[c])
		r.bits[c + 'A' - 'a'] = 1;
	    else if (r.bits[c + 'A' - 'a'])
		r.bits[c] = 1;
	}
    }
    //printf("maxc = %d, maxb = %d\n",r.maxc,r.maxb);
    (cast(ushort *)&buf.data[offset])[0] = cast(ushort)r.maxc;
    (cast(ushort *)&buf.data[offset])[1] = cast(ushort)r.maxb;
    return 1;

Lerr:
    error("invalid range");
    return 0;
}

void error(char[] msg)
{
    errors++;
    debug(regexp) printf("error: %.*s\n", cast(int) msg.length, msg.ptr);
//assert(0);
//*(char*)0=0;
    throw new RegExpException(msg);
}

// p is following the \ char
int escape()
in
{
    assert(p < pattern.length);
}
body
{   int c;
    int i;
    rchar tc;

    c = pattern[p];		// none of the cases are multibyte
    switch (c)
    {
	case 'b':    c = '\b';	break;
	case 'f':    c = '\f';	break;
	case 'n':    c = '\n';	break;
	case 'r':    c = '\r';	break;
	case 't':    c = '\t';	break;
	case 'v':    c = '\v';	break;

	// BUG: Perl does \a and \e too, should we?

	case 'c':
	    ++p;
	    if (p == pattern.length)
		goto Lretc;
	    c = pattern[p];
	    // Note: we are deliberately not allowing dchar letters
	    if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')))
	    {
	     Lcerr:
		error("letter expected following \\c");
		return 0;
	    }
	    c &= 0x1F;
	    break;

	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	    c -= '0';
	    for (i = 0; i < 2; i++)
	    {
		p++;
		if (p == pattern.length)
		    goto Lretc;
		tc = pattern[p];
		if ('0' <= tc && tc <= '7')
		{   c = c * 8 + (tc - '0');
		    // Treat overflow as if last
		    // digit was not an octal digit
		    if (c >= 0xFF)
		    {	c >>= 3;
			return c;
		    }
		}
		else
		    return c;
	    }
	    break;

	case 'x':
	    c = 0;
	    for (i = 0; i < 2; i++)
	    {
		p++;
		if (p == pattern.length)
		    goto Lretc;
		tc = pattern[p];
		if ('0' <= tc && tc <= '9')
		    c = c * 16 + (tc - '0');
		else if ('a' <= tc && tc <= 'f')
		    c = c * 16 + (tc - 'a' + 10);
		else if ('A' <= tc && tc <= 'F')
		    c = c * 16 + (tc - 'A' + 10);
		else if (i == 0)	// if no hex digits after \x
		{
		    // Not a valid \xXX sequence
		    return 'x';
		}
		else
		    return c;
	    }
	    break;

	case 'u':
	    c = 0;
	    for (i = 0; i < 4; i++)
	    {
		p++;
		if (p == pattern.length)
		    goto Lretc;
		tc = pattern[p];
		if ('0' <= tc && tc <= '9')
		    c = c * 16 + (tc - '0');
		else if ('a' <= tc && tc <= 'f')
		    c = c * 16 + (tc - 'a' + 10);
		else if ('A' <= tc && tc <= 'F')
		    c = c * 16 + (tc - 'A' + 10);
		else
		{
		    // Not a valid \uXXXX sequence
		    p -= i;
		    return 'u';
		}
	    }
	    break;

	default:
	    break;
    }
    p++;
Lretc:
    return c;
}

/* ==================== optimizer ======================= */

void optimize()
{   ubyte[] prog;

    debug(regexp) printf("RegExp.optimize()\n");
    prog = buf.toBytes();
    for (size_t i = 0; 1;)
    {
	//printf("\tprog[%d] = %d, %d\n", i, prog[i], REstring);
	switch (prog[i])
	{
	    case REend:
	    case REanychar:
	    case REanystar:
	    case REbackref:
	    case REeol:
	    case REchar:
	    case REichar:
	    case REdchar:
	    case REidchar:
	    case REstring:
	    case REistring:
	    case REtestbit:
	    case REbit:
	    case REnotbit:
	    case RErange:
	    case REnotrange:
	    case REwordboundary:
	    case REnotwordboundary:
	    case REdigit:
	    case REnotdigit:
	    case REspace:
	    case REnotspace:
	    case REword:
	    case REnotword:
		return;

	    case REbol:
		i++;
		continue;

	    case REor:
	    case REnm:
	    case REnmq:
	    case REparen:
	    case REgoto:
	    {
		auto bitbuf = new OutBuffer;
		auto r = new Range(bitbuf);
		uint offset;

		offset = i;
		if (starrchars(r, prog[i .. prog.length]))
		{
		    debug(regexp) printf("\tfilter built\n");
		    buf.spread(offset, 1 + 4 + r.maxb);
		    buf.data[offset] = REtestbit;
		    (cast(ushort *)&buf.data[offset + 1])[0] = cast(ushort)r.maxc;
		    (cast(ushort *)&buf.data[offset + 1])[1] = cast(ushort)r.maxb;
		    i = offset + 1 + 4;
		    buf.data[i .. i + r.maxb] = r.base[0 .. r.maxb];
		}
		return;
	    }
	    default:
		assert(0);
	}
    }
}

/////////////////////////////////////////
// OR the leading character bits into r.
// Limit the character range from 0..7F,
// trymatch() will allow through anything over maxc.
// Return 1 if success, 0 if we can't build a filter or
// if there is no point to one.

int starrchars(Range r, ubyte[] prog)
{   rchar c;
    uint maxc;
    uint maxb;
    uint len;
    uint b;
    uint n;
    uint m;
    ubyte* pop;

    //printf("RegExp.starrchars(prog = %p, progend = %p)\n", prog, progend);
    for (size_t i = 0; i < prog.length;)
    {
	switch (prog[i])
	{
	    case REchar:
		c = prog[i + 1];
		if (c <= 0x7F)
		    r.setbit2(c);
		return 1;

	    case REichar:
		c = prog[i + 1];
		if (c <= 0x7F)
		{   r.setbit2(c);
		    r.setbit2(std.ctype.tolower(cast(rchar)c));
		}
		return 1;

	    case REdchar:
	    case REidchar:
		return 1;

	    case REanychar:
		return 0;		// no point

	    case REstring:
		len = *cast(uint *)&prog[i + 1];
		assert(len);
		c = *cast(rchar *)&prog[i + 1 + uint.sizeof];
		debug(regexp) printf("\tREstring %d, '%c'\n", len, c);
		if (c <= 0x7F)
		    r.setbit2(c);
		return 1;

	    case REistring:
		len = *cast(uint *)&prog[i + 1];
		assert(len);
		c = *cast(rchar *)&prog[i + 1 + uint.sizeof];
		debug(regexp) printf("\tREistring %d, '%c'\n", len, c);
		if (c <= 0x7F)
		{   r.setbit2(std.ctype.toupper(cast(rchar)c));
		    r.setbit2(std.ctype.tolower(cast(rchar)c));
		}
		return 1;

	    case REtestbit:
	    case REbit:
		maxc = (cast(ushort *)&prog[i + 1])[0];
		maxb = (cast(ushort *)&prog[i + 1])[1];
		if (maxc <= 0x7F)
		    r.setbitmax(maxc);
		else
		    maxb = r.maxb;
		for (b = 0; b < maxb; b++)
		    r.base[b] |= prog[i + 1 + 4 + b];
		return 1;

	    case REnotbit:
		maxc = (cast(ushort *)&prog[i + 1])[0];
		maxb = (cast(ushort *)&prog[i + 1])[1];
		if (maxc <= 0x7F)
		    r.setbitmax(maxc);
		else
		    maxb = r.maxb;
		for (b = 0; b < maxb; b++)
		    r.base[b] |= ~prog[i + 1 + 4 + b];
		return 1;

	    case REbol:
	    case REeol:
		return 0;

	    case REor:
		len = (cast(uint *)&prog[i + 1])[0];
		return starrchars(r, prog[i + 1 + uint.sizeof .. prog.length]) &&
		       starrchars(r, prog[i + 1 + uint.sizeof + len .. prog.length]);

	    case REgoto:
		len = (cast(uint *)&prog[i + 1])[0];
		i += 1 + uint.sizeof + len;
		break;

	    case REanystar:
		return 0;

	    case REnm:
	    case REnmq:
		// len, n, m, ()
		len = (cast(uint *)&prog[i + 1])[0];
		n   = (cast(uint *)&prog[i + 1])[1];
		m   = (cast(uint *)&prog[i + 1])[2];
		pop = &prog[i + 1 + uint.sizeof * 3];
		if (!starrchars(r, pop[0 .. len]))
		    return 0;
		if (n)
		    return 1;
		i += 1 + uint.sizeof * 3 + len;
		break;

	    case REparen:
		// len, ()
		len = (cast(uint *)&prog[i + 1])[0];
		n   = (cast(uint *)&prog[i + 1])[1];
		pop = &prog[0] + i + 1 + uint.sizeof * 2;
		return starrchars(r, pop[0 .. len]);

	    case REend:
		return 0;

	    case REwordboundary:
	    case REnotwordboundary:
		return 0;

	    case REdigit:
		r.setbitmax('9');
		for (c = '0'; c <= '9'; c++)
		    r.bits[c] = 1;
		return 1;

	    case REnotdigit:
		r.setbitmax(0x7F);
		for (c = 0; c <= '0'; c++)
		    r.bits[c] = 1;
		for (c = '9' + 1; c <= r.maxc; c++)
		    r.bits[c] = 1;
		return 1;

	    case REspace:
		r.setbitmax(0x7F);
		for (c = 0; c <= r.maxc; c++)
		    if (isspace(c))
			r.bits[c] = 1;
		return 1;

	    case REnotspace:
		r.setbitmax(0x7F);
		for (c = 0; c <= r.maxc; c++)
		    if (!isspace(c))
			r.bits[c] = 1;
		return 1;

	    case REword:
		r.setbitmax(0x7F);
		for (c = 0; c <= r.maxc; c++)
		    if (isword(cast(rchar)c))
			r.bits[c] = 1;
		return 1;

	    case REnotword:
		r.setbitmax(0x7F);
		for (c = 0; c <= r.maxc; c++)
		    if (!isword(cast(rchar)c))
			r.bits[c] = 1;
		return 1;

	    case REbackref:
		return 0;

	    default:
		assert(0);
	}
    }
    return 1;
}

/* ==================== replace ======================= */

/***********************
 * After a match is found with test(), this function
 * will take the match results and, using the format
 * string, generate and return a new string.
 */

public rchar[] replace(rchar[] format)
{
    return replace3(format, input, pmatch[0 .. re_nsub + 1]);
}

// Static version that doesn't require a RegExp object to be created

public static rchar[] replace3(rchar[] format, rchar[] input, regmatch_t[] pmatch)
{
    rchar[] result;
    uint c2;
    int rm_so;
    int rm_eo;
    int i;

//    printf("replace3(format = '%.*s', input = '%.*s')\n", format, input);
    result.length = format.length;
    result.length = 0;
    for (size_t f = 0; f < format.length; f++)
    {
	auto c = format[f];
      L1:
	if (c != '$')
	{
	    result ~= c;
	    continue;
	}
	++f;
	if (f == format.length)
	{
	    result ~= '$';
	    break;
	}
	c = format[f];
	switch (c)
	{
	    case '&':
		rm_so = pmatch[0].rm_so;
		rm_eo = pmatch[0].rm_eo;
		goto Lstring;

	    case '`':
		rm_so = 0;
		rm_eo = pmatch[0].rm_so;
		goto Lstring;

	    case '\'':
		rm_so = pmatch[0].rm_eo;
		rm_eo = input.length;
		goto Lstring;

	    case '0': case '1': case '2': case '3': case '4':
	    case '5': case '6': case '7': case '8': case '9':
		i = c - '0';
		if (f + 1 == format.length)
		{
		    if (i == 0)
		    {
			result ~= '$';
			result ~= c;
			continue;
		    }
		}
		else
		{
		    c2 = format[f + 1];
		    if (c2 >= '0' && c2 <= '9')
		    {   i = (c - '0') * 10 + (c2 - '0');
			f++;
		    }
		    if (i == 0)
		    {
			result ~= '$';
			result ~= c;
			c = cast(char)c2;
			goto L1;
		    }
		}

		if (i < pmatch.length)
		{   rm_so = pmatch[i].rm_so;
		    rm_eo = pmatch[i].rm_eo;
		    goto Lstring;
		}
		break;

	    Lstring:
		if (rm_so != rm_eo)
		    result ~= input[rm_so .. rm_eo];
		break;

	    default:
		result ~= '$';
		result ~= c;
		break;
	}
    }
    return result;
}

/************************************
 * Like replace(char[] format), but uses old style formatting:
		<table border=1 cellspacing=0 cellpadding=5>
		<th>Format
		<th>Description
		<tr>
		<td><b>&</b>
		<td>replace with the match
		</tr>
		<tr>
		<td><b>\</b><i>n</i>
		<td>replace with the <i>n</i>th parenthesized match, <i>n</i> is 1..9
		</tr>
		<tr>
		<td><b>\</b><i>c</i>
		<td>replace with char <i>c</i>.
		</tr>
		</table>
 */

public rchar[] replaceOld(rchar[] format)
{
    rchar[] result;

//printf("replace: this = %p so = %d, eo = %d\n", this, pmatch[0].rm_so, pmatch[0].rm_eo);
//printf("3input = '%.*s'\n", input);
    result.length = format.length;
    result.length = 0;
    for (size_t i; i < format.length; i++)
    {
	auto c = format[i];
	switch (c)
	{
	    case '&':
//printf("match = '%.*s'\n", input[pmatch[0].rm_so .. pmatch[0].rm_eo]);
		result ~= input[pmatch[0].rm_so .. pmatch[0].rm_eo];
		break;

	    case '\\':
		if (i + 1 < format.length)
		{
		    c = format[++i];
		    if (c >= '1' && c <= '9')
		    {   uint j;

			j = c - '0';
			if (j <= re_nsub && pmatch[j].rm_so != pmatch[j].rm_eo)
			    result ~= input[pmatch[j].rm_so .. pmatch[j].rm_eo];
			break;
		    }
		}
		result ~= c;
		break;

	    default:
		result ~= c;
		break;
	}
    }
    return result;
}

}

unittest
{   // Created and placed in public domain by Don Clugston

    auto m = search("aBC r s", `bc\x20r[\40]s`, "i");
    assert(m.pre=="a");
    assert(m.match(0)=="BC r s");
    auto m2 = search("7xxyxxx", `^\d([a-z]{2})\D\1`);
    assert(m2.match(0)=="7xxyxx");
    // Just check the parsing.
    auto m3 = search("dcbxx", `ca|b[\d\]\D\s\S\w-\W]`);
    auto m4 = search("xy", `[^\ca-\xFa\r\n\b\f\t\v\0123]{2,485}$`);
    auto m5 = search("xxx", `^^\r\n\b{13,}\f{4}\t\v\u02aF3a\w\W`);
    auto m6 = search("xxy", `.*y`);
    assert(m6.match(0)=="xxy");
    auto m7 = search("QWDEfGH", "(ca|b|defg)+", "i");
    assert(m7.match(0)=="DEfG");
    auto m8 = search("dcbxx", `a?\B\s\S`);
    auto m9 = search("dcbxx", `[-w]`);
    auto m10 = search("dcbsfd", `aB[c-fW]dB|\d|\D|\u012356|\w|\W|\s|\S`, "i");
    auto m11 = search("dcbsfd", `[]a-]`);
    m.replaceOld(`a&b\1c`);
    m.replace(`a$&b$'$1c`);
}