view dmd/speller.c @ 1650:40bd4a0d4870

Update to work with LLVM 2.7. Removed use of dyn_cast; LLVM now compiles without exceptions and RTTI by default. We do need exceptions for the libconfig stuff, but RTTI isn't necessary (anymore). Debug info needs to be rewritten, as in LLVM 2.7 the format has completely changed. To have something to look at while rewriting, the old code has been wrapped inside #ifndef DISABLE_DEBUG_INFO; this means that you have to define this to compile at the moment. Updated Tango 0.99.9 patch to include updated EH runtime code, which is needed for LLVM 2.7 as well.
author Tomas Lindquist Olsen
date Wed, 19 May 2010 12:42:32 +0200
parents 00cd99bedf06
children
line wrap: on
line source


#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "speller.h"

/* ASCII lowercase letters, uppercase letters, decimal digits and underscore,
 * in that order, as one NUL-terminated string.
 * NOTE(review): presumably meant as a ready-made `charset` argument for
 * speller(); confirm against callers.
 */
const char idchars[] =
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "_";

/**************************************************
 * Runs every edit-distance-one candidate of seed[] through fp.
 * Helper for speller(); the caller owns buf.
 * Input:
 *      seed     wrongly spelled word, non-empty
 *      seedlen  strlen(seed)
 *      buf      scratch space of at least seedlen + 2 bytes
 *      fp       search function
 *      fparg    argument to search function
 *      charset  characters to substitute/insert; may be NULL or empty
 * Returns:
 *      NULL     fp() rejected every candidate
 *      void*    first non-NULL value returned by fp()
 */
static void *speller_search(const char *seed, size_t seedlen, char *buf,
                            fp_speller_t fp, void *fparg, const char *charset)
{
    /* Deletions: start with seed minus its first character, then put one
     * character back per step so the gap moves right.
     */
    memcpy(buf, seed + 1, seedlen);     // seedlen - 1 chars plus the 0
    for (size_t i = 0; i < seedlen; i++)
    {
        //printf("del buf = '%s'\n", buf);
        void *p = (*fp)(fparg, buf);
        if (p)
            return p;

        // The final restore overwrites the terminator; that is harmless
        // because the next pass re-copies seed before buf is read again.
        buf[i] = seed[i];
    }

    /* Transpositions */
    memcpy(buf, seed, seedlen + 1);
    for (size_t i = 0; i + 1 < seedlen; i++)
    {
        // swap [i] and [i + 1]
        buf[i] = seed[i + 1];
        buf[i + 1] = seed[i];

        //printf("tra buf = '%s'\n", buf);
        void *p = (*fp)(fparg, buf);
        if (p)
            return p;

        // buf[i + 1] is rewritten by the next iteration's swap
        buf[i] = seed[i];
    }

    // Substitutions and insertions need replacement characters
    if (!charset || !*charset)
        return NULL;

    /* Substitutions.
     * Every charset character is tried, including the one already at that
     * position, so fp can be called with the unmodified seed.
     */
    memcpy(buf, seed, seedlen + 1);
    for (size_t i = 0; i < seedlen; i++)
    {
        for (const char *s = charset; *s; s++)
        {
            buf[i] = *s;

            //printf("sub buf = '%s'\n", buf);
            void *p = (*fp)(fparg, buf);
            if (p)
                return p;
        }
        buf[i] = seed[i];
    }

    /* Insertions: this is the pass that uses the extra byte in buf */
    memcpy(buf + 1, seed, seedlen + 1);
    for (size_t i = 0; i <= seedlen; i++)       // yes, do seedlen+1 iterations
    {
        for (const char *s = charset; *s; s++)
        {
            buf[i] = *s;

            //printf("ins buf = '%s'\n", buf);
            void *p = (*fp)(fparg, buf);
            if (p)
                return p;
        }
        buf[i] = seed[i];       // going past end of seed[] is ok, as we hit the 0
    }

    return NULL;                // didn't find any corrections
}

/**************************************************
 * Looks for correct spelling.
 * Currently only looks a 'distance' of one from the seed[].
 * This does an exhaustive search, so can potentially be very slow.
 * Candidates are tried in this order: deletions, transpositions, then
 * (only if charset is non-empty) substitutions and insertions.
 * Input:
 *      seed     wrongly spelled word
 *      fp       search function
 *      fparg    argument to search function
 *      charset  character set
 * Returns:
 *      NULL     no correct spellings found; also returned when seed or fp
 *               is NULL, seed is empty, or scratch memory cannot be obtained
 *      void*    value returned by fp() for first possible correct spelling
 */

void *speller(const char *seed, fp_speller_t fp, void *fparg, const char *charset)
{
    // Seeds that fit here (with room for the extra char and the 0) never
    // touch the heap.
    enum { STACK_BUF_SIZE = 64 };

    if (!seed || !fp)
        return NULL;

    size_t seedlen = strlen(seed);
    if (!seedlen)
        return NULL;

    char stackbuf[STACK_BUF_SIZE];
    char *heapbuf = NULL;
    char *buf = stackbuf;

    /* Long seeds use the heap rather than an unbounded stack allocation,
     * so running out of memory is reported as "no matches" instead of
     * overflowing the stack.
     */
    if (seedlen + 2 > sizeof(stackbuf))         // leave space for extra char
    {
        // cast mirrors the original allocation and keeps this valid as C++
        heapbuf = (char *)malloc(seedlen + 2);
        if (!heapbuf)
            return NULL;                        // no matches
        buf = heapbuf;
    }

    void *result = speller_search(seed, seedlen, buf, fp, fparg, charset);

    free(heapbuf);                              // free(NULL) is a no-op
    return result;
}