ldc/dmd/speller.c


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

#include "speller.h"

const char idchars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";

/**************************************************
 * Looks for correct spelling.
 * Currently only looks a 'distance' of one from the seed[].
 * This does an exhaustive search, so can potentially be very slow.
 * Input:
 *      seed            wrongly spelled word
 *      fp              search function
 *      fparg           argument to search function
 *      charset         character set
 * Returns:
 *      NULL            no correct spellings found
 *      void*           value returned by fp() for first possible correct spelling
 */

void *spellerY(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
        const char *charset, size_t index)
{
    if (!seedlen)
        return NULL;
    assert(seed[seedlen] == 0);

    char tmp[30];
    char *buf;
    if (seedlen <= sizeof(tmp) - 2)
        buf = tmp;
    else
    {
        buf = (char *)alloca(seedlen + 2);    // leave space for extra char
        if (!buf)
            return NULL;                      // no matches
    }

    memcpy(buf, seed, index);

    /* Delete at seed[index] */
    if (index < seedlen)
    {
        memcpy(buf + index, seed + index + 1, seedlen - index);
        assert(buf[seedlen - 1] == 0);
        void *p = (*fp)(fparg, buf);
        if (p)
            return p;
    }

    if (charset && *charset)
    {
        /* Substitutions */
        if (index < seedlen)
        {
            memcpy(buf, seed, seedlen + 1);
            for (const char *s = charset; *s; s++)
            {
                buf[index] = *s;

                //printf("sub buf = '%s'\n", buf);
                void *p = (*fp)(fparg, buf);
                if (p)
                    return p;
            }
            assert(buf[seedlen] == 0);
        }

        /* Insertions */
        memcpy (buf + index + 1, seed + index, seedlen + 1 - index);

        for (const char *s = charset; *s; s++)
        {
            buf[index] = *s;

            //printf("ins buf = '%s'\n", buf);
            void *p = (*fp)(fparg, buf);
            if (p)
                return p;
        }
        assert(buf[seedlen + 1] == 0);
    }

    return NULL;                // didn't find any corrections
}

void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
        const char *charset, int flag)
{
    if (!seedlen)
        return NULL;

    char tmp[30];
    char *buf;
    if (seedlen <= sizeof(tmp) - 2)
        buf = tmp;
    else
    {
        buf = (char *)alloca(seedlen + 2);    // leave space for extra char
        if (!buf)
            return NULL;                      // no matches
    }

    /* Deletions */
    memcpy(buf, seed + 1, seedlen);
    for (int i = 0; i < seedlen; i++)
    {
        //printf("del buf = '%s'\n", buf);
        void *p;
        if (flag)
            p = spellerY(buf, seedlen - 1, fp, fparg, charset, i);
        else
            p = (*fp)(fparg, buf);
        if (p)
            return p;

        buf[i] = seed[i];
    }

    /* Transpositions */
    if (!flag)
    {
        memcpy(buf, seed, seedlen + 1);
        for (int i = 0; i + 1 < seedlen; i++)
        {
            // swap [i] and [i + 1]
            buf[i] = seed[i + 1];
            buf[i + 1] = seed[i];

            //printf("tra buf = '%s'\n", buf);
            void *p = (*fp)(fparg, buf);
            if (p)
                return p;

            buf[i] = seed[i];
        }
    }

    if (charset && *charset)
    {
        /* Substitutions */
        memcpy(buf, seed, seedlen + 1);
        for (int i = 0; i < seedlen; i++)
        {
            for (const char *s = charset; *s; s++)
            {
                buf[i] = *s;

                //printf("sub buf = '%s'\n", buf);
                void *p;
                if (flag)
                    p = spellerY(buf, seedlen, fp, fparg, charset, i + 1);
                else
                    p = (*fp)(fparg, buf);
                if (p)
                    return p;
            }
            buf[i] = seed[i];
        }

        /* Insertions */
        memcpy(buf + 1, seed, seedlen + 1);
        for (int i = 0; i <= seedlen; i++)      // yes, do seedlen+1 iterations
        {
            for (const char *s = charset; *s; s++)
            {
                buf[i] = *s;

                //printf("ins buf = '%s'\n", buf);
                void *p;
                if (flag)
                    p = spellerY(buf, seedlen + 1, fp, fparg, charset, i + 1);
                else
                    p = (*fp)(fparg, buf);
                if (p)
                    return p;
            }
            buf[i] = seed[i];   // going past end of seed[] is ok, as we hit the 0
        }
    }

    return NULL;                // didn't find any corrections
}

void *speller(const char *seed, fp_speller_t fp, void *fparg, const char *charset)
{
    size_t seedlen = strlen(seed);
    for (int distance = 0; distance < 2; distance++)
    {   void *p = spellerX(seed, seedlen, fp, fparg, charset, distance);
        if (p)
            return p;
//      if (seedlen > 10)
//          break;
    }
    return NULL;   // didn't find it
}


#if UNITTEST

#include <stdio.h>
#include <string.h>
#include <assert.h>

void *speller_test(void *fparg, const char *s)
{
    //printf("speller_test(%s, %s)\n", fparg, s);
    if (strcmp((char *)fparg, s) == 0)
        return fparg;
    return NULL;
}

void unittest_speller()
{
    static const char *cases[][3] =
    {
        { "hello", "hell",  "y" },
        { "hello", "hel",   "y" },
        { "hello", "ello",  "y" },
        { "hello", "llo",   "y" },
        { "hello", "hellox",  "y" },
        { "hello", "helloxy",  "y" },
        { "hello", "xhello",  "y" },
        { "hello", "xyhello",  "y" },
        { "hello", "ehllo",  "y" },
        { "hello", "helol",  "y" },
        { "hello", "abcd",  "n" },
        //{ "ehllo", "helol", "y" },
        { "hello", "helxxlo", "y" },
        { "hello", "ehlxxlo", "n" },
        { "hello", "heaao", "y" },
        { "_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y" },
    };
    //printf("unittest_speller()\n");
    const void *p = speller("hello", &speller_test, (void *)"hell", idchars);
    assert(p != NULL);
    for (int i = 0; i < sizeof(cases)/sizeof(cases[0]); i++)
    {
        //printf("case [%d]\n", i);
        void *p = speller(cases[i][0], &speller_test, (void *)cases[i][1], idchars);
        if (p)
            assert(cases[i][2][0] == 'y');
        else
            assert(cases[i][2][0] == 'n');
    }
    //printf("unittest_speller() success\n");
}

#endif