Anagram finder in C

This is the traditional-style C implementation of the anagram program. The problem and the solution is described in the main anagram article. Do read that article first.

This version is a rework of the OO-inspired C version. It was created by taking taking the OO-inspired C version and:

  • removing struct AnagramFinder, and made all its members global,
  • removing all occurences of this, and
  • shortening function names. The prefix anagram_finder_ is now af_.

The functions af_init() and af_finish() correspond to the constructor and destructor in the C++ class AnagramFinder.

The resulting code as marginally faster, by 1-2%.

The source code

# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <ctype.h>
# include <assert.h>

# define VECTSIZE(x) ((sizeof (x)) / (sizeof (x)[0]))
# define MINIMUM 3
# define ZEROFILL(x) memset((x), 0, sizeof (x))

static struct App {
    int comparisons;
} app;

//---------------------------------------------------------------------------------------- Word

typedef struct {
    int    offset;
    char  *value;
    char  *unified;
} Word;

Word *word_new(int offset, char *value, char *unified, int len) {
    Word *rval = (Word *)malloc(sizeof *rval);
    assert(rval);
    rval->offset  = offset;
    rval->value   = strndup(value, len);
    rval->unified = strdup(unified);
    assert(rval->value);
    assert(rval->unified);
    return rval;
}

void word_delete(Word *w) {
    free(w->unified);
    free(w->value);
    free(w);
}

//---------------------------------------------------------------------------------------- AnagramFinder

static int     len;
static int    nwords;
static Word ** words;
static Word  * accepted[20];
static int    naccepted;     //--- Current size
static int     low_offset;
static int     letters[26];

char *af_get_unified(char *tmp);

void af_init(char *text, int llen) {
    len        = llen;
    nwords     = strlen(text) - len + 1;
    words      = (Word **)malloc(nwords * sizeof words[0]);
    ZEROFILL(accepted);
    naccepted  = 0;
    low_offset = 0;
    ZEROFILL(letters);
    for (int n = 0; n < len-1; n++)
        letters[text[n] - 'A']++;
    for (int n = 0; n < nwords; n++) {
        char unified[40];
        letters[text[n+len-1] - 'A']++;
        words[n] = word_new(n, text+n, af_get_unified(unified), len);
        letters[text[n] - 'A']--;
    }
}

void af_finish(void) {
    for (int n = 0; n < nwords; n++)
        word_delete(words[n]);
    free(words);
}

int af_compare_words(void const *aa, void  const *bb) {
    Word const *a = *(Word const **)aa;
    Word const *b = *(Word const **)bb;
    app.comparisons++;
    int rval = strcmp(a->unified, b->unified);
    return rval ? rval : (a->offset - b->offset);
}

char *af_get_unified(char *tmp) {
    char *p = tmp;
    char letter = 'A';
    for (unsigned n = 0; n < VECTSIZE(letters); n++, letter++)
        for (int m = 0; m < letters[n]; m++)
            *p++ = letter;
    *p++ = 0;
    return tmp;
}

void af_print_one_result(void) {
    printf("%3d:", naccepted);
    for (int n = 0; n < naccepted; n++) {
        Word *it = accepted[n];
        printf(" %4d:%s", it->offset, it->value);
    }
    printf("\n");
}

void af_add_candidate(Word *cand) {
    if (cand->offset < low_offset)
        return;
    for (int n = 0; n < naccepted; n++)
        if (0 == strcmp(cand->value, accepted[n]->value))
            return;
    accepted[naccepted++] = cand;
    low_offset = cand->offset + len;
}

void af_add_range(int begin, int end) {
    if (end-begin >= MINIMUM) {
        naccepted  = 0;
        low_offset = 0;
        for (int n = begin; n < end; n++)
            af_add_candidate(words[n]);
        if (naccepted >= MINIMUM)
            af_print_one_result();
    }
}

void af_find_all() {
    qsort(words, nwords, sizeof words[0], af_compare_words);
    int begin = 0;
    for (int n = 1; n < nwords; n++) {
        if (strcmp(words[n]->unified, words[begin]->unified)) {
            af_add_range(begin, n);
            begin = n;
        }
    }
    af_add_range(begin, nwords);
}

//---------------------------------------------------------------------------------------- main program

int read_text(char *text, int allocated)
{
    int ch;
    int end = 0;
    while (EOF != (ch = getchar())) {
        if (isalpha(ch)) {
            assert(end < allocated-1);
            text[end++] = toupper(ch);
        }
    }
    text[end] = 0;
    return end;
}

int main(int argc, char *argv[])
{
    char text[500 * 512 + 1];
    int from     = (argc > 1) ? atoi(argv[1]) : 6;
    int to       = (argc > 2) ? atoi(argv[2]) : from;

    read_text(text, VECTSIZE(text));

    for (int len = from; len <= to; len++) {
        printf("%slen = %d\n", (len > from) ? "\n" : "", len);
        af_init    (text, len);
        af_find_all();
        af_finish  ();
    }

    printf("string length = %5lu, comparisons = %d\n", strlen(text), app.comparisons);
    return 0;
}

You can reach me by email at “lars dash 7 dot sdu dot se” or by telephone +46 705 189090

View source for the content of this page.