Anagram finder in C, OO-inspired

This is the OO-inspired C implementation of the anagram program. The problem and the solution is described in the main anagram article. Do read that article first.

This OO-inspired C implementation is as close as you can get to the C++ program. Member functions are replaced by C functions that take a pointer as their first argument. For building the unified version of each word, it uses the sliding windows technique also used by by the optimized C++ version.

Here, OO-inspired means that it is designed around data types with associated operations, and that it is a close match to what a real OO program would look like.

struct Word

Members in Word.

  • int offset The offset into the letter-only string. Used for eliminating overlapping candidates.
  • char *value The word at offset.
  • char *unified The unified version of value.

Type-specific functions for Word.

  • word_new(int offset, char *value, char *unified) The constructor.
  • word_delete(Word *) The destructor.

class AnagramFinder

The type AnagramFinder is abstract; it is possible to move the definition of both the type and its operations to a separate source file, and to allow access to it by opaque pointers only.

Member data in AnagramFinder.

  • int len The length of the anagrams that this AnagramFinder looks for.
  • Word **words All the possible words. If there are 512 letters and len is 4, then there are 509 words.
  • int nwords The allocated number of words, equal to the actual number of words.
  • Word *accepted[20] All successful candidates for a single set of anagram words.
  • int naccepted The current number of successful candidates.
  • char letters[26] Counts the number of occurences of each letter of the English alphabet in the current word.

Type-specific functions for AnagramFinder.

  • AnagramFinder * anagram_finder_new ( char *text, int len ) Constructs a new finder for the given length. Initializes words, but does not sort it.
  • void anagram_finder_delete ( AnagramFinder *this ) Destructor.
  • bool anagram_finder_compare_words ( void const *a, void const *b ) Passed to qsort().
  • char anagram_finder_get_unified ( AnagramFinder *this, char *tmp ) Builds the unified word from letters[].
  • void anagram_finder_print_one_result ( AnagramFinder *this ) Prints the words in accepted.
  • void anagram_finder_add_candidate ( AnagramFinder *this, Word *cand ) Tries to add a candidate to accepted. Rejects duplicates of words already in the list. Rejects words that overlap words already in the list.
  • void anagram_finder_add_range ( AnagramFinder *this, int begin, int end ) Passes each of the words in the range to add_candidate.
  • void anagram_finder_find_all ( AnagramFinder *this ) Finds all the anagram set of with words of length len.

Global data and functions.

  • int read_text(char *text, int allocated) Reads all of the input data, discards all characters except letters, and converts all lower-case letter to upper-case letters.
  • int main(int argc, char *argv[]) Parses command line arguments. There may be s single integer for a single run of the given length, or a range given by two numbers. The default is a run for length 6. A new AnagramFinder is created, run and deleted for each length.

The source code

# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <ctype.h>
# include <assert.h>

# define VECTSIZE(x) ((sizeof (x)) / (sizeof (x)[0]))
# define MINIMUM 3

static struct App {
    int comparisons;
} app;

//---------------------------------------------------------------------------------------- Word

typedef struct {
    int    offset;
    char  *value;
    char  *unified;
} Word;

Word *word_new(int offset, char *value, char *unified, int len) {
    Word *rval = (Word *)malloc(sizeof *rval);
    rval->offset  = offset;
    rval->value   = strndup(value, len);
    rval->unified = strdup(unified);
    return rval;
}

void word_delete(Word *this) {
    free(this->unified);
    free(this->value);
    free(this);
}

//---------------------------------------------------------------------------------------- AnagramFinder

typedef struct {
    int      len;
    int     nwords;
    Word **  words;
    Word  *  accepted[20];
    int     naccepted;     //--- Current size
    int      low_offset;
    int      letters[26];
} AnagramFinder;

char *anagram_finder_get_unified(AnagramFinder *this, char *tmp);

AnagramFinder *anagram_finder_new(char *text, int len) {
    AnagramFinder *this = (AnagramFinder *)malloc(sizeof *this);
    memset(this, 0, sizeof *this);
    this->len      = len;
    this->nwords   = strlen(text) - len + 1;
    this->words    = (Word **)malloc(this->nwords * sizeof this->words[0]);
    for (int n = 0; n < len-1; n++)
        this->letters[text[n] - 'A']++;
    for (int n = 0; n < this->nwords; n++) {
        char unified[40];
        this->letters[text[n+len-1] - 'A']++;
        this->words[n] = word_new(n, text+n, anagram_finder_get_unified(this, unified), this->len);
        this->letters[text[n] - 'A']--;
    }
    return this;
}

void anagram_finder_delete(AnagramFinder *this) {
    for (int n = 0; n < this->nwords; n++)
        word_delete(this->words[n]);
    free(this->words);
    free(this);
}

int compare_words(void const *aa, void  const *bb) {
    Word const *a = *(Word const **)aa;
    Word const *b = *(Word const **)bb;
    app.comparisons++;
    int rval = strcmp(a->unified, b->unified);
    return rval ? rval : (a->offset - b->offset);
}

char *anagram_finder_get_unified(AnagramFinder *this, char *tmp) {
    char *p = tmp;
    char letter = 'A';
    for (unsigned n = 0; n < VECTSIZE(this->letters); n++, letter++)
        for (int m = 0; m < this->letters[n]; m++)
            *p++ = letter;
    *p++ = 0;
    return tmp;
}

void anagram_finder_print_one_result(AnagramFinder *this) {
    printf("%3d:", this->naccepted);
    for (int n = 0; n < this->naccepted; n++) {
        Word *it = this->accepted[n];
        printf(" %4d:%s", it->offset, it->value);
    }
    printf("\n");
}

void anagram_finder_add_candidate(AnagramFinder *this, Word *cand) {
    if (cand->offset < this->low_offset)
        return;
    for (int n = 0; n < this->naccepted; n++)
        if (0 == strcmp(cand->value, this->accepted[n]->value))
            return;
    this->accepted[this->naccepted++] = cand;
    this->low_offset = cand->offset + this->len;
}

void anagram_finder_add_range(AnagramFinder *this, int begin, int end) {
    if (end-begin >= MINIMUM) {
        this->naccepted  = 0;
        this->low_offset = 0;
        for (int n = begin; n < end; n++)
            anagram_finder_add_candidate(this, this->words[n]);
        if (this->naccepted >= MINIMUM)
            anagram_finder_print_one_result(this);
    }
}

void anagram_finder_find_all(AnagramFinder *this) {
    qsort(this->words, this->nwords, sizeof this->words[0], compare_words);
    int begin = 0;
    for (int n = 1; n < this->nwords; n++) {
        if (strcmp(this->words[n]->unified, this->words[begin]->unified)) {
            anagram_finder_add_range(this, begin, n);
            begin = n;
        }
    }
    anagram_finder_add_range(this, begin, this->nwords);
}

//---------------------------------------------------------------------------------------- main program

int read_text(char *text, int allocated)
{
    int ch;
    int end = 0;
    while (EOF != (ch = getchar())) {
        if (isalpha(ch)) {
            assert(end < allocated-1);
            text[end++] = toupper(ch);
        }
    }
    text[end] = 0;
    return end;
}

int main(int argc, char *argv[])
{
    char text[500 * 512 + 1];
    int from     = (argc > 1) ? atoi(argv[1]) : 6;
    int to       = (argc > 2) ? atoi(argv[2]) : from;

    read_text(text, VECTSIZE(text));

    for (int len = from; len <= to; len++) {
        printf("%slen = %d\n", (len > from) ? "\n" : "", len);
        AnagramFinder *finder = anagram_finder_new(text, len);
        anagram_finder_find_all(finder);
        anagram_finder_delete(finder);
    }

    printf("string length = %5lu, comparisons = %d\n", strlen(text), app.comparisons);
    return 0;
}

You can reach me by email at “lars dash 7 dot sdu dot se” or by telephone +46 705 189090

View source for the content of this page.