Anagram finder in C, OO-inspired – Lars Nordenström's blog

This is the OO-inspired C implementation of the anagram program. The problem and the solution is described in the main anagram article. Do read that article first.

This OO-inspired C implementation is as close as you can get to the C++ program. Member functions are replaced by C functions that take a pointer as their first argument. For building the unified version of each word, it uses the sliding windows technique also used by by the optimized C++ version.

Here, OO-inspired means that it is designed around data types with associated operations, and that it is a close match to what a real OO program would look like.

struct Word

Members in Word.

int offset The offset into the letter-only string. Used for eliminating overlapping candidates.
char *value The word at offset.
char *unified The unified version of value.

Type-specific functions for Word.

word_new(int offset, char *value, char *unified) The constructor.
word_delete(Word *) The destructor.

class AnagramFinder

The type AnagramFinder is abstract; it is possible to move the definition of both the type and its operations to a separate source file, and to allow access to it by opaque pointers only.

Member data in AnagramFinder.

int len The length of the anagrams that this AnagramFinder looks for.
Word **words All the possible words. If there are 512 letters and len is 4, then there are 509 words.
int nwords The allocated number of words, equal to the actual number of words.
Word *accepted[20] All successful candidates for a single set of anagram words.
int naccepted The current number of successful candidates.
char letters[26] Counts the number of occurences of each letter of the English alphabet in the current word.

Type-specific functions for AnagramFinder.

AnagramFinder * anagram_finder_new ( char *text, int len ) Constructs a new finder for the given length. Initializes words, but does not sort it.
void anagram_finder_delete ( AnagramFinder *this ) Destructor.
bool anagram_finder_compare_words ( void const *a, void const *b ) Passed to qsort().
char anagram_finder_get_unified ( AnagramFinder *this, char *tmp ) Builds the unified word from letters[].
void anagram_finder_print_one_result ( AnagramFinder *this ) Prints the words in accepted.
void anagram_finder_add_candidate ( AnagramFinder *this, Word *cand ) Tries to add a candidate to accepted. Rejects duplicates of words already in the list. Rejects words that overlap words already in the list.
void anagram_finder_add_range ( AnagramFinder *this, int begin, int end ) Passes each of the words in the range to add_candidate.
void anagram_finder_find_all ( AnagramFinder *this ) Finds all the anagram set of with words of length len.

Global data and functions.

int read_text(char *text, int allocated) Reads all of the input data, discards all characters except letters, and converts all lower-case letter to upper-case letters.
int main(int argc, char *argv[]) Parses command line arguments. There may be s single integer for a single run of the given length, or a range given by two numbers. The default is a run for length 6. A new AnagramFinder is created, run and deleted for each length.

The source code

# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <ctype.h>
# include <assert.h>

# define VECTSIZE(x) ((sizeof (x)) / (sizeof (x)[0]))
# define MINIMUM 3

static struct App {
    int comparisons;
} app;

//---------------------------------------------------------------------------------------- Word

typedef struct {
    int    offset;
    char  *value;
    char  *unified;
} Word;

Word *word_new(int offset, char *value, char *unified, int len) {
    Word *rval = (Word *)malloc(sizeof *rval);
    rval->offset  = offset;
    rval->value   = strndup(value, len);
    rval->unified = strdup(unified);
    return rval;
}

void word_delete(Word *this) {
    free(this->unified);
    free(this->value);
    free(this);
}

//---------------------------------------------------------------------------------------- AnagramFinder

typedef struct {
    int      len;
    int     nwords;
    Word **  words;
    Word  *  accepted[20];
    int     naccepted;     //--- Current size
    int      low_offset;
    int      letters[26];
} AnagramFinder;

char *anagram_finder_get_unified(AnagramFinder *this, char *tmp);

AnagramFinder *anagram_finder_new(char *text, int len) {
    AnagramFinder *this = (AnagramFinder *)malloc(sizeof *this);
    memset(this, 0, sizeof *this);
    this->len      = len;
    this->nwords   = strlen(text) - len + 1;
    this->words    = (Word **)malloc(this->nwords * sizeof this->words[0]);
    for (int n = 0; n < len-1; n++)
        this->letters[text[n] - 'A']++;
    for (int n = 0; n < this->nwords; n++) {
        char unified[40];
        this->letters[text[n+len-1] - 'A']++;
        this->words[n] = word_new(n, text+n, anagram_finder_get_unified(this, unified), this->len);
        this->letters[text[n] - 'A']--;
    }
    return this;
}

void anagram_finder_delete(AnagramFinder *this) {
    for (int n = 0; n < this->nwords; n++)
        word_delete(this->words[n]);
    free(this->words);
    free(this);
}

int compare_words(void const *aa, void  const *bb) {
    Word const *a = *(Word const **)aa;
    Word const *b = *(Word const **)bb;
    app.comparisons++;
    int rval = strcmp(a->unified, b->unified);
    return rval ? rval : (a->offset - b->offset);
}

char *anagram_finder_get_unified(AnagramFinder *this, char *tmp) {
    char *p = tmp;
    char letter = 'A';
    for (unsigned n = 0; n < VECTSIZE(this->letters); n++, letter++)
        for (int m = 0; m < this->letters[n]; m++)
            *p++ = letter;
    *p++ = 0;
    return tmp;
}

void anagram_finder_print_one_result(AnagramFinder *this) {
    printf("%3d:", this->naccepted);
    for (int n = 0; n < this->naccepted; n++) {
        Word *it = this->accepted[n];
        printf(" %4d:%s", it->offset, it->value);
    }
    printf("\n");
}

void anagram_finder_add_candidate(AnagramFinder *this, Word *cand) {
    if (cand->offset < this->low_offset)
        return;
    for (int n = 0; n < this->naccepted; n++)
        if (0 == strcmp(cand->value, this->accepted[n]->value))
            return;
    this->accepted[this->naccepted++] = cand;
    this->low_offset = cand->offset + this->len;
}

void anagram_finder_add_range(AnagramFinder *this, int begin, int end) {
    if (end-begin >= MINIMUM) {
        this->naccepted  = 0;
        this->low_offset = 0;
        for (int n = begin; n < end; n++)
            anagram_finder_add_candidate(this, this->words[n]);
        if (this->naccepted >= MINIMUM)
            anagram_finder_print_one_result(this);
    }
}

void anagram_finder_find_all(AnagramFinder *this) {
    qsort(this->words, this->nwords, sizeof this->words[0], compare_words);
    int begin = 0;
    for (int n = 1; n < this->nwords; n++) {
        if (strcmp(this->words[n]->unified, this->words[begin]->unified)) {
            anagram_finder_add_range(this, begin, n);
            begin = n;
        }
    }
    anagram_finder_add_range(this, begin, this->nwords);
}

//---------------------------------------------------------------------------------------- main program

int read_text(char *text, int allocated)
{
    int ch;
    int end = 0;
    while (EOF != (ch = getchar())) {
        if (isalpha(ch)) {
            assert(end < allocated-1);
            text[end++] = toupper(ch);
        }
    }
    text[end] = 0;
    return end;
}

int main(int argc, char *argv[])
{
    char text[500 * 512 + 1];
    int from     = (argc > 1) ? atoi(argv[1]) : 6;
    int to       = (argc > 2) ? atoi(argv[2]) : from;

    read_text(text, VECTSIZE(text));

    for (int len = from; len <= to; len++) {
        printf("%slen = %d\n", (len > from) ? "\n" : "", len);
        AnagramFinder *finder = anagram_finder_new(text, len);
        anagram_finder_find_all(finder);
        anagram_finder_delete(finder);
    }

    printf("string length = %5lu, comparisons = %d\n", strlen(text), app.comparisons);
    return 0;
}

You can reach me by email at “lars dash 7 dot sdu dot se” or by telephone +46 705 189090

View source for the content of this page.

struct Word

class AnagramFinder

Global data and functions.

The source code

Cancel reply