This is the OO-inspired C implementation of the anagram program. The problem and the solution is described in the main anagram article. Do read that article first.
This OO-inspired C implementation is as close as you can get to the C++ program. Member functions are replaced by C functions that take a pointer as their first argument. For building the unified version of each word, it uses the sliding windows technique also used by by the optimized C++ version.
Here, OO-inspired means that it is designed around data types with associated operations, and that it is a close match to what a real OO program would look like.
struct Word
Members in Word.
- int offset The offset into the letter-only string. Used for eliminating overlapping candidates.
- char *value The word at offset.
- char *unified The unified version of value.
Type-specific functions for Word.
- word_new(int offset, char *value, char *unified) The constructor.
- word_delete(Word *) The destructor.
class AnagramFinder
The type AnagramFinder is abstract; it is possible to move the definition of both the type and its operations to a separate source file, and to allow access to it by opaque pointers only.
Member data in AnagramFinder.
- int len The length of the anagrams that this AnagramFinder looks for.
- Word **words All the possible words. If there are 512 letters and len is 4, then there are 509 words.
- int nwords The allocated number of words, equal to the actual number of words.
- Word *accepted[20] All successful candidates for a single set of anagram words.
- int naccepted The current number of successful candidates.
- char letters[26] Counts the number of occurences of each letter of the English alphabet in the current word.
Type-specific functions for AnagramFinder.
- AnagramFinder * anagram_finder_new ( char *text, int len ) Constructs a new finder for the given length. Initializes words, but does not sort it.
- void anagram_finder_delete ( AnagramFinder *this ) Destructor.
- bool anagram_finder_compare_words ( void const *a, void const *b ) Passed to qsort().
- char anagram_finder_get_unified ( AnagramFinder *this, char *tmp ) Builds the unified word from letters[].
- void anagram_finder_print_one_result ( AnagramFinder *this ) Prints the words in accepted.
- void anagram_finder_add_candidate ( AnagramFinder *this, Word *cand ) Tries to add a candidate to accepted. Rejects duplicates of words already in the list. Rejects words that overlap words already in the list.
- void anagram_finder_add_range ( AnagramFinder *this, int begin, int end ) Passes each of the words in the range to add_candidate.
- void anagram_finder_find_all ( AnagramFinder *this ) Finds all the anagram set of with words of length len.
Global data and functions.
- int read_text(char *text, int allocated) Reads all of the input data, discards all characters except letters, and converts all lower-case letter to upper-case letters.
- int main(int argc, char *argv[]) Parses command line arguments. There may be s single integer for a single run of the given length, or a range given by two numbers. The default is a run for length 6. A new AnagramFinder is created, run and deleted for each length.
The source code
# include <stdio.h> # include <stdlib.h> # include <string.h> # include <ctype.h> # include <assert.h> # define VECTSIZE(x) ((sizeof (x)) / (sizeof (x)[0])) # define MINIMUM 3 static struct App { int comparisons; } app; //---------------------------------------------------------------------------------------- Word typedef struct { int offset; char *value; char *unified; } Word; Word *word_new(int offset, char *value, char *unified, int len) { Word *rval = (Word *)malloc(sizeof *rval); rval->offset = offset; rval->value = strndup(value, len); rval->unified = strdup(unified); return rval; } void word_delete(Word *this) { free(this->unified); free(this->value); free(this); } //---------------------------------------------------------------------------------------- AnagramFinder typedef struct { int len; int nwords; Word ** words; Word * accepted[20]; int naccepted; //--- Current size int low_offset; int letters[26]; } AnagramFinder; char *anagram_finder_get_unified(AnagramFinder *this, char *tmp); AnagramFinder *anagram_finder_new(char *text, int len) { AnagramFinder *this = (AnagramFinder *)malloc(sizeof *this); memset(this, 0, sizeof *this); this->len = len; this->nwords = strlen(text) - len + 1; this->words = (Word **)malloc(this->nwords * sizeof this->words[0]); for (int n = 0; n < len-1; n++) this->letters[text[n] - 'A']++; for (int n = 0; n < this->nwords; n++) { char unified[40]; this->letters[text[n+len-1] - 'A']++; this->words[n] = word_new(n, text+n, anagram_finder_get_unified(this, unified), this->len); this->letters[text[n] - 'A']--; } return this; } void anagram_finder_delete(AnagramFinder *this) { for (int n = 0; n < this->nwords; n++) word_delete(this->words[n]); free(this->words); free(this); } int compare_words(void const *aa, void const *bb) { Word const *a = *(Word const **)aa; Word const *b = *(Word const **)bb; app.comparisons++; int rval = strcmp(a->unified, b->unified); return rval ? rval : (a->offset - b->offset); } char *anagram_finder_get_unified(AnagramFinder *this, char *tmp) { char *p = tmp; char letter = 'A'; for (unsigned n = 0; n < VECTSIZE(this->letters); n++, letter++) for (int m = 0; m < this->letters[n]; m++) *p++ = letter; *p++ = 0; return tmp; } void anagram_finder_print_one_result(AnagramFinder *this) { printf("%3d:", this->naccepted); for (int n = 0; n < this->naccepted; n++) { Word *it = this->accepted[n]; printf(" %4d:%s", it->offset, it->value); } printf("\n"); } void anagram_finder_add_candidate(AnagramFinder *this, Word *cand) { if (cand->offset < this->low_offset) return; for (int n = 0; n < this->naccepted; n++) if (0 == strcmp(cand->value, this->accepted[n]->value)) return; this->accepted[this->naccepted++] = cand; this->low_offset = cand->offset + this->len; } void anagram_finder_add_range(AnagramFinder *this, int begin, int end) { if (end-begin >= MINIMUM) { this->naccepted = 0; this->low_offset = 0; for (int n = begin; n < end; n++) anagram_finder_add_candidate(this, this->words[n]); if (this->naccepted >= MINIMUM) anagram_finder_print_one_result(this); } } void anagram_finder_find_all(AnagramFinder *this) { qsort(this->words, this->nwords, sizeof this->words[0], compare_words); int begin = 0; for (int n = 1; n < this->nwords; n++) { if (strcmp(this->words[n]->unified, this->words[begin]->unified)) { anagram_finder_add_range(this, begin, n); begin = n; } } anagram_finder_add_range(this, begin, this->nwords); } //---------------------------------------------------------------------------------------- main program int read_text(char *text, int allocated) { int ch; int end = 0; while (EOF != (ch = getchar())) { if (isalpha(ch)) { assert(end < allocated-1); text[end++] = toupper(ch); } } text[end] = 0; return end; } int main(int argc, char *argv[]) { char text[500 * 512 + 1]; int from = (argc > 1) ? atoi(argv[1]) : 6; int to = (argc > 2) ? atoi(argv[2]) : from; read_text(text, VECTSIZE(text)); for (int len = from; len <= to; len++) { printf("%slen = %d\n", (len > from) ? "\n" : "", len); AnagramFinder *finder = anagram_finder_new(text, len); anagram_finder_find_all(finder); anagram_finder_delete(finder); } printf("string length = %5lu, comparisons = %d\n", strlen(text), app.comparisons); return 0; }
You can reach me by email at “lars dash 7 dot sdu dot se” or by telephone +46 705 189090