This is the optimized C++ implementation of the anagram program. The problem and the solution is described in the main anagram article. Do read that article first.
Also, you may find the C++ article helpful, since it describes member data and functions in some detail, not repeated here. This article describes only the differences between the C++ implementation and this implementation.
The source code
# include <stdio.h> # include <string.h> # include <ctype.h> # include <assert.h> # include <algorithm> # include <vector> # define VECTSIZE(x) ((sizeof (x)) / (sizeof (x)[0])) # define MINIMUM 3 static int len = 0; static char *unified = 0; static bool compare_words(int a, int b) { int cmp = memcmp(unified + a*len, unified + b*len, len); return cmp ? (cmp < 0) : (a < b); } class AnagramFinder { private: char * text; int const nwords = 0; int *const words = 0; std::vector<int> accepted; int low_offset = 0; public: AnagramFinder(char *text, int const len) : text (text), nwords(strlen(text) - len + 1), words (new int[nwords]) { ::len = len; ::unified = new char[len * nwords]; char *put = unified; char *front = text + len - 1; char *back = text; int letters[128]; memset(letters, 0, sizeof letters); for (int n = 0; n < len-1; n++) letters[text[n]]++; for (int n = 0; n < nwords; n++) { letters[*front++]++; for (int ch = 'A'; ch <= 'Z'; ch++) for (int m = 0; m < letters[ch]; m++) *put++ = ch; letters[*back++]--; } for (int n = 0; n < nwords; n++) words[n] = n; } ~AnagramFinder() { delete unified; delete[] words; } void print_one_result() { printf("%3lu:", 0lu + accepted.size()); for (int it: accepted) printf(" %4d:%.*s", it, len, text + it); printf("\n"); } void add_candidate(int cand) { if (cand < low_offset) return; for (int it : accepted) if (0 == memcmp(text + it, text + cand, len)) return; accepted.push_back(cand); low_offset = cand+len; } void add_range(int begin, int end) { if (end-begin >= MINIMUM) { accepted.clear(); low_offset = 0; for (int n = begin; n < end; n++) add_candidate(words[n]); if (accepted.size() >= MINIMUM) print_one_result(); } } void find_all() { std::sort(words, words + nwords, compare_words); int begin = 0; char *p = unified+words[begin]*len; for (int n = 1; n < nwords; n++) { if (0 != memcmp(unified + words[n]*len, p, len)) { add_range(begin, n); begin = n; p = unified+words[begin]*len; } } add_range(begin, nwords); } }; static int read_text(char *text, int allocated) { char buf[10240]; char *put = text; int nread; while ((nread = fread(buf, 1, VECTSIZE(buf), stdin)) > 0) { for (int n = 0; n < nread; n++) { int ch = buf[n]; if (isalpha(ch)) { assert(put < text + allocated); *put++ = toupper(ch); } } } assert(put < text + allocated); *put = 0; return put-text; } int main(int argc, char *argv[]) { char text[500 * 512 + 1]; int from = (argc > 1) ? atoi(argv[1]) : 6; int to = (argc > 2) ? atoi(argv[2]) : from; int text_len = read_text(text, VECTSIZE(text)); ::unified = new char[to * (text_len-from+1)]; //--- 10 * (512 - 4 + 1) (fmom = 4, to = 10) for (len = from; len <= to; len++) { AnagramFinder finder(text, len); if (len > from) printf("\n"); printf("len = %d\n", len); finder.find_all(); } printf("string length = %5d\n", text_len); return 0; }
You can reach me by email at “lars dash 7 at sdu dot se” or by telephone +46 705 189090