Anagram finder in C++, optimized – Lars Nordenström's blog

This is the optimized C++ implementation of the anagram program. The problem and the solution is described in the main anagram article. Do read that article first.

Also, you may find the C++ article helpful, since it describes member data and functions in some detail, not repeated here. This article describes only the differences between the C++ implementation and this implementation.

The source code

# include <stdio.h>
# include <string.h>
# include <ctype.h>
# include <assert.h>

# include <algorithm>
# include <vector>

# define VECTSIZE(x) ((sizeof (x)) / (sizeof (x)[0]))
# define MINIMUM 3

static int   len = 0;
static char *unified = 0;

static bool compare_words(int a, int b)
{
    int cmp = memcmp(unified + a*len, unified + b*len, len);
    return cmp ? (cmp < 0) : (a < b);
}

class AnagramFinder {
  private:
    char     *        text;
    int       const  nwords      = 0;
    int      *const   words      = 0;
    std::vector<int>  accepted;
    int               low_offset = 0;

  public:
    AnagramFinder(char *text, int const len)
      : text  (text),
        nwords(strlen(text) - len + 1),
        words (new int[nwords])
    {
        ::len       = len;
        ::unified = new char[len * nwords];
        char *put   = unified;
        char *front = text + len - 1;
        char *back  = text;
        int   letters[128];

        memset(letters, 0, sizeof letters);
        for (int n = 0; n < len-1; n++)
            letters[text[n]]++;

        for (int n = 0; n < nwords; n++) {
            letters[*front++]++;
            for (int ch = 'A'; ch <= 'Z'; ch++)
                for (int m = 0; m < letters[ch]; m++)
                    *put++ = ch;
            letters[*back++]--;
        }
        for (int n = 0; n < nwords; n++)
            words[n]  = n;
    }

    ~AnagramFinder() {
        delete unified;
        delete[] words;
    }

    void print_one_result() {
        printf("%3lu:", 0lu + accepted.size());
        for (int it: accepted)
            printf(" %4d:%.*s", it, len, text + it);
        printf("\n");
    }

    void add_candidate(int cand) {
        if (cand < low_offset)
            return;
        for (int it : accepted)
            if (0 == memcmp(text + it, text + cand, len))
                return;
        accepted.push_back(cand);
        low_offset = cand+len;
    }

    void add_range(int begin, int end) {
        if (end-begin >= MINIMUM) {
            accepted.clear();
            low_offset = 0;
            for (int n = begin; n < end; n++)
                add_candidate(words[n]);
            if (accepted.size() >= MINIMUM)
                print_one_result();
        }
    }

    void find_all() {
        std::sort(words, words + nwords, compare_words);

        int begin = 0;
        char *p = unified+words[begin]*len;
        for (int n = 1; n < nwords; n++) {
            if (0 != memcmp(unified + words[n]*len, p, len)) {
                add_range(begin, n);
                begin = n;
                p = unified+words[begin]*len;
            }
        }
        add_range(begin, nwords);
    }
};

static int read_text(char *text, int allocated)
{
    char  buf[10240];
    char *put = text;
    int   nread;
    while ((nread = fread(buf, 1, VECTSIZE(buf), stdin)) > 0) {
        for (int n = 0; n < nread; n++) {
            int ch = buf[n];
            if (isalpha(ch)) {
                assert(put < text + allocated);
                *put++ = toupper(ch);
            }
        }
    }
    assert(put < text + allocated);
    *put = 0;
    return put-text;
}

int main(int argc, char *argv[])
{
    char text[500 * 512 + 1];
    int from     = (argc > 1) ? atoi(argv[1]) : 6;
    int to       = (argc > 2) ? atoi(argv[2]) : from;
    int text_len = read_text(text, VECTSIZE(text));

    ::unified = new char[to * (text_len-from+1)];  //--- 10 * (512 - 4 + 1) (fmom = 4, to = 10)

    for (len = from; len <= to; len++) {
        AnagramFinder finder(text, len);
        if (len > from)
            printf("\n");
        printf("len = %d\n", len);
        finder.find_all();
    }

    printf("string length = %5d\n", text_len);
    return 0;
}

You can reach me by email at “lars dash 7 at sdu dot se” or by telephone +46 705 189090

View source for the content of this page.

The source code

Cancel reply