Anagram finder in D

This is the D implementation of the anagram program. The problem and the solution is described in the main anagram article. Do read that article first.

D is very similar to C++. I have used the GNU compiler, gdc.

The source code

import std.stdio;
import std.string;
import std.algorithm;
import std.conv;
import std.uni;

const int MINIMUM     = 3;
;     int comparisons = 0;

class Word {
  public:
    int offset;
    char value[];
    char unified[];
    this(int offset, ref char value[], char unified[]) {
        this.offset  = offset;
        this.value   = value.dup;
        this.unified = unified;
    }
};

class AnagramFinder {
  private:
    int    len;
    Word   words[];
    int    accepted[];
    int    low_offset;

  public:

    this(char text[], int len) {
        this.len = len;
        accepted.reserve(40);
        words.length = (text.length - len + 1);
        foreach (int n, _; words) {
            char val[] = text[n..(n+len)];
            words[n] = new Word(n, val, get_unified(val));
        }
    }

    static int compare_words(Word a, Word b) {
        comparisons++;
        int rval = cmp(a.unified, b.unified);
        return (rval != 0) ? rval : (a.offset - b.offset);
    }

    static char[] get_unified(ref char[] s) {
        char tmp[] = s.dup;
        sort(tmp.representation);
        return tmp;
    }

    void print_one_result() {
        printf("%3d:", accepted.length);
        foreach (int it; accepted)
            printf(" %4d:%s", words[it].offset, toStringz(words[it].value));
        printf("\n");
    }

    void add_candidate(int cand) {
        Word c = words[cand];
        if (c.offset < low_offset)
            return;
        foreach (int it; accepted)
            if (words[it].value == c.value)
                return;
        accepted ~= cand;
        low_offset = c.offset + len;
    }

    void add_range(int begin, int end) {
         if (end-begin >= MINIMUM) {
             accepted.length = 0;
             low_offset = 0;
             for (int n = begin; n < end; n++)
                 add_candidate(n);
             if (accepted.length >= MINIMUM)
                 print_one_result();
         }
    }

    alias compare_less = (a, b) => (compare_words(a, b) < 0);

    void find_all() {
        words.sort!(compare_less);
        int begin = 0;
        foreach (n; 1..to!(int)(words.length)) {
            if (words[n].unified != words[begin].unified) {
                add_range(begin, n);
                begin = n;
            }
        }
        add_range(begin, to!(int)(words.length));
    }
};

static void read_text(ref char text[]) {
    char line[];
    line.reserve(200);
    text.length = 0;
    while (readln(line) > 0) {
        foreach (char ch; line) {
            if (isAlpha(ch))
                text ~= ch;
        }
    }
    toUpperInPlace(text);
}

void main(string argv[])
{
    int first =  (argv.length > 1) ? parse!int(argv[1]) : 6;
    int last   = (argv.length > 2) ? parse!int(argv[2]) : first;
    char text[];
    text.reserve(500*512);
    read_text(text);
    foreach (int len; first..(last+1)) {
        printf("%slen = %d\n", toStringz(len > first ? "\n" : ""), len);
        new AnagramFinder(text, len).find_all();
    }
    printf("string length = %5d, comparisons = %d\n", text.length, comparisons);
}

You can reach me by email at “lars dash 7 dot sdu dot se” or by telephone +46 705 189090

View source for the content of this page.