Anagram finder in PHP

This is the PHP implementation of the anagram program. The problem and the solution is described in the main anagram article. Do read that article first.

Also, you may find the C++ article helpful, since it describes member data and functions in some detail, not repeated here. This article describes only the differences between the C++ implementation and this implementation.

The source code

#!/usr/bin/php

$comparisons = 0;

$MINIMUM = 3;

//---------------------------------------------------------------------------------------- Word

class Word {
    public function __construct($offset, $value, $unified) {
        $this->offset  = $offset;
        $this->value   = $value;
        $this->unified = $unified;
    }
    public $offset;
    public $value;
    public $unified;
}

//---------------------------------------------------------------------------------------- AnagramFinder

class AnagramFinder {
    public $words;
    public $len;
    public $accepted   = array();
    public $low_offset = 0;

    public function __construct($text, $len) {
        $this->len = $len;
        $this->words = array();
        $last = strlen($text) - $len;
        for ($n = 0; $n <= $last; $n++) {
            $value = substr($text, $n, $len);
            $this->words[$n] = new Word($n, $value, AnagramFinder::get_unified($value));
       }
    }

    public static function compare_words($a, $b) {
        GLOBAL $comparisons;
        $comparisons++;
        $rval = strcmp($a->unified, $b->unified);
        return $rval ? $rval : ($a->offset - $b->offset);
    }

    public static function get_unified($s) {
        $ref = str_split($s);
        asort($ref);
        return implode("", $ref);
    }

    public function print_accepted() {
        printf("%3d:", count($this->accepted));
        foreach ($this->accepted as $it)
            printf(" %4d:%s", $it->offset, $it->value);
        printf("\n");
    }

    public function add_candidate($cand) {
        if ($cand->offset < $this->low_offset)
            return;
        foreach ($this->accepted as $it)
            if (0 == strcmp($it->value, $cand->value))
                return;
        $this->accepted[] = $cand;
        $this->low_offset = $cand->offset + $this->len;
    }

    public function add_range($anagrams) {
        GLOBAL $MINIMUM;
        $this->accepted   = array();
        $this->low_offset = 0;
        if (count($anagrams) >= $MINIMUM) {
            foreach ($anagrams as $it)
                $this->add_candidate($it);
            if (count($this->accepted) >= $MINIMUM)
                $this->print_accepted();
        }
    }
    public function find_all() {
        usort($this->words, "AnagramFinder::compare_words");

        $anagrams = array();
        $prev = 0;
        foreach ($this->words as $it) {
            if ($prev && $it->unified != $prev->unified) {
                $this->add_range($anagrams);
                $anagrams = array();
            }
            $prev = $it;
            $anagrams[] = $it;
        }
        $this->add_range($anagrams);
        return;
    }
}

//---------------------------------------------------------------------------------------- main program

function read_text() {
    while ($line = fgets(STDIN)) {
        $array = str_split(strtoupper($line));
        foreach ($array as $char) {
            if (ctype_alpha($char)) {
                $chars[] = $char;
            }
        }
    }
    return implode($chars);
}

function main() {
    GLOBAL $argv, $comparisons;

    $text = read_text();

    assert(3 == $argc);
    $from = $argv[1];
    $to   = $argv[2];
    for ($l = $from; $l <= $to; $l++) {
        $af = null;  //--- Yes, really!
        $af = new AnagramFinder($text, $l);
        if ($l > $from)
            printf("\n");
        printf("len = %d\n", $l);
        $af->find_all();
    }
    printf("string length = %5d, comparisons = %d\n", strlen($text), $comparisons);
}

main();

?>

You can reach me by email at “lars dash 7 dot sdu dot se” or by telephone +46 705 189090

View source for the content of this page.