#!/usr/bin/perl
#use warnings;
#use strict;

# Find the longest repeated substring of $stirng

# DESCRIPTION:
#
# Everything tries to leverage the fact that a repeated substring of
# length L is made up of repeated substrings of lengths < L.
#
# - The array @valid_starts keeps track of where previuos repeated
# substrings have occured, so that we don't check useless posititons.
#
# - we use a hash to avoid re checknig the same pattern twice
#
# - we always remeber the longest string found so far; if we try to
# find repeats of length L and can't we know that the most recently
# found repeated (ie the value of $longest_so_far) is the (rightmost)
# longest repeated substring and we're done.
#
# - valid_starts is conceptually an array, but it seems that using a
# string with substr and index is actually faster

sub repeated_substring {
    my $string_length = length($_[0]);
    my $max_length = int($string_length / 2);
    # initially every spot is valid
    my $valid_starts = "1" x $string_length;
    my $longest_so_far_start = undef;
    my $longest_so_far_length = undef;
    my %patterns = ();

    for (my $length = 1; $length <= $max_length; $length++) {
        my $found_something = 0;
        my $pattern;
        # putting this here (where it should go) stops constitution at
        # the 5th iteration...
        #my %patterns = ();
        for (my $offset = 0;
             ($offset <= $string_length - $length)
             && $offset != -1;
             $offset = index($valid_starts, "1", $offset + 1)) {
            $pattern = substr $_[0], $offset, $length;
            next if $patterns{$pattern}++;
            # we want to see if $pattern appears later on in the
            # text. we know $subpattern does, so we start with that
            my $index = index($_[0], $pattern, $offset + $length);
            if ($index != -1) {
                $longest_so_far_start  = $index;
                $longest_so_far_length = $length;
                $found_something = 1;
            } else {
                if ($offset > 0) {
                    # why the next bit is legit:
                    # - every repeated substring is made up of
                    # repeated substrings.
                    # - we DON'T have a repeated substring at $offset.
                    # - Therefore: the substring starting at $offset - 1
                    # and of length $length + 1 (which is the string
                    # we would check next time around) is made up of
                    # some char and a non repeated substring, so it
                    # can't be a repeated substring.
                    substr($valid_starts, $offset - 1, 2, "00");
                } else {
                    substr($valid_starts, 0, 1, "0");
                }
            }
        }
        if ($found_something == 0) {
            goto DONE;
        }
    }
  DONE:
    if (defined $longest_so_far_start) {
        return substr $_[0], $longest_so_far_start, $longest_so_far_length;
    } else {
        return "";
    }
}

1;
