package Regex::Optimizer;
use strict;
use warnings;
use base qw/Regex::List/;
use re 'eval';

our $VERSION = do { my @r = (q$Revision: 1.12 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r } ;

my $listVer = $Regex::List::VERSION;
die "Regex::Optimizer requires version 1.06 of Regex::List, but version $listVer is installed!\n" if $listVer ne '1.06';

our $RE_PAREN; # predeclear
$RE_PAREN =
    qr{
       \(
       (?:
	(?> [^()]+ )
	|
	(??{ $RE_PAREN })
       )*
       \)
      }xo;
our $RE_EXPR; # predeclear
$RE_EXPR = 
    qr{
       \{
       (?:
	(?> [^{}]+ )
	|
	(??{ $RE_EXPR })
       )*
       \}
      }xo;
our $RE_PIPE = qr/(?!\\)\|/o;
our $RE_CHAR = 
    qr{(?:
	# single character...
	(?!\\)[^\\\[(|)\]]       | # raw character except '[(|)]'
	$Regex::List::RE_XCHAR  | # extended characters
       )}xo;
our $RE_CCLASS = 
    qr{(?:
	(?!\\)\[ $RE_CHAR+? \] |
	$Regex::List::RE_XCHAR      | # extended characters
	(?!\\)[^(|)]                 | # raw character except '[(|)]'
	# Note pseudo-characters are not included
    )}xo;
our $RE_QUANT =
    qr{(?:
	(?!\\)
	    (?:
	     \? |
	     \+ |
	     \* |
	     \{[\d,]+\}
	     )\??
	)}xo;
our $RE_TOKEN = 
    qr{(?:
	(?:
	\\[ULQ] (?:$RE_CHAR+)(?:\\E|$) | # [ul]c or quotemeta
        $Regex::List::RE_PCHAR  | # pseudo-characters
        $RE_CCLASS |
	$RE_CHAR     
       )
	 $RE_QUANT?
       )}xo;
our $RE_START = $Regex::List::RE_START;

our %PARAM = (meta      => 1,
	      quotemeta => 0,
	      lookahead => 0,
	      optim_cc  => 1,
	      modifiers => '',
	      _char     => $RE_CHAR,
	      _token    => $RE_TOKEN,
	      _cclass   => $RE_CCLASS,
	     );

sub new{
    my $class = ref $_[0] ? ref shift : shift ;
    my $self = $class->SUPER::new;
    $self->set(%PARAM, @_);
    $self;
}

sub list2re{
    shift->SUPER::list2re(map {_strip($_)} @_);
}

sub optimize{
    my $self = shift;
    my $str  = shift;
    $self->{unexpand} and $str = $self->unexpand($str);
    # safetey feature against qq/(?:foo)(?:bar)/
    !ref $str and $str =~ /^$RE_START/ and $str = qr/$str/;

    no warnings 'uninitialized';
    # escape | inside []
    while ($str =~ /(?:[^\\]\[|^\[)(.*?)\]/g) {
        my $p = my $m = $1;
        next unless $p =~ s/(?:([^\\])\||^\|)/$1\\|/g;
        $str =~ s/\Q$m\E/$p/;
    }

    my $re = $self->_optimize($str);
    $re =~ s/^\?\^u://;
    $re =~ s/\?([xims\^]*-?[xims\^]*)\:\:\?/?$1:/;
    $re =~ s/(?:$RE_PIPE)?\|+/$1|/g;
    qr/$re/;
}

sub _strip{
    my ($str, $force) = @_;
    $force or ref $str eq 'Regexp' or return $str;
    $str =~ s/^($RE_START)//o or return $str;
    my $regopt = $1;  $str =~ s/\)$//o;
    $regopt =~ s/^\(\??//o; 
    $regopt =~ /^[-:]/ and $regopt = undef;
    ($str, $regopt);
}

my %my_l2r_opts = 
    (
     as_string => 1, 
     debug     => 0,
     _token    => qr/$RE_PAREN$RE_QUANT?|$RE_PIPE|$RE_TOKEN/,
    );

sub _optimize{
    no warnings 'uninitialized';
    my $self = shift;
    $self->{debug} and $self->{_indent}++;
    $self->{debug} and
	print STDERR '>'x $self->{_indent}, " ", $_[0], "\n";
    my ($result, $regopt)  = _strip(shift, 1);
    $result =~ s/\\([()])/"\\x" . sprintf("%X", ord($1))/ego;
    # $result =~ s/(\s)/"\\x" . sprintf("%X", ord($1))/ego;
    my @noOptimize;
    while ($result =~ s/\<\<\<(.*?)\>\>\>\|?//) {
        push @noOptimize, $1;
    }
    $result !~ /$RE_PIPE/ and goto RESULT;
    my $l = $self->clone->set(%my_l2r_opts);
    # optimize
    unless ($result =~ /$RE_PAREN/){
        my @words = split /$RE_PIPE/ => $result;
        $result = $l->list2re(@words);
	    goto RESULT;
    }
    my (@term, $sp);
    while ($result){
	if ($result =~ s/^($RE_PAREN)($RE_QUANT?)//){
	    my ($term, $quant) = ($1, $2);
	    $term = $self->_optimize($term);
	    $l->{optim_cc} = $quant ? 0 : 1;
	    if ($quant){
		if ($term =~ /^$self->{_cclass}$/){
		    $term .= $quant;
		}else{
		    $term = $self->{po} . $term . $self->{pc} . $quant;
		}
	    }
	    $term[$sp] .= $term;
	}elsif($result =~ s/^$RE_PIPE//){
	    $sp += 2;
	    push @term, '|';
	}elsif($result =~ s/^($RE_TOKEN+)//){
	    # warn $1;
	    $term[$sp] .= $1;
	}else{
	    die "something is wrong !";
	}
    }
    # warn scalar @term , ";", join(";" => @term);
    # sleep 1;
    my @stack;
    my @tstack;
    while (my $term = shift @term){
	if ($term eq '|'){
            push @tstack, shift @term;
	}else{
            if (@tstack) {
                push @stack, $l->list2re(@tstack);
                @tstack = ();
            }
	    push @tstack, $term;
	}
    }
    if (@tstack) {
        push @stack, $l->list2re(@tstack);
    }
    $result = join('' => @stack);
 RESULT:
    if (@noOptimize && $result) {
        $result =~ s/^\(\?[isxm\-]*\:\)$//o;
        my $end = $1 if $result =~ s/(\)\))$//o;
        $result .= ($result ? '|' : '') . join('|' => @noOptimize) . $end;
    } elsif (@noOptimize) {
        $result = join('|' => @noOptimize);
    }
    $result =  qq/(?$regopt$result)/ if $regopt;
    # warn qq($result, $regopt);
    $self->{debug} and 
	print STDERR '<'x $self->{_indent}, " ", $result, "\n";
    $self->{debug} and $self->{_indent}--;
    $result;
}

sub _pair2re{
    my $self = shift;
    $_[0] eq $_[1] and return $_[0];
    my ($first, $second) =
	length $_[0] <= length $_[1] ? @_ : ($_[1], $_[0]);
    my $l = length($first);
    $l -= 1
	while $self->_head($first, $l) ne $self->_head($second, $l);
    $l > 0 or return join("", @_);
    return $self->_head($first, $l) . 
	$self->{po} . 
	$self->_tail($first, $l) . '|' . $self->_tail($second, $l) .
	$self->{pc};
}

1;

