#use warnings; #use strict; # based on http://cs.anu.edu.au/~Peter.Christen/Febrl/febrl-0.4.01/stringcmp.py # see editex use List::Util qw(min max); sub editex { # globals my $SML_COSTS = 2; my $BIG_COSTS = 3; # letter grouping my %group = ( map( { $_ => 0 } qw( a e i o u y ) ), map( { $_ => 1 } qw( b f p v ) ), map( { $_ => 2 } qw( c g j k q s x z ) ), map( { $_ => 3 } qw( d t ) ), map( { $_ => 4 } qw( l ) ), map( { $_ => 5 } qw( m n ) ), map( { $_ => 6 } qw( r ) ), map( { $_ => 7 } qw( h w _ ) ), ); sub delcost { (my $char1, my $char2) = @_; if ($char1 eq $char2){ return 0; } my $code1 = $group{$char1}; my $code2 = $group{$char2}; if (($code1 == $code2) or ($code2 == 7)) { return $SML_COSTS; } else { return $BIG_COSTS; } } (my $str1, my $str2) = @_; if (($str1 eq "") or ($str2 eq "")){ return 0; } elsif ($str1 eq $str2) { return 1; } my $n = length($str1); my $m = length($str2); $str1 =~ s/ /_/; $str2 =~ s/ /_/; if ($n > $m) { $temp = $str1; $str1 = $str2; $str2 = $temp; $temp = $n; $n = $m; $m = $temp; } my @F = (); foreach my $i ( 0 .. $n+1 ) { foreach my $j ( 0 .. $m+1 ) { push @{ $F[$i] }, 0; } } $F[1][0] = $BIG_COSTS; $F[0][1] = $BIG_COSTS; my $sum = $BIG_COSTS; for (my $i = 2; $i <= $n+1; $i++){ $sum += delcost(substr($str1,$i-2,1),substr($str1,$i-1,1)); $F[$i][0] = $sum; } my $sum = $BIG_COSTS; for (my $j = 2; $j <= $m+1; $j++){ $sum += delcost(substr($str2,$j-2,1),substr($str2,$j-1,1)); $F[0][$j] = $sum; } for (my $i = 1; $i <= $n+1; $i++){ my $inc1 = 0; if ($i == 1) { $inc1 = $BIG_COSTS; } else { $inc1 = delcost(substr($str1,$i-2,1),substr($str1,$i-1,1)); } for (my $j = 1; $j <= $m+1; $j++){ my $inc2 = 0; my $diag = 0; if ($j == 1) { $inc2 = $BIG_COSTS; } else { $inc2 = delcost(substr($str2,$j-2,1),substr($str2,$j-1,1)); } if (substr($str1,$i-1,1) eq substr($str2,$j-1,1)) { $diag = 0; } else { my $code1 = $group{substr($str1,$i-1,1)}; my $code2 = $group{substr($str2,$j-1,1)}; if ($code1 == $code2) { # Same phonetic group $diag = $SML_COSTS; } else { $diag = $BIG_COSTS; } } $F[$i][$j] = min($F[$i-1][$j]+$inc1, $F[$i][$j-1]+$inc2, $F[$i-1][$j-1]+$diag); } } my $num = $F[$n][$m]; my $den1 = $F[0][$m]; my $den2 = $F[$n][0]; my $w = 1 - ( $num / max($den1,$den2)); # print $w; return $w; } print editex("feigenbaum","seigenbaum");