84 lines
2.2 KiB
Bash
84 lines
2.2 KiB
Bash
#!/usr/bin/env zsh
|
|
#
|
|
# Finds the difference between two strings using Levenshtein distance.
|
|
# Sets array reply to an even number of elements. The concatenation of odd
|
|
# elements (one-based) is equal to the first string; even elements -- second
|
|
# string.
|
|
#
|
|
# Example:
|
|
#
|
|
# % -z4h-string-diff 'sunday!' 'saturday'
|
|
# % printf '%-5s => %s\n' ${(qq)reply}
|
|
# 's' => 's'
|
|
# '' => 'at'
|
|
# 'u' => 'u'
|
|
# 'n' => 'r'
|
|
# 'day' => 'day'
|
|
# '!' => ''
|
|
#
|
|
# Time and memory complexity in a decent language would be O($#1 * $#2). In Zsh
|
|
# it's even worse than that. It's a good idea to check the value of $#1 * $#2
|
|
# before invoking this function. It takes about 0.2s if this multiple is 10000.
|
|
#
|
|
# Note: This is Wagner-Fischer algorithm.
|
|
|
|
emulate -L zsh -o no_unset -o ksh_arrays
|
|
|
|
local s=$1 t=$2
|
|
local -i m=$#s n=$#t w=$(($#t + 1)) i j p q r x y z e
|
|
local -a d=({1..$(((m + 1) * (n + 1)))})
|
|
d=(${d[@]//*/0})
|
|
local -a b=(${d[@]})
|
|
|
|
for ((i = 1; i <= m; ++i)); do
|
|
(( d[i*w] = i, b[i*w] = 1 ))
|
|
done
|
|
for ((j = 1; j <= n; ++j)); do
|
|
(( d[j] = j, b[j] = 2 ))
|
|
done
|
|
|
|
for ((i = 1; i <= m; ++i)); do
|
|
(( p = ${d[(i-1)*w]} ))
|
|
(( q = ${d[i*w]} ))
|
|
for ((j = 1; j <= n; ++j)); do
|
|
if [[ ${s[i-1]} == ${t[j-1]} ]]; then
|
|
(( z = p, e = 3 ))
|
|
else
|
|
(( z = p + 1, e = 7 ))
|
|
fi
|
|
(( y = q + 1 ))
|
|
# Half the time is spent executing the next line.
|
|
(( x = (p = ${d[(i-1)*w+j]}) + 1 ))
|
|
if (( x <= y )); then
|
|
if (( x <= z )); then
|
|
(( d[i*w+j] = q = x, b[i*w+j] = 1 ))
|
|
else
|
|
(( d[i*w+j] = q = z, b[i*w+j] = e ))
|
|
fi
|
|
elif (( y <= z )); then
|
|
(( d[i*w+j] = q = y, b[i*w+j] = 2 ))
|
|
else
|
|
(( d[i*w+j] = q = z, b[i*w+j] = e ))
|
|
fi
|
|
done
|
|
done
|
|
|
|
local -i k kind=-1 s1=m s2=m t1=n t2=n
|
|
local res=()
|
|
(( i = m, j = n ))
|
|
while (( i || j )); do
|
|
if (( (k = b[i*w+j]) != kind )); then
|
|
if (( s2 > s1 || t2 > t1 )); then
|
|
res+=("${t:$t1:$((t2-t1))}" "${s:$s1:$((s2-s1))}")
|
|
(( s1 = s2 = i, t1 = t2 = j ))
|
|
fi
|
|
(( kind = k ))
|
|
fi
|
|
(( i -= k & 1, s1 -= k & 1, j -= (k >> 1 ) & 1, t1 -= (k >> 1 ) & 1 ))
|
|
done
|
|
|
|
if (( s2 > s1 || t2 > t1 )); then
|
|
res+=("${t:$t1:$((t2-t1))}" "${s:$s1:$((s2-s1))}")
|
|
fi
|
|
|
|
typeset -g reply=("${(Oa)res[@]}")
|