foxshell/fn/-z4h-string-diff
2025-07-04 11:48:40 -05:00

84 lines
2.2 KiB
Bash

#!/usr/bin/env zsh
#
# Finds the difference between two strings using Levenshtein distance.
# Sets array reply to an even number of elements. The concatenation of odd
# elements (one-based) is equal to the first string; even elements -- second
# string.
#
# Example:
#
# % -z4h-string-diff 'sunday!' 'saturday'
# % printf '%-5s => %s\n' ${(qq)reply}
# 's' => 's'
# '' => 'at'
# 'u' => 'u'
# 'n' => 'r'
# 'day' => 'day'
# '!' => ''
#
# Time and memory complexity in a decent language would be O($#1 * $#2). In Zsh
# it's even worse than that. It's a good idea to check the value of $#1 * $#2
# before invoking this function. It takes about 0.2s if this multiple is 10000.
#
# Note: This is Wagner-Fischer algorithm.
emulate -L zsh -o no_unset -o ksh_arrays
local s=$1 t=$2
local -i m=$#s n=$#t w=$(($#t + 1)) i j p q r x y z e
local -a d=({1..$(((m + 1) * (n + 1)))})
d=(${d[@]//*/0})
local -a b=(${d[@]})
for ((i = 1; i <= m; ++i)); do
(( d[i*w] = i, b[i*w] = 1 ))
done
for ((j = 1; j <= n; ++j)); do
(( d[j] = j, b[j] = 2 ))
done
for ((i = 1; i <= m; ++i)); do
(( p = ${d[(i-1)*w]} ))
(( q = ${d[i*w]} ))
for ((j = 1; j <= n; ++j)); do
if [[ ${s[i-1]} == ${t[j-1]} ]]; then
(( z = p, e = 3 ))
else
(( z = p + 1, e = 7 ))
fi
(( y = q + 1 ))
# Half the time is spent executing the next line.
(( x = (p = ${d[(i-1)*w+j]}) + 1 ))
if (( x <= y )); then
if (( x <= z )); then
(( d[i*w+j] = q = x, b[i*w+j] = 1 ))
else
(( d[i*w+j] = q = z, b[i*w+j] = e ))
fi
elif (( y <= z )); then
(( d[i*w+j] = q = y, b[i*w+j] = 2 ))
else
(( d[i*w+j] = q = z, b[i*w+j] = e ))
fi
done
done
local -i k kind=-1 s1=m s2=m t1=n t2=n
local res=()
(( i = m, j = n ))
while (( i || j )); do
if (( (k = b[i*w+j]) != kind )); then
if (( s2 > s1 || t2 > t1 )); then
res+=("${t:$t1:$((t2-t1))}" "${s:$s1:$((s2-s1))}")
(( s1 = s2 = i, t1 = t2 = j ))
fi
(( kind = k ))
fi
(( i -= k & 1, s1 -= k & 1, j -= (k >> 1 ) & 1, t1 -= (k >> 1 ) & 1 ))
done
if (( s2 > s1 || t2 > t1 )); then
res+=("${t:$t1:$((t2-t1))}" "${s:$s1:$((s2-s1))}")
fi
typeset -g reply=("${(Oa)res[@]}")