#!/usr/bin/env zsh # # Finds the difference between two strings using Levenshtein distance. # Sets array reply to an even number of elements. The concatenation of odd # elements (one-based) is equal to the first string; even elements -- second # string. # # Example: # # % -z4h-string-diff 'sunday!' 'saturday' # % printf '%-5s => %s\n' ${(qq)reply} # 's' => 's' # '' => 'at' # 'u' => 'u' # 'n' => 'r' # 'day' => 'day' # '!' => '' # # Time and memory complexity in a decent language would be O($#1 * $#2). In Zsh # it's even worse than that. It's a good idea to check the value of $#1 * $#2 # before invoking this function. It takes about 0.2s if this multiple is 10000. # # Note: This is Wagner-Fischer algorithm. emulate -L zsh -o no_unset -o ksh_arrays local s=$1 t=$2 local -i m=$#s n=$#t w=$(($#t + 1)) i j p q r x y z e local -a d=({1..$(((m + 1) * (n + 1)))}) d=(${d[@]//*/0}) local -a b=(${d[@]}) for ((i = 1; i <= m; ++i)); do (( d[i*w] = i, b[i*w] = 1 )) done for ((j = 1; j <= n; ++j)); do (( d[j] = j, b[j] = 2 )) done for ((i = 1; i <= m; ++i)); do (( p = ${d[(i-1)*w]} )) (( q = ${d[i*w]} )) for ((j = 1; j <= n; ++j)); do if [[ ${s[i-1]} == ${t[j-1]} ]]; then (( z = p, e = 3 )) else (( z = p + 1, e = 7 )) fi (( y = q + 1 )) # Half the time is spent executing the next line. (( x = (p = ${d[(i-1)*w+j]}) + 1 )) if (( x <= y )); then if (( x <= z )); then (( d[i*w+j] = q = x, b[i*w+j] = 1 )) else (( d[i*w+j] = q = z, b[i*w+j] = e )) fi elif (( y <= z )); then (( d[i*w+j] = q = y, b[i*w+j] = 2 )) else (( d[i*w+j] = q = z, b[i*w+j] = e )) fi done done local -i k kind=-1 s1=m s2=m t1=n t2=n local res=() (( i = m, j = n )) while (( i || j )); do if (( (k = b[i*w+j]) != kind )); then if (( s2 > s1 || t2 > t1 )); then res+=("${t:$t1:$((t2-t1))}" "${s:$s1:$((s2-s1))}") (( s1 = s2 = i, t1 = t2 = j )) fi (( kind = k )) fi (( i -= k & 1, s1 -= k & 1, j -= (k >> 1 ) & 1, t1 -= (k >> 1 ) & 1 )) done if (( s2 > s1 || t2 > t1 )); then res+=("${t:$t1:$((t2-t1))}" "${s:$s1:$((s2-s1))}") fi typeset -g reply=("${(Oa)res[@]}")