% Example program for the course STD VT98
% This program computes the Levenshtein distance between two strings
% which is the minimum number of insertion/deletion/substitution
% operation which have to be made to change one string in the other.
% Example: distance(abcd,abd)=1 because one character has to be
%          deleted from the first string to obtain the second.
% Usage:   start Prolog, load the file and run distance(abc,xyz).
%          in which abc and xyz are arbitrary strings.
% Notes:   this distance measure is symmetric.
%          terminology: I have used the words prefix and suffix as
%                       meaning an initial substring and a final substring
% 980301 erik.tjong@ling.uu.se

% distance/2: compute the Levenshtein distance between two strings
% arguments:  Word1: first input string; Word2: second input string
distance(Word1,Word2):-
   % divide the strings in characters
   name(Word1,Word1Chars),
   name(Word2,Word2Chars),
   % make a list with as many maximum integer values as Word2 has characters
   makeMaxList(Word2Chars,MaxList),
   % define the first left value
   LeftValue is 0,
   % compute distance between strings
   distance(Word1Chars,Word2Chars,MaxList,LeftValue,Distance),
   % present result
   writeln([Distance,'   ',Word1,'-',Word2]).

% example computation: distance(abcd,abc):
% the program will create a table like this: 
%
%        a  b  d
%      a 0  1  2
%      b 1  0  1
%      c 2  1  1
%      d 3  2  1
%
% Each element (x,y) depends on the values of (x-1,y), (x,y-1) and
% (x-1,y-1). Whenever the characters for x and y are the same then the
% value of (x,y) will be equal to the minimum of the three values it 
% depends on. When the characters are different then the value of 
% (x,y) will be this minimum value plus one. In order to be able to 
% compute the (x,y) values the program uses a virtual initial row 
% with only large integer values as elements (MaxList) and a virtual 
% initial column with the values 0,1,2,3... (LeftValue). 
%
% A problem of this similarity measure is that it does not recognize 
% the difference between x and xx: distance(a,aa)=0.

% distance/5 compute the Levenshtein distance between a suffix of a
% string and a complete string given information about the distance
% values of the prefix of the first string
% arguments: Suffix: input suffix of some string 1 
%            String: input string 2
%            DistanceValues: input distance values for prefix of string 1
%            LeftValue: the virtual value on the left of the current row
%            Distance: output distance
distance([],_,DistanceValues,_,Distance):-
   % the distance is equal to the last value in the list DistanceValues
   lastElement(DistanceValues,Distance).
distance([S|Suffix],String,DistanceValues,LeftValue,Distance):-
   % process the top row (first character of suffix string 1)
   distanceRow(S,String,[LeftValue|DistanceValues],LeftValue,
               DistanceValuesOut),
   % increase the left value
   LeftValue1 is LeftValue+1,
   % process the bottom of the table (rest of characters in suffix string 1)
   distance(Suffix,String,DistanceValuesOut,LeftValue1,Distance).

% distanceRow/5: compute the distances between one character and a string 
% arguments:     C: input character of string1
%                String: input string 2 (or a suffix of it)
%                DIn: input distance values (previous row/character)
%                PrevD: input distance between C and the character
%                       before string
%                Dout:  output list of distances between C and the
%                       characters in String
distanceRow(_,[],[_],_,[]).
distanceRow(C,[S|String],[DIn1,DIn2|DIns],PrevD,[Dout|Douts]):-
   ((C==S,CharD is 0);(C\==S,CharD is 1)),
   ((DIn1>PrevD,Min1 is PrevD);(DIn1=<PrevD,Min1 is DIn1)),
   ((DIn2>Min1,Min is Min1);(DIn2=<Min1,Min is DIn2)),
   Dout is Min+CharD,
   distanceRow(C,String,[DIn2|DIns],Dout,Douts).

% lastElement/2: return the last element of a list
lastElement([X],X).
lastElement([_|Xs],X):-
   lastElement(Xs,X).

% makeMaxList/2: make list with as many maximum integer values as
%                elements in input list 
% arguments:     Chars: input list
%                MaxList: output list with maximum integer values (65535)
makeMaxList([_|Chars],[65535|MaxList]):-
   makeMaxList(Chars,MaxList).
makeMaxList([],[]).

% writeln/1: write a list of terms
% argument:  X: list of strings
% source: Leon Sterling and Ehud Shapiro, "The Art of Prolog -
%         Advanced Programming Techniques". The MIT Press, 1986. 
%         ISBN 0-262-69105-1. Page 176 
writeln([X|Xs]):-
   write(X),
   writeln(Xs).
writeln([]):-nl.
