Dissimilarity Measure#
An overview of dissimilarity and distance measures.
%matplotlib inline
import numpy as np
from dissimilarity__utils import *
Euclidean Distance#
\[ \large \displaystyle
d(x,y) \mapsto \|x-y\|_2 = \left[\sum_{i}^{n}(x_i-y_i)^2\right]^\frac{1}{2} = \sqrt{\sum_{i}^{n}(x_i-y_i)^2}
\]
def eucl(x, y):
    """Euclidean (L2) distance between ``x`` and every row of ``y``.

    The coordinate differences are squared, summed along axis 1, and the
    square root of each sum is returned, so ``x - y`` must be at least 2-D.
    """
    diff = x - y
    squared_sum = np.sum(diff ** 2, axis=1)
    return squared_sum ** 0.5
# Reference point at the origin; every distance below is measured from it.
x = np.array([0, 0])
# NOTE(review): yA, yB and s are not defined in the visible cells; presumably
# they come from the star import of dissimilarity__utils -- confirm.
dA = eucl(x, yA)
# One distance per row of yB, reshaped to the shape of s.
dB = eucl(x, yB).reshape(s.shape)
%%time
# Plot both distance sets; save=False is passed, so presumably nothing is
# written to disk -- confirm in plotDist.
plotDist(x, dA, dB, 'euclidean_distance', save=False)
Manhattan Distance#
\[ \large \displaystyle
d(x,y) \mapsto \|x-y\|_1 = \sum_{i}^{n}|x_i-y_i|
\]
def manh(x, y):
    """Manhattan (L1) distance between ``x`` and every row of ``y``.

    Sums the absolute coordinate differences along axis 1, so ``x - y``
    must be at least 2-D.
    """
    abs_diff = np.absolute(x - y)
    return np.sum(abs_diff, axis=1)
# Reference point at the origin; every distance below is measured from it.
x = np.array([0, 0])
# NOTE(review): yA, yB and s are not defined in the visible cells; presumably
# they come from the star import of dissimilarity__utils -- confirm.
dA = manh(x, yA)
# One distance per row of yB, reshaped to the shape of s.
dB = manh(x, yB).reshape(s.shape)
%%time
# Plot both distance sets under the 'manhattan_distance' name.
plotDist(x, dA, dB, 'manhattan_distance', save=False)
Chebyshev Distance#
\[ \large \displaystyle
d(x,y) \mapsto \|x-y\|_\infty = \lim_{p\to\infty} \left(\sum_{i}^{n}|x_i-y_i|^p\right)^\frac{1}{p} = \overset{n}{\underset{i}{\max}}|x_i-y_i|
\]
def cheb(x, y):
    """Chebyshev (L-infinity) distance between ``x`` and every row of ``y``.

    Takes the largest absolute coordinate difference along axis 1, so
    ``x - y`` must be at least 2-D.
    """
    abs_diff = np.absolute(x - y)
    return np.max(abs_diff, axis=1)
# Reference point at the origin; every distance below is measured from it.
x = np.array([0, 0])
# NOTE(review): yA, yB and s are not defined in the visible cells; presumably
# they come from the star import of dissimilarity__utils -- confirm.
dA = cheb(x, yA)
# One distance per row of yB, reshaped to the shape of s.
dB = cheb(x, yB).reshape(s.shape)
%%time
# Plot both distance sets under the 'chebyshev_distance' name.
plotDist(x, dA, dB, 'chebyshev_distance', save=False)
Minkowski Distance#
\[ \large \displaystyle
d(x,y) \mapsto \|x-y\|_p = \left(\sum_{i}^{n}|x_i-y_i|^p\right)^\frac{1}{p}
\]
def mink(x, y, p):
    """Minkowski distance of order ``p`` between ``x`` and every row of ``y``.

    Raises the absolute coordinate differences to the power ``p``, sums them
    along axis 1 and takes the ``1/p``-th power of each sum.  ``p = 1`` gives
    the Manhattan distance and ``p = 2`` the Euclidean distance.
    """
    powered = np.absolute(x - y) ** p
    total = np.sum(powered, axis=1)
    return total ** (1 / p)
# Reference point at the origin; every distance below is measured from it.
x = np.array([0, 0])
# Order of the Minkowski distance used in this cell: p = 2**-1 = 0.5.
p = 2**-1
# NOTE(review): yA, yB and s are not defined in the visible cells; presumably
# they come from the star import of dissimilarity__utils -- confirm.
dA = mink(x, yA, p)
# One distance per row of yB, reshaped to the shape of s.
dB = mink(x, yB, p).reshape(s.shape)
%%time
# The literal braces are injected through format() arguments 0 and 1, so the
# ctitle string renders as the LaTeX text '$p=2^{-1}=0.5$'.
plotDist(x, dA, dB, 'minkowski_distance_A', ctitle=r'$p=2^{0}{2}{1}={3}$'.format('{', '}', -1, p), save=False)
%%time
fig, axes = plt.subplots(2, 4, sharex=True, sharey=True)
for j, axs in enumerate(axes):
for i, ax in enumerate(axs):
index = i + 4*j
exp = index - 3
pi = 2**exp
d = mink(x, yB, pi).reshape(s.shape)
plotContour(ax, d,
r'$p=2^{0}{2}{1}={3}$'.format('{', '}', exp, pi),
fsize=8)
figname = 'minkowski_distance_B'
fig.suptitle(' '.join([e.capitalize() for e in figname.split('_')]))
#fig.savefig('_output/similarity_{}.png'.format(figname), bbox_inches='tight')
Canberra Distance#
\[ \large \displaystyle
d(x,y) = \sum_{i}^{n}\frac{|x_i-y_i|}{|x_i|+|y_i|}
\]
def canb(x, y):
    """Canberra distance between ``x`` and every row of ``y``.

    Each coordinate contributes ``|x_i - y_i| / (|x_i| + |y_i|)``; the
    contributions are summed along axis 1, so ``x - y`` must be at least 2-D.

    A coordinate where both values are zero yields a 0/0 term.  That term is
    counted as 0 (the usual convention, also followed by SciPy) instead of
    letting NaN propagate into the whole row sum.
    """
    num = np.absolute(x - y)
    den = np.absolute(x) + np.absolute(y)
    # den == 0 only when x_i == y_i == 0, where num is 0 as well: leave those
    # slots at their initial 0.0 rather than dividing 0 by 0.
    terms = np.zeros(np.shape(den), dtype=float)
    np.divide(num, den, out=terms, where=den != 0)
    return np.sum(terms, axis=1)
# Reference point at (0.25, 0.25) instead of the origin used by the other
# cells.
# NOTE(review): presumably chosen so that |x_i| + |y_i| in canb never hits
# zero on the sampled points -- confirm.
x = np.array([0.25, 0.25])
# NOTE(review): yA, yB and s are not defined in the visible cells; presumably
# they come from the star import of dissimilarity__utils -- confirm.
dA = canb(x, yA)
# One distance per row of yB, reshaped to the shape of s.
dB = canb(x, yB).reshape(s.shape)
%%time
# Plot both distance sets under the 'canberra_distance' name.
plotDist(x, dA, dB, 'canberra_distance', save=False)
Cosine Distance#
\[ \large \displaystyle
d(x,y) \mapsto 1 - \frac{x \cdotp y}{\|x\|_2\|y\|_2} = 1 - \frac{\sum_{i}^{n}x_i y_i}{\sqrt{\sum_{i}^{n}x_i^2}\sqrt{\sum_{i}^{n}y_i^2}}
\]
def coss(x, y):
    """Cosine distance between ``x`` and every row of ``y``.

    Returns ``1 - (x . y) / (||x|| * ||y||)`` evaluated row by row along
    axis 1.  A 1-D ``x`` is first promoted to a single row so that it
    broadcasts against the rows of ``y``.
    """
    if x.ndim == 1:
        x = x[None, :]
    dot = np.sum(x * y, axis=1)
    norm_x = np.sum(x ** 2, axis=1) ** 0.5
    norm_y = np.sum(y ** 2, axis=1) ** 0.5
    return 1 - dot / (norm_x * norm_y)
# Reference point very close to, but not exactly at, the origin.
# NOTE(review): presumably so that the norm of x used as a divisor in coss is
# non-zero -- confirm.
x = np.array([1e-7, 1e-7])
# NOTE(review): yA, yB and s are not defined in the visible cells; presumably
# they come from the star import of dissimilarity__utils -- confirm.
dA = coss(x, yA)
# One distance per row of yB, reshaped to the shape of s.
dB = coss(x, yB).reshape(s.shape)
%%time
# Plot both distance sets under the 'cosine_distance' name.
plotDist(x, dA, dB, 'cosine_distance', save=False)
Hamming Distance#
\[ \large \displaystyle
d(x,y) = \sum_{i}^{n}w_i[x_i \neq y_i]
\]
def hamm(x, y):
    """Hamming distance between two strings.

    Counts the positions at which ``x`` and ``y`` hold different characters.
    Strings of unequal length are compared after padding the shorter one on
    the right with spaces, so each missing trailing character counts as a
    mismatch unless the longer string has a space at that position.

    Returns an ``int``; the result is symmetric in ``x`` and ``y``.
    """
    lmax = max(len(x), len(y))
    # Pad both operands on the same side.  Padding one on the right and the
    # other on the left shifts the shorter string against the longer one and
    # makes hamm(a, b) differ from hamm(b, a).
    x, y = x.ljust(lmax), y.ljust(lmax)
    return sum(a != b for a, b in zip(x, y))
# Passed as n= to every ieee754_realBin call below.
# NOTE(review): presumably the number of bits/digits of the encoding --
# confirm in dissimilarity__utils.
prec = 16
# Reference point at the origin.
x = np.array([0, 0])
# Encode each coordinate of x and concatenate the two encodings into one
# value (presumably a bit string, since hamm compares it character-wise).
X = ieee754_realBin(x[0], n=prec) + ieee754_realBin(x[1], n=prec)
# Same encoding applied element-wise to yA, then column 0 and column 1 of
# each row are concatenated so every row becomes a single encoded value.
YA = np.vectorize(ieee754_realBin)(yA, n=prec)
YA = np.vectorize(lambda a, b: a + b)(YA[:,0], YA[:,1])
# Hamming distance between the encoded reference point and each encoded row.
dA = np.vectorize(hamm)(X, YA)
# Identical pipeline for yB.
YB = np.vectorize(ieee754_realBin)(yB, n=prec)
YB = np.vectorize(lambda a, b: a + b)(YB[:,0], YB[:,1])
dB = np.vectorize(hamm)(X, YB)
# NOTE(review): hard-coded 256 x 256 here, whereas the other cells reshape
# to s.shape -- confirm the two agree.
dB = dB.reshape(256, 256)
%%time
# Plot both distance sets under the 'hamming_distance_A' name.
plotDist(x, dA, dB, 'hamming_distance_A', save=False)
# Reference word that every token of the text is compared against.
x = 'Lampião'
# NOTE(review): clni is not defined in the visible cells; presumably a text
# string provided by dissimilarity__utils -- confirm.
# Split it on whitespace into individual tokens.
yT = clni.split()
# Map each distinct token to its Hamming distance from the reference word
# (repeated tokens collapse into a single dictionary key).
dT = {pal:hamm(x, pal) for pal in yT}
%%time
# Render the text together with the per-token distances.
plotText(x, clni, dT, 'hamming_distance_B', save=False)