In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection

from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA

# Build a small synthetic 2-D data set and its pairwise distance matrix.
n_samples = 20
# Seeded generator; the MDS cells below also receive it as `random_state`.
seed = np.random.RandomState(seed=3)
# `np.float` was only an alias of the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so use the builtin directly.
X_true = seed.randint(0, 20, 2 * n_samples).astype(float)
X_true = X_true.reshape((n_samples, 2))
# Center the data.
# NOTE: `.mean()` without an axis is a single scalar over all entries, so the
# same offset is removed from both columns (not a per-column centering).
X_true -= X_true.mean()

# Pairwise Euclidean distances between the rows of X_true, used as the
# precomputed dissimilarities for MDS.
similarities = euclidean_distances(X_true)
pd.DataFrame(similarities)
Out[1]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
0 0.000000 3.605551 11.401754 6.082763 3.000000 13.453624 11.401754 7.071068 8.246211 9.219544 11.180340 11.401754 8.544004 15.000000 6.324555 13.038405 10.440307 15.033296 6.403124 12.083046
1 3.605551 0.000000 14.866069 9.486833 6.324555 14.422205 14.035669 9.219544 6.082763 12.083046 14.000000 12.206556 11.045361 16.552945 9.433981 16.643317 8.000000 18.027756 7.615773 14.317821
2 11.401754 14.866069 0.000000 8.062258 9.848858 19.104973 12.649111 8.246211 19.235384 5.000000 11.704700 18.000000 12.041595 18.681542 5.830952 4.472136 21.470911 12.806248 14.317821 14.560220
3 6.082763 9.486833 8.062258 0.000000 3.162278 11.401754 6.403124 9.219544 12.041595 8.944272 5.830952 10.049876 4.472136 11.661904 6.403124 7.810250 14.212670 9.219544 6.324555 7.810250
4 3.000000 6.324555 9.848858 3.162278 0.000000 11.661904 8.544004 8.062258 9.433981 9.055385 8.246211 9.848858 5.830952 12.727922 6.082763 10.630146 11.661904 12.041595 5.099020 9.433981
5 13.453624 14.422205 19.104973 11.401754 11.661904 0.000000 7.280110 19.723083 11.180340 20.248457 8.246211 2.236068 7.071068 3.162278 17.464249 17.117243 12.000000 10.816654 7.071068 5.385165
6 11.401754 14.035669 12.649111 6.403124 8.544004 7.280110 0.000000 15.620499 13.928388 15.000000 1.000000 7.211103 3.000000 6.082763 12.727922 10.000000 15.652476 4.472136 7.280110 2.000000
7 7.071068 9.219544 8.246211 9.219544 8.062258 19.723083 15.620499 0.000000 15.033296 3.605551 15.000000 17.888544 13.453624 20.615528 3.162278 12.000000 17.117243 17.888544 13.000000 16.970563
8 8.246211 6.082763 19.235384 12.041595 9.433981 11.180340 13.928388 15.033296 0.000000 17.464249 14.317821 9.055385 11.180340 14.035669 14.560220 19.849433 2.236068 18.384776 6.708204 13.341664
9 9.219544 12.083046 5.000000 8.944272 9.055385 20.248457 15.000000 3.605551 17.464249 0.000000 14.212670 18.681542 13.416408 20.591260 3.000000 9.219544 19.646883 16.401219 14.142136 16.643317
10 11.180340 14.000000 11.704700 5.830952 8.246211 8.246211 1.000000 15.000000 14.317821 14.212670 0.000000 8.062258 3.162278 7.071068 12.041595 9.000000 16.124515 4.123106 7.615773 3.000000
11 11.401754 12.206556 18.000000 10.049876 9.848858 2.236068 7.211103 17.888544 9.055385 18.681542 8.062258 0.000000 6.082763 5.000000 15.811388 16.492423 10.049876 11.313708 5.000000 5.656854
12 8.544004 11.045361 12.041595 4.472136 5.830952 7.071068 3.000000 13.453624 11.180340 13.416408 3.162278 6.082763 0.000000 7.211103 10.816654 10.440307 13.038405 7.280110 4.472136 3.605551
13 15.000000 16.552945 18.681542 11.661904 12.727922 3.162278 6.082763 20.615528 14.035669 20.591260 7.071068 5.000000 7.211103 0.000000 18.027756 16.031220 15.033296 8.544004 8.944272 4.123106
14 6.324555 9.433981 5.830952 6.403124 6.082763 17.464249 12.727922 3.162278 14.560220 3.000000 12.041595 15.811388 10.816654 18.027756 0.000000 9.055385 16.763055 14.764823 11.180340 14.212670
15 13.038405 16.643317 4.472136 7.810250 10.630146 17.117243 10.000000 12.000000 19.849433 9.219544 9.000000 16.492423 10.440307 16.031220 9.055385 0.000000 22.022716 8.944272 13.892444 12.000000
16 10.440307 8.000000 21.470911 14.212670 11.661904 12.000000 15.652476 17.117243 2.236068 19.646883 16.124515 10.049876 13.038405 15.033296 16.763055 22.022716 0.000000 20.124612 8.602325 14.866069
17 15.033296 18.027756 12.806248 9.219544 12.041595 10.816654 4.472136 17.888544 18.384776 16.401219 4.123106 11.313708 7.280110 8.544004 14.764823 8.944272 20.124612 0.000000 11.704700 5.656854
18 6.403124 7.615773 14.317821 6.324555 5.099020 7.071068 7.280110 13.000000 6.708204 14.142136 7.615773 5.000000 4.472136 8.944272 11.180340 13.892444 8.602325 11.704700 0.000000 7.000000
19 12.083046 14.317821 14.560220 7.810250 9.433981 5.385165 2.000000 16.970563 13.341664 16.643317 3.000000 5.656854 3.605551 4.123106 14.212670 12.000000 14.866069 5.656854 7.000000 0.000000
In [2]:
# Settings shared by both estimators: 2-D embedding of a precomputed
# dissimilarity matrix, one job, and the shared `seed` random state.
mds_common = dict(n_components=2, max_iter=3000, dissimilarity="precomputed",
                  random_state=seed, n_jobs=1)

# Metric MDS. It is fitted first on purpose: both estimators draw from the
# same RandomState, so the order of the two fits must not change.
mds = manifold.MDS(eps=1e-9, **mds_common)
mds.fit(similarities)
pos = mds.embedding_

# Non-metric MDS (metric=False), run once (n_init=1) and started from the
# metric solution instead of a random configuration.
nmds = manifold.MDS(metric=False, eps=1e-12, n_init=1, **mds_common)
npos = nmds.fit_transform(similarities, init=pos)

`dissimilarity="precomputed"` 表示输入的是已经计算好的距离(相异度)矩阵。
`metric=False` 表示非度量 MDS(只保留相异度的排序关系,适用于有序/等级型数据);`metric=True` 表示度量 MDS(尽量保留距离的数值大小,适用于连续数据)。

In [3]:
mds.stress_  # Stress value of the fitted metric MDS; can be used to judge how many dimensions to reduce to
Out[3]:
1.6871996034640421e-07
In [4]:
# Rescale both embeddings in place so that their overall root-sum-of-squares
# magnitude equals that of the true coordinates.
true_norm = np.sqrt((X_true ** 2).sum())
for embedding in (pos, npos):
    embedding *= true_norm / np.sqrt((embedding ** 2).sum())
In [5]:
# Rotate every point set onto its own principal axes so the three
# configurations are drawn in comparable orientations. The single PCA
# instance is refitted for each array, in the order X_true, pos, npos.
pca = PCA(n_components=2)
X_true, pos, npos = [pca.fit_transform(points)
                     for points in (X_true, pos, npos)]
In [6]:
# Plot the ground-truth point positions.
fig = plt.figure(1)
# Axes spanning the whole figure area.
ax = plt.axes([0., 0., 1., 1.])

# Marker size; the MDS and NMDS scatter cells further down read this value.
s = 100
ax.scatter(X_true[:, 0], X_true[:, 1], color='navy', s=s, lw=0,
           label='True Position')
# The `label` above is only rendered when a legend is drawn.
ax.legend(scatterpoints=1, loc='best', shadow=False)

plt.show()
In [7]:
# Plot the metric MDS embedding (marker size `s` comes from the previous
# plotting cell).
plt.scatter(pos[:, 0], pos[:, 1], color='turquoise', s=s, lw=0, label='MDS')
# The `label` above is only rendered when a legend is drawn.
plt.legend(scatterpoints=1, loc='best', shadow=False)

plt.show()
In [8]:
# Plot the non-metric MDS embedding (marker size `s` comes from the first
# plotting cell).
plt.scatter(npos[:, 0], npos[:, 1], color='darkorange', s=s, lw=0, label='NMDS')
# The `label` above is only rendered when a legend is drawn.
plt.legend(scatterpoints=1, loc='best', shadow=False)
plt.show()