import numpy as np
import matplotlib.pyplot as plt
# ===== k-means implementation =====
def init_centroid(X, n_data, k):
    """Pick ``k`` distinct rows of ``X`` at random as the initial centroids.

    The rows are chosen by shuffling the indices ``0 .. n_data - 1`` with
    ``np.random.permutation`` (NumPy's global random state) and keeping the
    first ``k`` of them, so no row is selected twice.

    Args:
        X: Array-like of data points, one point per row.
        n_data: Number of rows of ``X`` to draw from.
        k: Number of centroids to select; must satisfy ``0 <= k <= n_data``.

    Returns:
        A new array holding the ``k`` selected rows (same dtype as ``X``).
        It is a copy, so modifying it does not modify ``X``.

    Raises:
        ValueError: If ``k`` is negative or larger than ``n_data``. Without
            this check the slice below would silently yield the wrong
            number of centroids.
    """
    if not 0 <= k <= n_data:
        raise ValueError(
            f"k must be between 0 and n_data ({n_data}), got {k}"
        )
    X = np.asarray(X)
    idx = np.random.permutation(n_data)[:k]
    # Integer-array indexing returns a copy, not a view into X.
    return X[idx]
def compute_distances(X, k, n_data, centroids):
    """Return the Euclidean distance from every data point to every centroid.

    Args:
        X: Data points, one point per row.
        k: Number of centroids to measure against.
        n_data: Number of data points (rows of the result).
        centroids: Centroid coordinates, one centroid per row.

    Returns:
        An ``(n_data, k)`` float array whose entry ``[i, j]`` is the distance
        between ``X[i]`` and ``centroids[j]``.
    """
    result = np.zeros((n_data, k))
    for col in range(k):
        # Broadcasting subtracts this one centroid from every row of X.
        offset = X - centroids[col]
        squared_norm = np.square(offset).sum(axis=1)
        result[:, col] = np.sqrt(squared_norm)
    return result
def k_means(k, X, max_iter=300):
    """Cluster the rows of ``X`` into ``k`` groups with k-means.

    Starting from ``k`` randomly chosen data points, the function alternates
    between assigning every point to its nearest centroid and moving each
    centroid to the mean of the points assigned to it. It stops as soon as
    an assignment step leaves every label unchanged, or after ``max_iter``
    iterations.

    Args:
        k: Number of clusters; must satisfy ``1 <= k <= len(X)``.
        X: 2-D array-like of shape ``(n_data, n_features)``.
        max_iter: Maximum number of assign/update iterations.

    Returns:
        A tuple ``(cluster, centroids)``: ``cluster`` is an integer array of
        length ``n_data`` with each point's cluster index, and ``centroids``
        is a ``(k, n_features)`` floating-point array. If ``max_iter`` is 0
        no assignment is ever made and every label is ``-1``.

    Raises:
        ValueError: If ``X`` is not 2-D or ``k`` is outside ``1 .. n_data``.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"X must be 2-D (n_data, n_features), got shape {X.shape}"
        )
    n_data = X.shape[0]
    if not 1 <= k <= n_data:
        raise ValueError(
            f"k must be between 1 and the number of data points "
            f"({n_data}), got {k}"
        )

    centroids = init_centroid(X, n_data, k)
    # The initial centroids inherit X's dtype. For integer (or bool) data,
    # assigning a mean into them would silently drop the fractional part,
    # so promote them to float before the first update.
    if not np.issubdtype(centroids.dtype, np.inexact):
        centroids = centroids.astype(float)

    # -1 matches no real cluster index, so the first pass never "converges".
    cluster = np.full(n_data, -1, dtype=int)
    for _ in range(max_iter):
        distances = compute_distances(X, k, n_data, centroids)
        new_cluster = np.argmin(distances, axis=1)
        # Unchanged labels mean the centroids are already the means of
        # their clusters; recomputing them would change nothing.
        if (new_cluster == cluster).all():
            break
        cluster = new_cluster
        for j in range(k):
            members = new_cluster == j
            # A cluster that lost all its points keeps its old centroid
            # (the mean of an empty selection would be NaN).
            if members.any():
                centroids[j] = X[members].mean(axis=0)
    return cluster, centroids
# ===== Build the test data =====
np.random.seed(0)
# Generate three synthetic clusters of 50 two-dimensional points each,
# drawn in this order so the seeded random stream is consumed identically.
X1, X2, X3 = (
    np.random.randn(50, 2) + np.array(center)
    for center in ([0, 0], [5, 5], [0, 5])
)
X = np.vstack([X1, X2, X3])
# ===== Run k-means =====
k = 3
labels, centroids = k_means(k, X)
# ===== Show the result =====
# Data points, coloured by the cluster label assigned to them.
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
# Final centroids, drawn on top as large red crosses.
plt.scatter(
    centroids[:, 0],
    centroids[:, 1],
    color='red',
    marker='x',
    s=200,
    label='centroids',
)
plt.title("K-means Clustering Result")
plt.legend()
plt.show()