import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import numpy as np
# --- Step 1: synthetic dataset ---------------------------------------------
# make_blobs hands back the sample coordinates (X) and the label of the blob
# each sample was drawn from (y_true). y_true is kept only for reference; the
# rest of this script never reads it.
X, y_true = make_blobs(
    n_samples=300,
    centers=3,
    cluster_std=0.60,
    random_state=0,
)

# Column views of the two features, reused by both scatter plots below.
feature_1, feature_2 = X[:, 0], X[:, 1]

# Show the raw points before any cluster assignment exists.
plt.scatter(feature_1, feature_2, s=50)
plt.title('Original Data Points')
plt.show()

# --- Step 2: configure the estimator ---------------------------------------
# n_clusters is the 'k' in k-means; it matches the 3 blobs generated above.
kmeans_options = {
    'n_clusters': 3,
    'init': 'k-means++',
    'n_init': 10,
    'random_state': 0,
}
kmeans = KMeans(**kmeans_options)

# --- Step 3: fit and assign every sample to a cluster ----------------------
y_kmeans = kmeans.fit_predict(X)

# --- Step 4: fitted centroids, one row per cluster -------------------------
centers = kmeans.cluster_centers_
centroid_x, centroid_y = centers[:, 0], centers[:, 1]

# --- Step 5: plot the clustering -------------------------------------------
plt.figure(figsize=(8, 6))
# Samples, colored by the cluster index they were assigned to.
plt.scatter(feature_1, feature_2, c=y_kmeans, s=50, cmap='viridis')
# Centroids drawn on top as large, semi-transparent black stars.
plt.scatter(centroid_x, centroid_y, c='black', s=200, alpha=0.7, marker='*')
plt.title('K-Means Clustering Results')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()


# NOTE: leftover blog-page footer text, kept as a comment so the script parses:
# No comments:
# Post a Comment