1 回答
TA贡献1847条经验 获得超7个赞
虚拟数据
from pyspark.ml.linalg import Vectors
from pyspark.ml.clustering import KMeans, KMeansModel
from pyspark.ml.pipeline import Pipeline
# Four 2-D sample points; each becomes a one-element tuple so that every row
# carries a single vector column named "scaledFeatures".
points = ([0.0, 0.0], [1.0, 1.0], [9.0, 8.0], [8.0, 9.0])
data = [(Vectors.dense(xy),) for xy in points]
matrix_normalized = spark.createDataFrame(data, ["scaledFeatures"])
您的代码
# KMeans with k=3 that reads the "scaledFeatures" column and writes its
# prediction to the "cluster" column.
kmeans = KMeans(k=3, featuresCol="scaledFeatures", predictionCol="cluster")
# The pipeline has a single stage: the KMeans estimator itself.
pipeline = Pipeline(stages=[kmeans])
# Fit on the sample DataFrame, then apply the fitted pipeline to the same data.
model = pipeline.fit(matrix_normalized)
cluster = model.transform(matrix_normalized)
只需更改最后一行
# The fitted pipeline exposes its stages; stage 0 is the fitted KMeans model,
# which provides the cluster centers.
kmeans_model = model.stages[0]
kmeans_model.clusterCenters()
[array([0.5, 0.5]), array([8., 9.]), array([9., 8.])]
添加回答
举报