只需使用 nltk 即可，例如：

import nltk
from nltk.cluster.kmeans import KMeansClusterer

NUM_CLUSTERS = <choose a value>
data = <sparse matrix that you would normally give to scikit>.toarray()

kclusterer = KMeansClusterer(NUM_CLUSTERS, distance=nltk.cluster.util.cosine_distance, repeats=25)
assigned_clusters = kclusterer.cluster(data, assign_clusters=True)