1 回答

TA贡献1982条经验 获得超2个赞
您可以使用 collections.Counter：
from collections import Counter
# Sample input: four word lists (ten words each) whose overlap is counted below.
a = [
    'computer', 'laptop', 'mac', 'use', 'bought',
    'like', 'warranty', 'screen', 'way', 'just',
]
b = [
    'laptop', 'computer', 'use', 'just', 'like',
    'time', 'great', 'windows', 'macbook', 'months',
]
c = [
    'computer', 'great', 'laptop', 'mac', 'buy',
    'just', 'macbook', 'use', 'pro', 'windows',
]
d = [
    'laptop', 'computer', 'great', 'time', 'battery',
    'use', 'apple', 'love', 'just', 'work',
]
def get_most_common(*iterables):
    """Count how often each item occurs across all the given iterables.

    Args:
        *iterables: Any number of iterables of hashable items (for example,
            lists of words).

    Returns:
        A ``collections.Counter`` mapping each item to its total number of
        occurrences over every iterable. When called with no arguments, an
        empty Counter is returned.
    """
    # Named ``counts`` rather than ``c`` so it does not shadow the
    # module-level list ``c``.
    counts = Counter()
    for items in iterables:
        counts.update(items)
    return counts
# Rank every word by its total count across the four lists, highest first.
mc = get_most_common(a, b, c, d).most_common()
# Keep only the words (dropping the counts) of the four top-ranked entries.
top4 = [word for word, _count in mc[:4]]
print(top4)
输出:
['computer', 'laptop', 'use', 'just']
# Fragment of a method body: the enclosing `def` is not shown and the original
# indentation was lost, so which statements belong to the loop must be
# confirmed against the real source.
# Purpose: gather one list of top words per topic, then return the four words
# that appear most often across those lists.
some_results = [] # one list of top words per topic
for t in range(self.numTopics):
# Takes the last K entries of the argsort result in reverse order; presumably
# the indices of the K largest values in column t, largest first (assumes
# argsort sorts ascending, as numpy's does - TODO confirm).
topWordIndices = pseudocounts[:, t].argsort()[-1:-(K+1):-1]
# NOTE(review): if self.vectorizer is a scikit-learn vectorizer, newer releases
# replace get_feature_names() with get_feature_names_out() - confirm version.
vocab = self.vectorizer.get_feature_names()
print (t, [vocab[i] for i in topWordIndices])
some_results.append( [vocab[i] for i in topWordIndices] )
# Each collected word list is passed to get_most_common as its own argument.
mc = get_most_common(*some_results).most_common()
# Return only the words (not their counts) of the first four ranked entries.
return [k for k,v in mc[0:4]]
添加回答
举报