1 回答
TA贡献1836条经验 获得超5个赞
下面的代码应该可以解决这个问题:
import pandas as pd
import numpy as np
from difflib import SequenceMatcher
def similar(a, b):
    """Return the similarity ratio between two sequences.

    Args:
        a: First sequence to compare (the script passes fruit-name strings).
        b: Second sequence to compare against ``a``.

    Returns:
        The ``difflib.SequenceMatcher`` ratio as a float between 0.0 and 1.0,
        where 1.0 means the two sequences are identical. The comparison is
        case-sensitive for strings.
    """
    # First argument None: no custom "junk" predicate is supplied.
    return SequenceMatcher(None, a, b).ratio()
# Sample data: two tables whose 'Fruit' columns contain roughly the same
# names, but in a different order and with spelling differences.
data1 = {
    'Fruit': ['Apple', 'Pear', 'mango', 'Pinapple'],
    'nr1': [22000, 25000, 27000, 35000],
    'nr2': [1, 2, 3, 4],
}
data2 = {
    'Fruit': ['Apple', 'mango', 'peer', 'Pinapple'],
    'nr1': [22000, 25000, 27000, 35000],
    'nr2': [1, 2, 3, 4],
}
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# For every fruit in df1, find the position of the most similar fruit in df2.
# np.argmax returns the first position on ties. Nothing prevents the same df2
# row from being chosen for several df1 rows, or from never being chosen.
order = [
    np.argmax([similar(fruit, candidate) for candidate in df2['Fruit']])
    for fruit in df1['Fruit']
]

# Reorder df2 so that row i is the best match for df1 row i. reset_index()
# renumbers the rows 0..n-1 so they line up with df1, and keeps the previous
# row labels in a new 'index' column.
df2 = df2.iloc[order].reset_index()

# Place the two aligned tables side by side (column-wise concatenation).
pd.concat([df1, df2], axis=1)
添加回答
举报