1 回答
TA贡献1812条经验 获得超5个赞
用:
# Build, for every row, the names present in both Name_prev and Name_now
# and the ratio Weight_prev / Weight_now for each of those names.
a, b = [], []
for n1, n2, w1, w2 in zip(df['Name_prev'], df['Name_now'],
                          df['Weight_prev'], df['Weight_now']):
    # Look the current weight up by name instead of by position: pairing
    # the two filtered weight lists positionally gives wrong ratios when
    # the shared names are ordered differently in the two lists.
    # NOTE(review): assumes names are hashable (ints in the sample data).
    now_weight = dict(zip(n2, w2))
    # Names found in both lists, kept in Name_prev order.
    n = [val for val in n1 if val in now_weight]
    # Previous weight divided by the current weight of the same name.
    w = [prev / now_weight[val]
         for val, prev in zip(n1, w1) if val in now_weight]
    a.append(n)
    b.append(w)
df = df.assign(name=a, weight=b)
print(df)
Id Name_prev Weight_prev Name_now Weight_now \
0 1 [1, 3, 4, 5] [10, 34, 67, 37] [1, 3, 5] [45, 76, 12]
1 2 [10, 3, 40, 5] [100, 134, 627, 347] [10, 40, 5] [34, 56, 78]
2 3 [1, 30, 4, 50] [11, 22, 45, 67] [1, 30, 50] [12, 45, 78]
3 4 [1, 7, 8, 9] [32, 54, 76, 98] [7, 8, 9] [34, 12, 32]
name weight
0 [1, 3, 5] [0.2222222222222222, 0.4473684210526316, 3.083...
1 [10, 40, 5] [2.9411764705882355, 11.196428571428571, 4.448...
2 [1, 30, 50] [0.9166666666666666, 0.4888888888888889, 0.858...
3 [7, 8, 9] [1.588235294117647, 6.333333333333333, 3.0625]
如果需要同时删除原始列,可以使用 DataFrame.pop:
# Same computation as the non-destructive variant, but DataFrame.pop removes
# each source column from df while handing it to the loop.
a, b = [], []
for n1, n2, w1, w2 in zip(df.pop('Name_prev'), df.pop('Name_now'),
                          df.pop('Weight_prev'), df.pop('Weight_now')):
    # Look the current weight up by name instead of by position: pairing
    # the two filtered weight lists positionally gives wrong ratios when
    # the shared names are ordered differently in the two lists.
    # NOTE(review): assumes names are hashable (ints in the sample data).
    now_weight = dict(zip(n2, w2))
    # Names found in both lists, kept in Name_prev order.
    n = [val for val in n1 if val in now_weight]
    # Previous weight divided by the current weight of the same name.
    w = [prev / now_weight[val]
         for val, prev in zip(n1, w1) if val in now_weight]
    a.append(n)
    b.append(w)
df = df.assign(name=a, weight=b)
print(df)
Id name weight
0 1 [1, 3, 5] [0.2222222222222222, 0.4473684210526316, 3.083...
1 2 [10, 40, 5] [2.9411764705882355, 11.196428571428571, 4.448...
2 3 [1, 30, 50] [0.9166666666666666, 0.4888888888888889, 0.858...
3 4 [7, 8, 9] [1.588235294117647, 6.333333333333333, 3.0625]
编辑:
在 Pandas 中对存放列表的列进行操作无法矢量化,因此更好的做法是先把列表展平成长表,用 merge 按 Id 和 Name 合并,最后在需要时再聚合回列表:
from itertools import chain
# Flatten the per-row "previous" lists into a long table: one row per
# (Id, Name) pair with its previous weight.
prev_names = list(chain.from_iterable(df['Name_prev'].values.tolist()))
prev_weights = list(chain.from_iterable(df['Weight_prev'].values.tolist()))
# Repeat each Id once per element of that row's Name_prev list.
prev_ids = df['Id'].values.repeat(df['Name_prev'].str.len())
df_prev = pd.DataFrame({
    'Name': prev_names,
    'Weight_prev': prev_weights,
    'Id': prev_ids,
})
print(df_prev)
Name Weight_prev Id
0 1 10 1
1 3 34 1
2 4 67 1
3 5 37 1
4 10 100 2
5 3 134 2
6 40 627 2
7 5 347 2
8 1 11 3
9 30 22 3
10 4 45 3
11 50 67 3
12 1 32 4
13 7 54 4
14 8 76 4
15 9 98 4
# Flatten the per-row "now" lists into a long table: one row per
# (Id, Name) pair with its current weight.
now_names = list(chain.from_iterable(df['Name_now'].values.tolist()))
now_weights = list(chain.from_iterable(df['Weight_now'].values.tolist()))
# Repeat each Id once per element of that row's Name_now list.
now_ids = df['Id'].values.repeat(df['Name_now'].str.len())
df_now = pd.DataFrame({
    'Name': now_names,
    'Weight_now': now_weights,
    'Id': now_ids,
})
print(df_now)
Name Weight_now Id
0 1 45 1
1 3 76 1
2 5 12 1
3 10 34 2
4 40 56 2
5 5 78 2
6 1 12 3
7 30 45 3
8 50 78 3
9 7 34 4
10 8 12 4
11 9 32 4
# Inner join keeps only the (Id, Name) pairs present in both tables.
df = df_prev.merge(df_now, how='inner', on=['Id', 'Name'])
# Ratio of the previous weight to the current weight for each shared name.
df['Weight'] = df['Weight_prev'].div(df['Weight_now'])
print(df)
Name Weight_prev Id Weight_now Weight
0 1 10 1 45 0.222222
1 3 34 1 76 0.447368
2 5 37 1 12 3.083333
3 10 100 2 34 2.941176
4 40 627 2 56 11.196429
5 5 347 2 78 4.448718
6 1 11 3 12 0.916667
7 30 22 3 45 0.488889
8 50 67 3 78 0.858974
9 7 54 4 34 1.588235
10 8 76 4 12 6.333333
11 9 98 4 32 3.062500
# Collapse the long table back to one row per Id, gathering the names and
# ratios into lists. The columns are selected with a list: selecting them
# with a bare tuple (['Name', 'Weight'] without the outer brackets) is
# deprecated since pandas 1.0 and raises an error in pandas 2.0.
df = df.groupby('Id')[['Name', 'Weight']].agg(list).reset_index()
print(df)
Id Name Weight
0 1 [1, 3, 5] [0.2222222222222222, 0.4473684210526316, 3.083...
1 2 [10, 40, 5] [2.9411764705882355, 11.196428571428571, 4.448...
2 3 [1, 30, 50] [0.9166666666666666, 0.4888888888888889, 0.858...
3 4 [7, 8, 9] [1.588235294117647, 6.333333333333333, 3.0625]
添加回答
举报