1 回答
TA贡献1876条经验 获得超7个赞
下面是一个应该非常有效的函数。
def window_fun(df, fun, col, partition_by, order_by = None, asc = None):
"""
equivalent to select fun(col) over (partion by [partition_by] order by [order_by]
df: a pandas DataFrame
fun: a function that accepts a series as its only input or which can be applied using Series.apply
x: the column name, as a string, you want to apply fun to
partition_by: group by keys as a single string or list of strings
order_by: order by keys as a single string or list of strings
"""
if type(partition_by) == str:
partition_by = [partition_by]
if order_by == None:
return df[[col]+partition_by].groupby(partition_by)[col].transform(fun)
if asc == None:
asc = [True] * (1 if type(order_by) == str else len(order_by))
if type(order_by) == str:
order_by = [order_by]
return df[[col]+order_by+partition_by].sort_values(order_by, ascending = asc).groupby(partition_by)[col].transform(fun)
例
df = pd.DataFrame.from_dict({'id': ['A', 'B', 'A', 'C', 'D', 'B', 'C'],
'val': [1,2,-3,1,5,6,-2], 'stuff':['12','23232','13','1234','3235','3236','732323'],
'otherstuff':np.arange(7)})
print(df)
window_fun(df,pd.Series.cumsum, 'val', "id", ["stuff", "otherstuff"], [True, False])
添加回答
举报