1 回答
TA贡献2011条经验 获得超2个赞
我可以得到(或多或少)预期的结果,不需要使用 apply(),而是直接使用 groupby():
# Group rows by the UDH value minus its final character (in the sample data
# only that last character differs between parts of one message) together
# with the sender ID.
groups = df.groupby([df['UDH'].str[:-1], 'Original Sender ID'])
# Join the body fragments of each group with a space and keep the largest
# timestamp.  The string 'max' is used instead of the builtin ``max`` because
# recent pandas versions deprecate passing builtins to ``agg``.
df2 = groups.agg({'Body': ' '.join, 'Received Date/Time': 'max'}).reset_index()
io.StringIO() 只是用来模拟文件。
import io

import pandas as pd

# Sample data standing in for a real file.  Columns are separated by two or
# more spaces because the Body and timestamp values themselves contain single
# spaces.
text = '''Body                 UDH           Original Sender ID  Received Date/Time
Hi John, Can You     ABC0010101    GGQMS               01/02/2001 01:03:19
Wait A moment?       ABC0010102    GGQMS               01/02/2001 01:03:20
Whats is             050004000111  112233445566        01/03/2001 11:16:01
Carrine Doing        050004000112  112233445566        01/03/2001 11:16:01
Over There?          050004000113  112233445566        01/03/2001 11:16:02
Where is             CD10F1011     zwerty              01/03/2001 15:22:10
Your Homework?       CD10F1012     zwerty              01/03/2001 15:22:11
Order for Pizza      AACCDD55001   112233445566        01/04/2001 19:20:21
Now for cheap $.     AACCDD55002   112233445566        01/04/2001 19:20:22
John, you know       G0500781      GGQMS               01/04/2001 10:21:21
Where can I get it?  G0500782      GGQMS               01/04/2001 10:21:21'''

# A regex separator is only supported by the python parsing engine; naming it
# explicitly avoids pandas' fallback warning.  The raw string keeps ``\s``
# from being treated as an (invalid) string escape.
df = pd.read_csv(io.StringIO(text), sep=r'\s{2,}', engine='python')

# Group rows by the UDH value minus its final character (in the sample data
# only that last character differs between parts of one message) together
# with the sender ID.
groups = df.groupby([df['UDH'].str[:-1], 'Original Sender ID'])

# Join the body fragments of each group with a space and keep the largest
# timestamp.  The string 'max' is used instead of the builtin ``max`` because
# recent pandas versions deprecate passing builtins to ``agg``.
# NOTE: timestamps are compared as plain strings here; convert the column
# with pd.to_datetime first if string order may differ from time order.
df2 = groups.agg({'Body': ' '.join, 'Received Date/Time': 'max'}).reset_index()

# Alternative: group by the UDH prefix only and aggregate the sender as well,
# e.g. groups = df.groupby([df['UDH'].str[:-1]]) with
# {'Body': ' '.join, 'Received Date/Time': 'max', 'Original Sender ID': 'min'}.

df2 = df2.sort_values('Received Date/Time')

# Widen the console output so each row prints on a single line.
pd.options.display.width = 200
print(df2)
结果
UDH Original Sender ID Body Received Date/Time
2 ABC001010 GGQMS Hi John, Can You Wait A moment? 01/02/2001 01:03:20
0 05000400011 112233445566 Whats is Carrine Doing Over There? 01/03/2001 11:16:02
3 CD10F101 zwerty Where is Your Homework? 01/03/2001 15:22:11
4 G050078 GGQMS John, you know Where can I get it? 01/04/2001 10:21:21
1 AACCDD5500 112233445566 Order for Pizza Now for cheap $. 01/04/2001 19:20:22
添加回答
举报