1 回答
TA贡献1802条经验 获得超6个赞
您可以先用 numpy.repeat 和 numpy.tile 创建一个辅助 DataFrame，再用 merge 对它做左连接（left join）：
# Number of rows every region must end up with: the size of the largest
# region, obtained via Counter.most_common(1).
from collections import Counter
# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and the old
# name was removed in pandas 1.0, so use .codes here.
no_vals = Counter(df.index.codes[0]).most_common(1)[0][1]
print(no_vals)
3
# Helper frame with one row per (region, id) pair: every region of the first
# index level is repeated no_vals times and paired with ids 0..no_vals-1.
regions = df.index.levels[0]
df1 = pd.DataFrame({
    'region': np.repeat(regions, no_vals),
    # Tile by the same level count used for 'region' so both columns always
    # have the same length (the deprecated .labels accessor is not needed).
    'id': np.tile(np.arange(no_vals), len(regions)),
})
print(df1)
region id
0 intro 0
1 intro 1
2 intro 2
3 mid 0
4 mid 1
5 mid 2
6 start 0
7 start 1
8 start 2
9 title 0
10 title 1
11 title 2
# MultiIndex to columns
df = df.reset_index()
# new column with a running counter of the rows inside each region
df.insert(1, 'id', df.groupby('region').cumcount())
# left-merge onto the helper frame (regions with fewer rows get NaN rows),
# remove the helper id column and rebuild the MultiIndex
df = (df1.merge(df, how='left')
         # keyword form: the positional axis argument of drop() is no longer
         # accepted by recent pandas versions
         .drop(columns='id')
         .set_index(['region', 'feat_index', 'position_in_region']))
print(df)
document_0 document_1 document_2
region feat_index position_in_region
intro 9.0 422.0 0.39 0.80 0.81
3.0 5834.0 0.79 0.06 0.15
0.0 8813.0 0.01 0.92 0.19
mid 7.0 3187.0 0.55 0.74 0.17
8.0 9407.0 0.99 0.06 0.11
NaN NaN NaN NaN NaN
start 2.0 997.0 0.67 0.96 0.34
4.0 3154.0 0.61 0.57 0.80
1.0 8416.0 0.84 0.19 0.03
title 6.0 5408.0 0.15 0.29 0.67
5.0 8421.0 0.23 0.24 0.46
NaN NaN NaN NaN NaN
另一个解决方案是使用 DataFrame.reindex 和 MultiIndex.from_product：
# Number of rows every region must end up with: the size of the largest
# region, obtained via Counter.most_common(1).
from collections import Counter
# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and the old
# name was removed in pandas 1.0, so use .codes here.
no_vals = Counter(df.index.codes[0]).most_common(1)[0][1]
print(no_vals)
3
# Target index: the cartesian product of every region with ids 0..no_vals-1.
region_level = df.index.levels[0]
slot_ids = np.arange(no_vals)
mux = pd.MultiIndex.from_product([region_level, slot_ids],
                                 names=['region', 'id'])
print(mux)
MultiIndex(levels=[['intro', 'mid', 'start', 'title'], [0, 1, 2]],
codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]],
names=['region', 'id'])
# Running position of each row inside its region, computed while df still
# carries its original index.
row_in_region = df.groupby(level=0).cumcount()
# Move index levels 1 and 2 out to columns, then append the per-region
# counter as a new index level.
df = df.reset_index(level=[1, 2])
df = df.set_index(row_in_region, append=True)
# Expand to the full (region, id) grid; combinations not present in df
# show up as NaN rows.
df = df.reindex(mux)
# Drop the helper counter level and put the two original levels back.
df = df.reset_index(level=1, drop=True)
df = df.set_index(['feat_index', 'position_in_region'], append=True)
print(df)
document_0 document_1 document_2
region feat_index position_in_region
intro 9.0 422.0 0.39 0.80 0.81
3.0 5834.0 0.79 0.06 0.15
0.0 8813.0 0.01 0.92 0.19
mid 7.0 3187.0 0.55 0.74 0.17
8.0 9407.0 0.99 0.06 0.11
NaN NaN NaN NaN NaN
start 2.0 997.0 0.67 0.96 0.34
4.0 3154.0 0.61 0.57 0.80
1.0 8416.0 0.84 0.19 0.03
title 6.0 5408.0 0.15 0.29 0.67
5.0 8421.0 0.23 0.24 0.46
NaN NaN NaN NaN NaN
添加回答
举报