1 回答
TA贡献1842条经验 获得超12个赞
import pandas as pd
# Collect every game table from the listed HTML files into one flat list of
# records, tagging each row with the platform named in the table's first cell.

# one cleaned DataFrame per matching table, across all files
df_list = []

# HTML files to load
list_of_files = ['test.html']

for file in list_of_files:
    # read_html returns a list of DataFrames, one per <table>; `match` keeps
    # only the tables whose text contains 'Game Name'
    dfl = pd.read_html(file, match='Game Name')

    for d in dfl:
        # Each table is read without a real header, so the layout is:
        #   row 0  -> platform title (its first cell is the platform name)
        #   row 1  -> the actual column headers
        #   row 2+ -> the data
        # promote row 1 to be the column labels
        d.columns = d.iloc[1]
        # broadcast the platform name (row 0, column 0) to every row
        d['platform'] = d.iloc[0, 0]
        # drop the two header rows, keeping only the data rows
        d = d.iloc[2:]
        # store an independent copy so later edits cannot alias the parsed table
        df_list.append(d.copy())

# stack all cleaned tables and renumber the rows 0..n-1
df = pd.concat(df_list).reset_index(drop=True)

# one dict per row, keyed by column label
records = df.to_dict('records')
print(records)
[out]:
[{'Game Name': 'GoW', 'Price': '49.99', 'platform': 'PS4'},
{'Game Name': 'FF VII R', 'Price': '59.99', 'platform': 'PS4'},
{'Game Name': 'Gears 5', 'Price': '49.99', 'platform': 'XBX'},
{'Game Name': 'Forza 5', 'Price': '59.99', 'platform': 'XBX'}]
添加回答
举报