getitem错误
# coding = utf-8 #
from __future__ import division
import numpy as np
import pandas as pd
import scipy.stats
from scipy.stats import mode
df = pd.read_csv('train.csv')
label = df['TARGET']
df = df.drop(['ID', 'TARGET'], axis=1)
def fill_fre_top_5(x):
if(len(x))<=5:
new_array = np.null(5, np.nan)
new_array[0:len(x)] = x
return new_array
def eda_analysis( missSet = [np.nan, 9999999999, -999999], df = None):
df_eda_summary = eda_analysis(missSet=[np.nan, 9999999999, -999999], df=df.iloc[:, 0:3])
# 1Count #
count_un = df.apply(lambda x: len(x.unique()))
count_un = count_un.to_frame['count']
# 2Zero Values #
count_zero = df.apply(lambda x: np.sum(x == 0))
count_zero = count_zero.to_frame['count_zero']
# 3 Mean #
df_mean = df.apply(lambda x: np.mean(x[~np.isin(x, missSet)]))
df_mean = df_mean.to_frame['mean']
# 4 Median #
df_median = df.apply(lambda x: np.median(x[~np.isin(x, missSet)]))
df_median = df_median.to_frame['median']
# 5 Mode #
df_mode = df.apply(lambda x: scipy.stats.mode(x[~np.isin(x, missSet)])[0][0])
df_mode = df_mode.to_frame['mode']
#6 Mode Percentage#
df_mode_count = df.apply(lambda x: scipy.stats.mode(x[~np.isin(x, missSet)])[1][0])
df_mode_count = df_mode_count.to_frame['mode_count']
df_mode_perct = df_mode_count / df.shape[0]
df_mode_perct.columns = ['mode_perct']
#7 Min Value#
df_min = df.apply(lambda x: np.min(x[~np.isin(x, missSet)]))
df_min = df_min.to_frame['min']
#8 Max Value#
df_max = df.apply(lambda x: np.max(x[~np.isin(x, missSet)]))
df_max = df_min.to_frame['max']
#9 Quantile Values#
json_quantile = {}
for i, name in enumerate(df.columns):
json_quantile[name] = np.percentile(df[name][~np.isin(df[name], missSet)], (1, 5, 25, 50, 75, 95, 99))
df_quantile = pd.DataFrame(json_quantile)[df.columns].T
df_quantile.columns = ['quan01', 'quan05', 'quan25', 'quan50', 'quan75', 'quan95', 'quan99']
#10
请问81行 直接带入数据为何出错