import os
import pandas as pd
import numpy as np
import math


def _read_delimited(data_path, **read_kwargs):
    """Read a delimited text file, trying GBK first and falling back to UTF-8."""
    try:
        return pd.read_csv(data_path, encoding="gbk", **read_kwargs)
    except UnicodeError:
        # Only a decoding failure justifies retrying with another encoding;
        # any other error (missing file, parse error) should surface as-is.
        return pd.read_csv(data_path, encoding="utf-8", **read_kwargs)


def train_data_reads(path):
    """Load the first file found in ``<path>/data`` into a DataFrame.

    The reader is chosen from the file extension (case-insensitive):
    ``csv`` is read as comma-separated text, ``txt`` as tab-separated text,
    and anything else is handed to ``pandas.read_excel``.

    Args:
        path: Directory that contains a ``data`` sub-directory.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If ``<path>/data`` contains no entries.
    """
    data_directory = os.path.join(path, "data")
    data_name_list = os.listdir(data_directory)
    if not data_name_list:
        raise FileNotFoundError("no data file found in " + data_directory)
    file_name = data_name_list[0]
    data_path = os.path.join(data_directory, file_name)

    # splitext copes with names containing zero or several dots, which a
    # two-name unpacking of split(".") does not.
    extension = os.path.splitext(file_name)[1].lstrip(".").lower()
    if extension == "csv":
        return _read_delimited(data_path)
    if extension == "txt":
        return _read_delimited(data_path, sep="\t")
    return pd.read_excel(data_path)


def feature_label_split(data):
    """Binarize the last column of ``data`` and return features plus label.

    Rows whose label (last column) is missing are dropped. The remaining
    labels are mapped to ``0`` when the value is ``>= 7`` and to ``1``
    otherwise. The binarized label is appended as a column named ``0``
    (integer), replacing the original label column.

    Args:
        data: DataFrame whose last column holds the numeric label.

    Returns:
        A new DataFrame with the feature columns followed by the binary
        label column, indexed like the retained rows.
    """
    label_name = data.columns.values.tolist()[-1]

    # Drop rows whose label is missing.
    data = data[data[label_name].notna()]

    # Split features from the label.
    features = data.drop([label_name], axis=1)
    labels = data[label_name]

    # >= 7 -> 0, everything else -> 1.
    binary = (~(labels >= 7)).astype(np.int64).to_frame(name=0)
    return pd.concat([features, binary], axis=1)


def main(path="E:/AnaLinReg/Data_upload_cls", output_path='D1.csv'):
    """Read the source data, binarize its label and write the result as CSV.

    Args:
        path: Directory containing the ``data`` sub-directory to read from.
        output_path: Destination CSV file (written as UTF-8).
    """
    data = train_data_reads(path)
    data = feature_label_split(data)
    data.to_csv(output_path, encoding='utf-8')
    print('Done')


if __name__ == "__main__":
    main()

# Regression data: the last column is a number from 1 to 10.
# Classification data: the last column contains only 0 and 1.
# Regression values 0-6 correspond to class 1.
# Regression values 7-10 correspond to class 0.
点击查看更多内容
为 TA 点赞
评论
共同学习,写下你的评论
评论加载中...
作者其他优质文章
正在加载中
感谢您的支持,我会继续努力的~
扫码打赏,你说多少就多少
赞赏金额会直接到老师账户
支付方式
打开微信扫一扫,即可进行扫码打赏哦