import tensorflow as tfimport pandas as pdimport numpy as npimport matplotlib as pltfrom sklearn.model_selection import train_test_splitfrom tensorflow import kerasfrom tensorflow.keras.models import Sequentialfrom tensorflow.keras.layers import Densefrom sklearn.preprocessing import StandardScalerimport functoolsLABEL_COLUMN = 'Endstage'LABELS = [1, 2, 3, 4]x = pd.read_csv('HCVnew.csv', index_col=False)def get_dataset(file_path, **kwargs): dataset = tf.data.experimental.make_csv_dataset( file_path, batch_size=35, # Artificially small to make examples easier to show. label_name=LABEL_COLUMN, na_value="?", num_epochs=1, ignore_errors=True, **kwargs) return datasetSELECT_COLUMNS = ["Alter", "Gender", "BMI", "Fever", "Nausea", "Fatigue", "WBC", "RBC", "HGB", "Plat", "AST1", "ALT1", "ALT4", "ALT12", "ALT24", "ALT36", "ALT48", "ALT24w", "RNABase", "RNA4", "Baseline", "Endstage"]DEFAULTS = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]temp_dataset = get_dataset("HCVnew.csv", select_columns=SELECT_COLUMNS, column_defaults=DEFAULTS)def pack(features, label): return tf.stack(list(features.values()), axis=-1), labelpacked_dataset = temp_dataset.map(pack)"""for features, labels in packed_dataset.take(1): print(features.numpy()) print() print(labels.numpy())"""NUMERIC_FEATURES = ["Alter", "Gender","BMI", "Fever", "Nausea", "Fatigue", "WBC", "RBC", "HGB", "Plat", "AST1", "ALT1", "ALT4", "ALT12", "ALT24", "ALT36", "ALT48", "ALT24w", "RNABase", "RNA4", "Baseline", "Endstage"]您好,我正在尝试构建一个神经网络,该神经网络可以根据包含患者信息的csv文件预测丙型肝炎,我无法修复错误...我收到错误:KeyError'Endstage',而Endstage是包含相应值(介于1和4之间)并用作标签列的csv列。如果有人有一个想法可以解决我的问题,那么请告诉我。非常感谢您的帮助!
1 回答
小怪兽爱吃肉
TA贡献1852条经验 获得超1个赞
这是因为 Endstage 是你的标签列,框架会自动把它从数据集的特征中移除(弹出),这其实是在帮你的忙。否则,您的训练数据中也会包含目标类别,训练就失去了意义。
请将其从 NUMERIC_FEATURES 中删除,并从任何其他会使其进入训练集特征的位置删除。
[编辑]
OP在后续问题(在评论中)中询问了为什么在修复初始问题后,他会遇到错误:
ValueError: Feature numeric is not in features dictionary(值错误:特征 numeric 不在特征字典中)
从代码来看,名为 numeric 的特征是通过调用 PackNumericFeatures 生成的。后者用于创建 packed_train_data 和 packed_test_data,但这两个数据集从未被使用。然而,下面这行代码:
numeric_column = tf.feature_column.numeric_column('numeric', normalizer_fn=normalizer, shape=[len(NUMERIC_FEATURES)])
却假定名为 numeric 的特征已经存在于数据中——因此出现了这个错误。
添加回答
举报
0/150
提交
取消