3 回答
TA贡献1842条经验 获得超21个赞
下面的代码应该可以得到您想要的结果：
# Build a small sample DataFrame that mixes integer and string columns.
column_names = (
    'id', 'compatible', 'product', 'ios', 'pc', 'other',
    'devices', 'customer', 'subscriber', 'circle', 'smb',
)
sample_rows = [
    (1, 'Y', 'Y', 0, 0, 0, 2, 'Y', 'N', 'Y', 'Y'),
    (2, 'N', 'Y', 2, 1, 2, 3, 'N', 'Y', 'Y', 'N'),
    (3, 'Y', 'N', 3, 1, 0, 0, 'N', 'N', 'N', 'N'),
    (4, 'N', 'Y', 5, 0, 1, 0, 'N', 'N', 'N', 'Y'),
    (5, 'Y', 'N', 2, 2, 0, 1, 'Y', 'N', 'N', 'Y'),
    (6, 'Y', 'Y', 0, 0, 3, 6, 'Y', 'N', 'Y', 'N'),
    (7, 'N', 'N', 1, 1, 3, 4, 'N', 'Y', 'N', 'Y'),
    (8, 'Y', 'Y', 1, 1, 2, 0, 'Y', 'Y', 'N', 'N'),
]
# One tuple per row; the second argument supplies the column names in order.
df = sqlContext.createDataFrame(sample_rows, column_names)
# Look up the column data types: df.dtypes yields one
# (column_name, type_name) tuple per column.
datatypes_List = df.dtypes
# Evaluating datatypes_List in an interactive session shows those tuples.
# For the sample frame built above the output is:
# [('id', 'bigint'), ('compatible', 'string'), ('product', 'string'),
#  ('ios', 'bigint'), ('pc', 'bigint'), ('other', 'bigint'),
#  ('devices', 'bigint'), ('customer', 'string'), ('subscriber', 'string'),
#  ('circle', 'string'), ('smb', 'string')]
datatypes_List
# Map each data type name to the number of columns that have that type.
dict_count = {}
# The column name is not needed for the tally, so it is discarded as `_`.
for _, dtype_name in datatypes_List:
    # .get(..., 0) starts the count at zero the first time a type is seen.
    dict_count[dtype_name] = dict_count.get(dtype_name, 0) + 1
# Evaluate dict_count to see the tallies; for the dtypes listed above this is
# {'bigint': 5, 'string': 6}.
dict_count
添加回答
举报