# Part one of a two-part example: build a genome x best-hit presence/absence
# matrix from the sample dictionaries below, write it to
# "concatenated.matrix" as tab-separated text, print it, and report the
# elapsed wall-clock time.
#
# Written so that it runs unchanged on both Python 2 and Python 3, and on
# pandas versions before and after the removal of the `level=` keyword from
# DataFrame reductions.
import time

import pandas as pd

# The dictionaries for this example.

# query gene -> genome (individual) the gene belongs to.
dict_assembly = {
    'ind1gene1': 'individual1',
    'ind1gene2': 'individual1',
    'ind1gene3': 'individual1',
    'ind2gene1': 'individual2',
    'ind2gene2': 'individual2',
    'ind2gene3': 'individual2',
    'ind3gene1': 'individual3',
    'ind3gene2': 'individual3',
    'ind3gene3': 'individual3',
    'ind4gene1': 'individual4',
    'ind4gene2': 'individual4',
    'ind4gene3': 'individual4',
    'ind4gene4': 'individual4',
}

# query gene -> subject (best hit) label.
dict_bhit = {
    'ind1gene1': 'AAAAA',
    'ind1gene2': 'BBBBB',
    'ind1gene3': 'CCCCC',
    'ind2gene1': 'AAAAA',
    'ind2gene2': 'BBBBB',
    'ind2gene3': 'BBBBB',
    'ind3gene1': 'AAAAA',
    'ind3gene2': 'BBBBB',
    'ind3gene3': 'CCCCC',
    'ind4gene1': 'AAAAA',
    'ind4gene2': 'BBBBB',
    'ind4gene3': 'CCCCC',
    'ind4gene4': 'DDDDD',
}

# query gene -> identity value, stored as a string. Not used in this first
# part; kept for the second part of the example.
# NOTE(review): 'indi5gene1' and 'ind5gene2' have no counterpart in the two
# dictionaries above ('indi5gene1' may be a typo) -- confirm with the author.
dict_identity = {
    'ind1gene1': '98',
    'ind2gene1': '96',
    'ind3gene1': '95',
    'ind4gene1': '96',
    'indi5gene1': '94',
    'ind1gene2': '67',
    'ind2gene2': '76',
    'ind3gene2': '80',
    'ind4gene2': '77',
    'ind5gene2': '76',
    'ind1gene3': '98',
    'ind2gene3': '97',
    'ind3gene3': '96',
    'ind4gene3': '96',
    'ind4gene4': '40',
}

data = {}  # temporary dictionary

# The code for this example is split into two blocks. Part one:
start = time.time()

# list(...) gives the DataFrame constructor a concrete sequence of
# (key, value) pairs on Python 3, where dict.items() is only a view.
col_subject = ['query', 'subject']
df_accession = pd.DataFrame(list(dict_bhit.items()), columns=col_subject)

col_genome = ['query', 'genome']
df_assembly = pd.DataFrame(list(dict_assembly.items()), columns=col_genome)

# Attach the best-hit label to every (query, genome) row; queries without a
# best hit become NaN and contribute an all-zero row to the dummies below.
df_assembly['subject'] = df_assembly['query'].map(
    df_accession.set_index('query')['subject']
)

# One indicator column per subject, collapsed to one row per genome.
# - groupby(level=0, sort=False).max() replaces DataFrame.max(level=0), which
#   newer pandas no longer accepts; sort=False keeps the first-appearance
#   row order that the old call produced.
# - The original second reduction over the columns (max(level=0, axis=1)) is
#   dropped: get_dummies on a Series yields unique column labels, so it
#   could never merge anything.
# - astype(int) keeps the matrix 0/1 on pandas versions where get_dummies
#   returns booleans.
matrix = (
    pd.get_dummies(df_assembly.set_index('genome')['subject'])
    .groupby(level=0, sort=False)
    .max()
    .astype(int)
)

# The context manager guarantees the file is flushed and closed even if
# to_csv raises; `matrix_file` stays bound (to the closed handle) afterwards.
with open("concatenated.matrix", "w") as matrix_file:
    matrix.to_csv(matrix_file, sep='\t', header=True, index=True)

print(matrix)

end = time.time()
# Single-argument print with %-formatting emits the same text on Python 2
# and Python 3.
print('This step spent %s seconds\n' % round(end - start, 4))
添加回答
举报
0/150
提交
取消