2 回答
TA贡献1852条经验 获得超1个赞
在之前提供的解决方案中,如果 *_r1.fastq 文件的数量与 *_r2.fastq 文件的数量不一致,就会出现此错误,因为该代码只按数组索引生成新的 csv 行,而不比较文件名。
我更新了那个解决方案。检查文件名,它们应该是这样的:
/home/data/10080-17_r1.fastq
/home/data/10080-17_r2.fastq
现在我们先获取所有正向文件( *_r1.fastq ),然后尝试在同一目录中找到对应的反向文件( *_r2.fastq )。
如果找不到,则写入“-”来代替反向文件的名称。
请查看代码并阅读其中的注释:
#!/usr/bin/python
"""Write a tab-separated manifest.txt pairing forward and reverse FASTQ files.

Every ``*_r1.fastq`` file found below the current working directory becomes
one row: (forward path, md5 placeholder, reverse path or '-', md5 placeholder,
sample label).
"""
import os
import csv

FORWARD_SUFFIX = "_r1.fastq"
REVERSE_SUFFIX = "_r2.fastq"
# Written in the reverse-file column when no matching reverse file exists.
MISSING_REVERSE = "-"


def find_fastq_files(top_dir):
    """Return ``(forward_paths, reverse_paths)`` found below *top_dir*.

    A file counts as forward/reverse only when its name ENDS with the
    corresponding suffix, so names such as ``x_r1.fastq.bak`` are ignored.
    Paths are returned in ``os.walk`` order.
    """
    forward_paths = []
    reverse_paths = []
    for root, _dirs, file_names in os.walk(top_dir):
        for file_name in file_names:
            if file_name.endswith(FORWARD_SUFFIX):
                forward_paths.append(os.path.join(root, file_name))
            elif file_name.endswith(REVERSE_SUFFIX):
                reverse_paths.append(os.path.join(root, file_name))
    return forward_paths, reverse_paths


def build_manifest_rows(forward_paths, reverse_paths):
    """Build one manifest row per forward file.

    *forward_paths* must all end with ``_r1.fastq`` (as produced by
    ``find_fastq_files``).  The reverse file is looked up by exact path: same
    directory, same stem, ``_r2.fastq`` suffix.  When it is not present in
    *reverse_paths* the reverse column holds ``'-'``.

    Returns a list of tuples
    ``(forward_path, md5_url_1, reverse_path_or_dash, md5_url_2, sample_label)``.
    """
    # Set membership replaces a linear scan of the reverse list per forward file.
    known_reverse = set(reverse_paths)
    rows = []
    for forward_path in forward_paths:
        # Cut only the trailing suffix: a "_r1" or "_r" elsewhere in the path
        # (directory name, sample name) must not affect pairing or the label.
        stem = forward_path[:-len(FORWARD_SUFFIX)]
        sample_label = os.path.basename(stem)

        candidate = stem + REVERSE_SUFFIX
        reverse_file_result = (
            candidate if candidate in known_reverse else MISSING_REVERSE
        )

        # Placeholder: compute the md5 of the FORWARD file here.
        md5_url_1 = 0
        # Placeholder: compute the md5 of the REVERSE file here.
        md5_url_2 = 0

        rows.append((forward_path, md5_url_1, reverse_file_result,
                     md5_url_2, sample_label))
    return rows


this_dir = os.getcwd()
forward_arr, reverse_arr = find_fastq_files(this_dir)

# collect result rows in this array
csv_arr = build_manifest_rows(forward_arr, reverse_arr)

# (re)write the whole manifest in one go
# NOTE(review): on Python 3 the csv docs recommend open(..., newline='');
# left unchanged here because the target Python version is not visible.
with open('manifest.txt', 'w') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerows(csv_arr)
TA贡献2021条经验 获得超8个赞
我认为这应该能满足您的需要。原代码的意图不太容易判断,因为下面这一行:
names = [s.strip('_1') for s in names]
看起来不会起任何作用(我猜这里本应是“_r1”,我已在第一个循环中做了相应修改)。
import os
import csv

R1_SUFFIX = "_r1.fastq"
R2_SUFFIX = "_r2.fastq"


def manifest_rows(forward, reverse, names):
    """Return one manifest row per forward file.

    *forward* and *names* are parallel lists (path and sample name of each
    ``*_r1.fastq`` file).  The reverse file is matched by path, i.e. the same
    location with ``_r2.fastq`` in place of the trailing ``_r1.fastq``, and
    NOT by list position, so unequal numbers of forward and reverse files
    cannot mis-pair samples.  A missing reverse file is written as ``'-'``.

    Each row is ``[forward_path, 0, reverse_path_or_dash, 0, name]``; the
    zeros are the placeholder columns from the original snippet.
    """
    available = set(reverse)
    rows = []
    for fwd, nam in zip(forward, names):
        rev = fwd[:-len(R1_SUFFIX)] + R2_SUFFIX
        if rev not in available:
            rev = "-"
        rows.append([fwd, 0, rev, 0, nam])
    return rows


thisdir = os.getcwd()

# Create empty lists
forward = []
reverse = []
names = []

for r, d, f in os.walk(thisdir):  # r=root, d=directories, f = files
    # os.walk yields a LIST of file names per directory, so iterate over it.
    for file_name in f:
        if file_name.endswith(R1_SUFFIX):
            forward.append(os.path.join(r, file_name))
            # Slice the suffix off; str.strip() would remove a character set
            # from both ends instead of this exact suffix.
            names.append(file_name[:-len(R1_SUFFIX)])
        elif file_name.endswith(R2_SUFFIX):
            reverse.append(os.path.join(r, file_name))

# write the output to a file
with open('manifest.txt', 'w') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerows(manifest_rows(forward, reverse, names))
添加回答
举报