1 回答
TA贡献1813条经验 获得超2个赞
import glob
import os
class Collector:
    """Iterate over the lines of a text file that match the include markers.

    A line is yielded when it contains at least one of the strings in
    ``self.m1s`` and does not contain ``self.m2``. The object is both a
    context manager (closing the underlying file on exit) and its own
    iterator.

    NOTE(review): ``start_marker`` and ``stop_marker`` are accepted but not
    used; the markers are hard-coded below. Wiring the arguments in would
    change which lines existing callers receive, so confirm the intended
    markers before doing that.
    """

    def __init__(self, filename, start_marker, stop_marker):
        """Open *filename* for reading as UTF-8, ignoring undecodable bytes.

        Args:
            filename: Path of the text file to scan.
            start_marker: Currently ignored (see class note).
            stop_marker: Currently ignored (see class note).
        """
        # Not read anywhere in this class; kept so the attribute set is unchanged.
        self.toggle_collect = False
        self.f = open(filename, encoding='utf-8', errors='ignore')
        # A line must contain at least one of these to be yielded.
        self.m1s = ['|1234|', 'other_word1', 'other_word2']
        # A line containing this is never yielded.
        self.m2 = 'abcd|'

    def __enter__(self):
        """Return the collector itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying file; exceptions are not suppressed."""
        self.f.close()

    def __iter__(self):
        """Return self; the collector is its own iterator."""
        return self

    def __next__(self):
        """Return the next matching line, including its line terminator.

        Raises:
            StopIteration: When the underlying file is exhausted.
        """
        while True:
            # StopIteration from the file iterator propagates and ends iteration.
            row = next(self.f)
            # The exclude test does not depend on the include marker, so it is
            # evaluated once per line rather than once per marker.
            if self.m2 in row:
                continue
            if any(marker in row for marker in self.m1s):
                return row
# Step 1: for every .txt file in src_path, write the lines selected by
# Collector to a file of the same name in dst_path.
src_path = "e:/teste/Filtrados/"
dst_path = "e:/teste/FiltradosFinal/"
# Make sure the destination exists; open(..., 'w') does not create directories.
os.makedirs(dst_path, exist_ok=True)
filelist = (fn for fn in os.listdir(src_path) if fn.endswith(".txt"))
for x in filelist:
    print(f"Processing file {x}")
    with open(os.path.join(dst_path, x), 'w', encoding='utf-8', errors='ignore') as f, \
            Collector(os.path.join(src_path, x), '1234', 'abcd') as c:
        # Collector yields whole lines with their terminators intact.
        f.writelines(c)
# Step 2: concatenate every filtered .txt file into a single output file.
# Files are copied as raw bytes, so no decoding or re-encoding takes place.
read_files = glob.glob("e:/teste/FiltradosFinal//*.txt")
with open("e:/teste/teste.txt", "wb") as outfile:
    for f in read_files:
        print("Combinando arquivos")
        with open(f, "rb") as infile:
            # NOTE(review): the loop body was missing from the original text;
            # appending the file's bytes is the evident intent — confirm.
            outfile.write(infile.read())
我没有测试代码,因为您没有提供任何方便的输入或所需的输出,但这似乎可以满足您的要求。
我建议使用有意义的名称,而不是 m1 和 m2,这样在代码变大时您可以更容易地调试自己的代码(更不用说让其他人阅读它了)。
添加回答
举报