首页猿问在 python...

在 python 中使用“类”搜索多个字符串

Python

噜噜哒 2022-07-19 20:10:45

我有这段代码，它可以找到包含 |1234| 的行，并复制从该行开始的所有行，直到遇到 abcd| 为止（包含 abcd| 的那一行不会被复制）。我想让这段代码支持多个起始字符串：例如，判断某行是否包含 |1234|、|56789| 或 |54321|，并从包含其中任意一个字符串的行开始复制，直到遇到 abcd| 这一行（结束标记保持不变）。顺便说一句，这段代码还会把所有 txt 文件合并成一个文件（因为我需要搜索多个文件）。能否修改代码，使其可以搜索多个 start_marker？该怎么做？有人可以帮我吗？

import glob
import os

class Collector:
    def __init__(self, filename, start_marker, stop_marker):
        self.toggle_collect = False
        self.f = open(filename, encoding='utf-8', errors='ignore')
        self.m1 = '|1234|'
        self.m2 = 'abcd|'

    def __enter__ (self):
        return self

    def __exit__ (self, exc_type, exc_value, traceback):
        self.f.close()

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            r = next(self.f)
            if self.m1 in r: # found the start-collecting marker
                self.toggle_collect = True
            elif self.m2 in r: # found the stop-collecting marker
                self.toggle_collect = False
                continue
            if self.toggle_collect: # we are collecting
                return r # .rstrip() # provide row

src_path = "e:/teste/Filtrados/"
dst_path = "e:/teste/FiltradosFinal/"
filelist = (fn for fn in os.listdir(src_path) if fn.endswith(".txt"))
for x in filelist:
    print(f"Processing file {x}")
    with open(os.path.join(dst_path, x), 'w', encoding='utf-8', errors='ignore') as f, \
         Collector(os.path.join(src_path, x), '1234', 'abcd') as c:
        for r in c:
            f.write(r)

read_files = glob.glob("e:/teste/FiltradosFinal//*.txt")
with open("e:/teste/teste.txt", "wb") as outfile:
    for f in read_files:
        print("Combinando arquivos")
        with open(f, "rb") as infile:

输入：

00000|12333|
something
word
another_one
abcd|
00000|12320|
something
word
another_one
abcd|
00000|12321|
something
word
another_one
abcd|

输出：

00000|12333|
something
word
another_one
abcd|
00000|12321|
something
word
another_one
abcd|

例如，我想复制从包含 '|12333|' 或 '|12321|' 的行开始、直到遇到结束标记 abcd| 为止的所有行。我的代码可以做到这一点，但只能设置一个起始标记，而我需要多个起始标记。

查看完整描述

1 回答

慕姐8265434

TA贡献1813条经验获得超2个赞

import glob

import os

class Collector:
    """Iterate over the lines of a text file that lie inside marked sections.

    A section starts at any line containing one of the start markers and
    ends at the next line containing the stop marker.  The start line and
    every following line are yielded; the stop line itself is not.

    The object is both a context manager (it closes the file on exit) and
    an iterator over the collected lines (line endings are preserved).

    Args:
        filename: Path of the text file to scan.
        start_marker: A single marker string, or an iterable of marker
            strings; a line containing any of them starts a section.
        stop_marker: Marker string; a line containing it ends the section.
    """

    def __init__(self, filename, start_marker, stop_marker):
        # True while we are between a start marker and the stop marker.
        self.toggle_collect = False
        # Accept a lone string for backward compatibility with callers that
        # pass exactly one start marker.
        if isinstance(start_marker, str):
            start_marker = [start_marker]
        self.m1s = list(start_marker)  # start-collecting markers
        self.m2 = stop_marker          # stop-collecting marker
        # Open last so that a bad marker argument cannot leak a file handle.
        self.f = open(filename, encoding='utf-8', errors='ignore')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.f.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line that belongs to a collected section.

        Raises:
            StopIteration: When the underlying file is exhausted.
        """
        while True:
            # StopIteration raised by the file ends this iterator as well.
            r = next(self.f)
            if any(m1 in r for m1 in self.m1s):
                # Found a start-collecting marker; this line is kept too.
                self.toggle_collect = True
            elif self.m2 in r:
                # Found the stop-collecting marker; drop this line.
                self.toggle_collect = False
                continue
            if self.toggle_collect:
                return r

# Step 1: filter every .txt file in src_path into a same-named file in
# dst_path, keeping only the lines the Collector yields.
src_path = "e:/teste/Filtrados/"
dst_path = "e:/teste/FiltradosFinal/"

filelist = (fn for fn in os.listdir(src_path) if fn.endswith(".txt"))

for x in filelist:
    print(f"Processing file {x}")
    # The start markers are the ones named in the question; a line that
    # contains any of them starts a section, and 'abcd|' ends it.
    with open(os.path.join(dst_path, x), 'w', encoding='utf-8', errors='ignore') as f, \
            Collector(os.path.join(src_path, x), ['|1234|', '|56789|', '|54321|'], 'abcd|') as c:
        for r in c:
            f.write(r)

# Step 2: concatenate all filtered files into a single output file.
# Sorted so the merge order is deterministic (glob order is arbitrary).
read_files = sorted(glob.glob(os.path.join(dst_path, "*.txt")))

with open("e:/teste/teste.txt", "wb") as outfile:
    for f in read_files:
        print("Combinando arquivos")
        with open(f, "rb") as infile:
            # Copy the bytes verbatim; no decoding is needed for a merge.
            outfile.write(infile.read())

我没有测试代码，因为您没有提供任何方便的输入或所需的输出，但这似乎可以满足您的要求。

我建议使用有意义的名称，而不是 m1 和 m2，这样在代码变大时您才便于调试自己的代码（更不用说让其他人阅读了）。

反对回复 2022-07-19

1 回答
0 关注
67 浏览

关注

添加回答

0/150

提交

取消

热搜

最近搜索清空

在 python 中使用“类”搜索多个字符串

在 python 中使用“类”搜索多个字符串

1 回答

添加回答