在 Python 中使用“类”搜索多个字符串

我有这段代码,它可以找到标记 |1234|,并复制从该行开始的所有行,直到找到 abcd| 为止;包含 abcd| 的那一行不会被复制。


我想让这段代码适用于多个字符串。例如,检查某一行是否包含 |1234|、|56789| 或 |54321|,并复制从包含这些标记的行开始、直到 abcd| 行为止的所有行(结束标记不变)。顺便说一句,这段代码还会把所有 txt 文件合并成一个(我需要搜索多个文件)。能否修改代码,使其可以搜索多个 start_marker?该怎么做?有人可以帮我吗?


import glob

import os

class Collector:
    """Iterate over the lines of a text file that lie inside marked sections.

    A section begins at any line containing one of the start markers and
    ends at the next line containing the stop marker.  The start line and
    every following line are yielded; the stop line itself is not.  Markers
    are matched as plain substrings of each line.

    The object is both a context manager (closing the file on exit) and an
    iterator over the collected lines, which keep their trailing newline.

    Args:
        filename: Path of the text file to read (decoded as UTF-8, with
            undecodable bytes ignored).
        start_marker: A single marker string, or an iterable of marker
            strings; a line containing any of them starts collecting.
        stop_marker: Marker string that stops collecting.
    """

    def __init__(self, filename, start_marker, stop_marker):
        # Normalise the markers before opening the file so that a bad
        # argument cannot leave an open file handle behind.
        if isinstance(start_marker, str):
            start_markers = (start_marker,)
        else:
            start_markers = tuple(start_marker)

        self.start_markers = start_markers
        self.stop_marker = stop_marker
        self.toggle_collect = False
        self.f = open(filename, encoding='utf-8', errors='ignore')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.f.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next collected line.

        Raises:
            StopIteration: When the underlying file is exhausted.
        """
        while True:
            # next() on the file raises StopIteration at end of file, which
            # is exactly what ends iteration over this object.
            line = next(self.f)

            # A start marker takes precedence when a line holds both kinds.
            if any(marker in line for marker in self.start_markers):
                self.toggle_collect = True
            elif self.stop_marker in line:
                self.toggle_collect = False
                continue  # the stop line itself is never returned

            if self.toggle_collect:
                return line



# Directory holding the .txt files to filter, and the directory that
# receives one filtered copy per input file.
src_path = "e:/teste/Filtrados/"
dst_path = "e:/teste/FiltradosFinal/"

filelist = (fn for fn in os.listdir(src_path) if fn.endswith(".txt"))

# Step 1: write the collected lines of every source file to a file of the
# same name in dst_path.
for x in filelist:
    print(f"Processing file {x}")
    # The backslash must be followed directly by the continuation line; a
    # blank line after it is a syntax error.
    with open(os.path.join(dst_path, x), 'w', encoding='utf-8', errors='ignore') as f, \
         Collector(os.path.join(src_path, x), '1234', 'abcd') as c:
        for r in c:
            f.write(r)

# Step 2: concatenate all filtered files into a single output file.  Binary
# mode copies the bytes unchanged, with no decoding or newline translation.
read_files = glob.glob("e:/teste/FiltradosFinal//*.txt")
with open("e:/teste/teste.txt", "wb") as outfile:
    for f in read_files:
        print("Combinando arquivos")
        with open(f, "rb") as infile:
            outfile.write(infile.read())

输入


00000|12333|

something

word

another_one

abcd|

00000|12320|

something

word

another_one

abcd|

00000|12321|

something

word

another_one

abcd|

输出


00000|12333|

something

word

another_one

abcd|

00000|12321|

something

word

another_one

abcd|

我想复制从包含某个开始标记(例如 '|12333|' 或 '|12321|')的行开始、直到找到结束标记 abcd| 为止的所有行。我的代码可以做到这一点,但只在设置一个开始标记时有效,而我想要支持多个开始标记。


噜噜哒
浏览 69回答 1
1回答

慕姐8265434

import glob
import os
class Collector:
    def __init__(self, filename, start_marker, stop_marker):
        self.toggle_collect = False
        self.f = open(filename, encoding='utf-8', errors='ignore')
        self.m1s = ['|1234|', 'other_word1', 'other_word2']
        self.m2 = 'abcd|'
    def __enter__(self):
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        self.f.close()
    def __iter__(self):
        return self
    def __next__(self):
        while True:
            r = next(self.f)
            for m1 in self.m1s:
                if self.m2 in r:  # found the stop-collecting marker
                   break
                if m1 in r:  # found the start-collecting marker
                   return r  # .rstrip()  # provide row
src_path = "e:/teste/Filtrados/"
dst_path = "e:/teste/FiltradosFinal/"
filelist = (fn for fn in os.listdir(src_path) if fn.endswith(".txt"))
for x in filelist:
    print(f"Processing file {x}")
    with open(os.path.join(dst_path, x), 'w', encoding='utf-8', errors='ignore') as f, \
            Collector(os.path.join(src_path, x), '1234', 'abcd') as c:
        for r in c:
            f.write(r)
read_files = glob.glob("e:/teste/FiltradosFinal//*.txt")
with open("e:/teste/teste.txt", "wb") as outfile:
    for f in read_files:
        print("Combinando arquivos")
        with open(f, "rb") as infile:

我没有测试这段代码,因为您没有提供方便使用的输入或期望的输出,但它看起来可以满足您的要求。我建议使用有意义的名称,而不是 m1 和 m2,这样在代码变大时您才便于调试自己的代码(更不用说让其他人阅读它了)。
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python