2 回答

TA贡献1865条经验 获得超7个赞
在编写代码时,用 try/except 捕获“一切”异常来运行脚本并不是一个好主意(实际上任何时候都不是)。此外,您访问表格数据的方式似乎不正确,但下面的写法应该有效:
import numpy as np
import glob
import os
from os import listdir
from docx import Document
import re
import win32com.client as win32
import traceback
# Module-level search settings.
# NOTE(review): find_word does not read these module-level values; confirm
# whether they are still needed.
documents = r'C:\Users\aac1928\Desktop\Test'
Keyword = "the"
def find_word(keyword='the', documents=r'/Users/marc/Documents'):
    """Print the path of every .docx file under *documents* whose tables contain *keyword*.

    The directory tree is walked recursively. Every file name encountered is
    printed as progress output; the full path is printed once more for each
    document in which ``search_doc`` reports a match.

    Args:
        keyword: Substring to look for in table cells.
        documents: Root directory to scan.

    Returns:
        None. Results are written to standard output.
    """
    for root, _dirs, files in os.walk(documents):
        for filename in files:
            print(filename)
            # python-docx only opens .docx packages; legacy .doc files would
            # raise on load and abort the whole walk, so they are skipped.
            # The leading dot keeps names that merely end in "docx" out.
            if not filename.endswith(".docx"):
                continue
            file_path = os.path.join(root, filename)
            with open(file_path, "rb") as f:
                doc = Document(f)
                if search_doc(doc, keyword):
                    print(file_path)
def search_doc(doc, Keyword):
    """Return True if any table cell in *doc* contains *Keyword*.

    Args:
        doc: Document-like object exposing ``tables``; each table exposes
            ``rows``, each row ``cells``, and each cell a ``text`` string.
        Keyword: Substring to look for (case-sensitive containment test).

    Returns:
        True as soon as one matching cell is found, otherwise False.
    """
    for table in doc.tables:
        for row in table.rows:
            # Walk the row's own cells instead of indexing table.cell() with
            # swapped (column, row) arguments, which breaks on tables that
            # are not square.
            for cell in row.cells:
                if Keyword in cell.text:
                    return True
    return False
这也确保您在找到关键字时停止在文档中查找并移至下一个文档,而不是在文档的多个表格中找到该关键字时多次打印文件名。

TA贡献1775条经验 获得超11个赞
我是 Python 的新手,但是通过添加以下代码:
except Exception:
pass
traceback.print_exc()
在您的代码中,我可以看到未定义 Cell
如果您将循环改为 while 循环,它就能正常工作,例如:
# Print the file name for every table cell that contains the keyword.
for table in doc.tables:
    for row in table.rows:
        # Read the row's cells once instead of on every loop test.
        cells = row.cells
        i = 0
        while i < len(cells):
            if Keyword in cells[i].text:
                print(filename)
            # Advance on every pass; without this the loop never terminates.
            i += 1
希望能帮助到你
更新 :
import numpy as np
import glob
import os
from os import listdir
from docx import Document
import re
import win32com.client as win32
import traceback
# Substring to look for and the root folder to scan.
Keyword = 'the'
documents = r'C:\Users\aac1928\Desktop\Test'

# Searches for Keywords in Converted Text Documents
for root, dirs, files in os.walk(documents, onerror=None):
    print("Here 1")
    for filename in files:
        print(filename)
        # python-docx only opens .docx packages, so legacy .doc files are
        # skipped; the leading dot keeps names that merely end in "docx" out.
        if not filename.endswith(".docx"):
            continue
        file_path = os.path.join(root, filename)
        print(file_path)
        try:
            with open(file_path, "rb") as f:
                doc = Document(f)
            # Iterate the cells directly: the earlier index-based while loop
            # never advanced its counter and therefore never finished.
            for table in doc.tables:
                for row in table.rows:
                    for cell in row.cells:
                        if Keyword in cell.text:
                            print(filename)
        except Exception:
            # Best effort: report the unreadable file and keep scanning.
            traceback.print_exc()
更新 2:
import numpy as np
import glob
import os
from os import listdir
from docx import Document
import re
import win32com.client as win32
import traceback
# Substring to look for and the root folder to scan.
Keyword = 'the'
documents = r'C:\Users\aac1928\Desktop\Test'
# File names of the documents in which the keyword was found.
documentsWithKeyword = []

# Searches for Keywords in Converted Text Documents
for root, dirs, files in os.walk(documents, onerror=None):
    print("Here 1")
    for filename in files:
        print(filename)
        # python-docx only opens .docx packages, so legacy .doc files are
        # skipped; the leading dot keeps names that merely end in "docx" out.
        if not filename.endswith(".docx"):
            continue
        file_path = os.path.join(root, filename)
        print(file_path)
        try:
            with open(file_path, "rb") as f:
                doc = Document(f)
            # any() stops at the first matching cell; the earlier while loop
            # never advanced its index and appended to the list forever.
            found = any(
                Keyword in cell.text
                for table in doc.tables
                for row in table.rows
                for cell in row.cells
            )
        except Exception:
            # Best effort: report the unreadable file and keep scanning.
            traceback.print_exc()
            continue
        if found:
            documentsWithKeyword.append(filename)

# remove duplicates and order the names alphabetically
documentsWithKeyword = sorted(set(documentsWithKeyword))

# print documents that have the word
for docwithKeyword in documentsWithKeyword:
    print(docwithKeyword)
分享
添加回答
举报