chore: 删除废弃的引用

This commit is contained in:
raiot 2023-08-24 20:03:27 +08:00
parent e6baff6681
commit 1ddb4e83dd
1 changed file with 0 additions and 32 deletions

View File

@@ -1,4 +1,3 @@
# from docx import Document
import docx2txt
import re
@@ -29,37 +28,6 @@ def docx_find(file_path: str, keyword_list: list) -> dict:
find_list = []
paragraph_keyword = []
# 使用 python-docx 模块的检索 已废弃
# # 检索所有段落
# for para in doc_file.paragraphs:
# this_para_keyword = []
# for keyword in keyword_list: # 可能存在性能问题
# if keyword in para.text and not this_para_keyword: # 如果keyword在para.text中且该段落没有被标记过
# find_list.append(para.text)
# this_para_keyword.append(keyword)
# elif keyword in para.text and this_para_keyword: # 如果keyword在para.text中且该段落已经被标记过
# this_para_keyword.append(keyword)
# else:
# continue
#
# if this_para_keyword:
# # 若该段落被标记过则将该段落的所有keyword加入到paragraph_keyword中
# paragraph_keyword.append(this_para_keyword)
# # 检索所有页眉
# for section in doc_file.sections:
# header = section.header
# if header is not None:
# for para in header.paragraphs:
# this_para_keyword = []
# for keyword in keyword_list:
# if keyword in para.text and not this_para_keyword:
# find_list.append(para.text)
# this_para_keyword.append(keyword)
# elif keyword in para.text and this_para_keyword:
# this_para_keyword.append(keyword)
# else:
# continue
for para in doc_text.split('\n'):
this_para_keyword = [keyword for keyword in keyword_list if keyword in para] # 查找该段落中的敏感词