chore: 删除废弃的引用
This commit is contained in:
parent
e6baff6681
commit
1ddb4e83dd
|
@ -1,4 +1,3 @@
|
|||
# from docx import Document
|
||||
import docx2txt
|
||||
import re
|
||||
|
||||
|
@ -29,37 +28,6 @@ def docx_find(file_path: str, keyword_list: list) -> dict:
|
|||
find_list = []
|
||||
paragraph_keyword = []
|
||||
|
||||
# 使用 python-docx 模块的检索 已废弃
|
||||
# # 检索所有段落
|
||||
# for para in doc_file.paragraphs:
|
||||
# this_para_keyword = []
|
||||
# for keyword in keyword_list: # 可能存在性能问题
|
||||
# if keyword in para.text and not this_para_keyword: # 如果keyword在para.text中,且该段落没有被标记过
|
||||
# find_list.append(para.text)
|
||||
# this_para_keyword.append(keyword)
|
||||
# elif keyword in para.text and this_para_keyword: # 如果keyword在para.text中,且该段落已经被标记过
|
||||
# this_para_keyword.append(keyword)
|
||||
# else:
|
||||
# continue
|
||||
#
|
||||
# if this_para_keyword:
|
||||
# # 若该段落被标记过,则将该段落的所有keyword加入到paragraph_keyword中
|
||||
# paragraph_keyword.append(this_para_keyword)
|
||||
# # 检索所有页眉
|
||||
# for section in doc_file.sections:
|
||||
# header = section.header
|
||||
# if header is not None:
|
||||
# for para in header.paragraphs:
|
||||
# this_para_keyword = []
|
||||
# for keyword in keyword_list:
|
||||
# if keyword in para.text and not this_para_keyword:
|
||||
# find_list.append(para.text)
|
||||
# this_para_keyword.append(keyword)
|
||||
# elif keyword in para.text and this_para_keyword:
|
||||
# this_para_keyword.append(keyword)
|
||||
# else:
|
||||
# continue
|
||||
|
||||
for para in doc_text.split('\n'):
|
||||
this_para_keyword = [keyword for keyword in keyword_list if keyword in para] # 查找该段落中的敏感词
|
||||
|
||||
|
|
Loading…
Reference in New Issue