diff --git a/retriever/tools/keyword_find.py b/retriever/tools/keyword_find.py index 3d19b81..f5122e1 100644 --- a/retriever/tools/keyword_find.py +++ b/retriever/tools/keyword_find.py @@ -29,6 +29,7 @@ def docx_find(file_path: str, keyword_list: list) -> dict: paragraph_keyword = [] for para in doc_text.split('\n'): + para = re.sub(r'\s+', '', para) # 删除段落中空格 this_para_keyword = [keyword for keyword in keyword_list if keyword in para] # 查找该段落中的敏感词 if this_para_keyword: