From 0ee978ba4e5e16249c215e49189740c9e10e22d6 Mon Sep 17 00:00:00 2001 From: raiot Date: Thu, 24 Aug 2023 20:06:54 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E9=81=BF=E5=85=8D=E6=AE=B5=E8=90=BD?= =?UTF-8?q?=E7=A9=BA=E6=A0=BC=E5=BD=B1=E5=93=8D=E6=A3=80=E6=B5=8B=E7=BB=93?= =?UTF-8?q?=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- retriever/tools/keyword_find.py | 1 + 1 file changed, 1 insertion(+) diff --git a/retriever/tools/keyword_find.py b/retriever/tools/keyword_find.py index 3d19b81..f5122e1 100644 --- a/retriever/tools/keyword_find.py +++ b/retriever/tools/keyword_find.py @@ -29,6 +29,7 @@ def docx_find(file_path: str, keyword_list: list) -> dict: paragraph_keyword = [] for para in doc_text.split('\n'): + para = re.sub(r'\s+', '', para) # 删除段落中空格 this_para_keyword = [keyword for keyword in keyword_list if keyword in para] # 查找该段落中的敏感词 if this_para_keyword: