From 0ee978ba4e5e16249c215e49189740c9e10e22d6 Mon Sep 17 00:00:00 2001
From: raiot <raiot.lee@hotmail.com>
Date: Thu, 24 Aug 2023 20:06:54 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E9=81=BF=E5=85=8D=E6=AE=B5=E8=90=BD?=
 =?UTF-8?q?=E7=A9=BA=E6=A0=BC=E5=BD=B1=E5=93=8D=E6=A3=80=E6=B5=8B=E7=BB=93?=
 =?UTF-8?q?=E6=9E=9C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 retriever/tools/keyword_find.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/retriever/tools/keyword_find.py b/retriever/tools/keyword_find.py
index 3d19b81..f5122e1 100644
--- a/retriever/tools/keyword_find.py
+++ b/retriever/tools/keyword_find.py
@@ -29,6 +29,7 @@ def docx_find(file_path: str, keyword_list: list) -> dict:
     paragraph_keyword = []
 
     for para in doc_text.split('\n'):
+        para = re.sub(r'\s+', '', para)  # Remove all whitespace (not only spaces) so spacing inside the paragraph cannot hide a keyword
         this_para_keyword = [keyword for keyword in keyword_list if keyword in para]  # 查找该段落中的敏感词
 
         if this_para_keyword: