diff --git a/KeywordRetriever/settings.py b/KeywordRetriever/settings.py
index b603aa3..82945ad 100644
--- a/KeywordRetriever/settings.py
+++ b/KeywordRetriever/settings.py
@@ -11,6 +11,7 @@ https://docs.djangoproject.com/en/4.2/ref/settings/
"""
from pathlib import Path
+import os
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
@@ -123,6 +124,7 @@ STATIC_URL = 'static/'
STATICFILES_DIRS = [
BASE_DIR / 'static',
]
+STATIC_ROOT = os.path.join(BASE_DIR, 'static', 'static_root')
# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
diff --git a/KeywordRetriever/urls.py b/KeywordRetriever/urls.py
index b004c58..8ff551e 100644
--- a/KeywordRetriever/urls.py
+++ b/KeywordRetriever/urls.py
@@ -24,3 +24,5 @@ urlpatterns = [
path("unicorn/", include("django_unicorn.urls")),
path('', include('retriever.urls')),
]
+
+admin.site.site_header = '敏感词维护'  # heading text of the admin site ("sensitive-word maintenance")
\ No newline at end of file
diff --git a/retriever/admin.py b/retriever/admin.py
index fe8e86f..d11f365 100644
--- a/retriever/admin.py
+++ b/retriever/admin.py
@@ -1,7 +1,15 @@
from django.contrib import admin
+from django.contrib.auth.models import Group, User
+from django_celery_results.models import TaskResult, GroupResult
# Register your models here.
from retriever.models import RetrieverTask, UploadFile, KeywordParagraph, Keywords
admin.site.register(Keywords)
+
+# Hide the auth models (Group, User) and the django-celery-results models (TaskResult, GroupResult) from the admin site.
+admin.site.unregister(Group)
+admin.site.unregister(User)
+admin.site.unregister(TaskResult)  # NOTE(review): unregister() raises NotRegistered if that app's admin module has not been loaded yet - confirm 'retriever' is listed after it in INSTALLED_APPS
+admin.site.unregister(GroupResult)
diff --git a/retriever/forms.py b/retriever/forms.py
index 44e4a45..384d801 100644
--- a/retriever/forms.py
+++ b/retriever/forms.py
@@ -12,4 +12,4 @@ class SpaceSeparatedField(forms.CharField):
class UploadForm(forms.Form):
attachments = MultiFileField(min_num=1, max_num=10, max_file_size=1024 * 1024 * 64,
- attrs={'class': 'file-input is-primary', 'accept': '.docx, .doc, .dot, .pptx, .ppt, .pdf, .xls'})
+ attrs={'class': 'file-input is-primary', 'accept': '.docx, .doc, .dot, .pptx, .ppt, .pdf, .xls, .xlsx, .txt', 'id': 'file-input'})  # 'id' matches the element that the script in templates/index.html looks up with getElementById('file-input')
diff --git a/retriever/migrations/0001_initial.py b/retriever/migrations/0001_initial.py
new file mode 100644
index 0000000..b42ccd2
--- /dev/null
+++ b/retriever/migrations/0001_initial.py
@@ -0,0 +1,61 @@
+# Generated by Django 4.2.4 on 2023-08-25 14:14
+
+import django.core.validators
+from django.db import migrations, models
+import django.db.models.deletion
+import uuid
+
+
+class Migration(migrations.Migration):  # initial schema of the retriever app: Keywords, RetrieverTask, UploadFile, KeywordParagraph
+
+ initial = True
+
+ dependencies = [
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='Keywords',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('keyword', models.CharField(max_length=64, unique=True, verbose_name='敏感词')),
+ ('is_active', models.BooleanField(default=True, verbose_name='是否启用')),
+ ('keyword_created', models.DateTimeField(auto_now_add=True)),
+ ],
+ options={
+ 'verbose_name': '敏感词',
+ 'verbose_name_plural': '敏感词',
+ },
+ ),
+ migrations.CreateModel(
+ name='RetrieverTask',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('task_uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),  # the identifier retriever/tasks.py filters on (task_uuid=task_id)
+ ('task_keywords', models.CharField(max_length=1024)),
+ ('task_status', models.BooleanField(default=False)),  # set to True by start_retriever_job once every attachment has been processed
+ ('task_started', models.BooleanField(default=False)),
+ ('task_created', models.DateTimeField(auto_now_add=True)),
+ ],
+ ),
+ migrations.CreateModel(
+ name='UploadFile',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('file_id', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
+ ('file_name', models.CharField(max_length=100)),
+ ('is_checked', models.BooleanField(default=False)),
+ ('file', models.FileField(upload_to='uploads/', validators=[django.core.validators.FileExtensionValidator(allowed_extensions=['docx'])])),  # NOTE(review): only 'docx' passes this validator, while UploadForm's accept list also offers .doc/.ppt/.pdf/.xls/.xlsx/.txt - confirm which is intended
+ ('related_task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='attachment', to='retriever.retrievertask')),  # CASCADE: deleting a task deletes its file rows
+ ],
+ ),
+ migrations.CreateModel(
+ name='KeywordParagraph',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('keyword', models.CharField(max_length=1024)),
+ ('paragraph', models.TextField()),
+ ('related_file', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='keyword_paragraph', to='retriever.uploadfile')),  # CASCADE: deleting a file row deletes its paragraphs
+ ],
+ ),
+ ]
diff --git a/retriever/models.py b/retriever/models.py
index bb02bc7..9370a04 100644
--- a/retriever/models.py
+++ b/retriever/models.py
@@ -37,7 +37,7 @@ class UploadFile(models.Model):
# 以列表形式返回文件所属的所有段落的keyword
keyword_in_paragraph = self.keyword_paragraph.all().values_list('keyword', flat=True)
flat_keyword = [item.strip("[]' ") for sublist in keyword_in_paragraph for item in sublist.split(',')]
- print(flat_keyword)
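+ # NOTE(review): each stored keyword value is presumably a stringified list, hence the split(',') / strip("[]' ") above - confirm against the writer in tasks.py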
# 排除重复的keyword 并返回以逗号分隔的字符串
return ','.join(set(flat_keyword))
# return self.keyword_paragraph.all().values_list('keyword', flat=True)
@@ -53,8 +53,8 @@ class KeywordParagraph(models.Model):
class Keywords(models.Model):
- keyword = models.CharField(max_length=64)
- is_active = models.BooleanField(default=True)
+ keyword = models.CharField(max_length=64, unique=True, blank=False, null=False, verbose_name='敏感词')
+ is_active = models.BooleanField(default=True, verbose_name='是否启用')
keyword_created = models.DateTimeField(auto_now_add=True)
def __str__(self):
@@ -64,3 +64,7 @@ class Keywords(models.Model):
def active_keyword_list(self):
# 以列表形式返回所有is_active=True的keyword
return Keywords.objects.filter(is_active=True).values_list('keyword', flat=True)
+
+ class Meta:  # admin display names; the plural is set explicitly so Django does not append "s" to the Chinese label
+ verbose_name = '敏感词'
+ verbose_name_plural = '敏感词'
diff --git a/retriever/tasks.py b/retriever/tasks.py
index 1722109..44fb14f 100644
--- a/retriever/tasks.py
+++ b/retriever/tasks.py
@@ -1,5 +1,6 @@
 # app/tasks.py, 可以复用的task 改这个文件记得重启 Celery!!!
 import ast
+import logging
 from celery import shared_task
 from .models import RetrieverTask, UploadFile, KeywordParagraph
@@ -15,8 +16,13 @@ def start_retriever_job(task_id):
     for each in current_task.attachment.all():
         if not each.is_checked:
-            result_dict = util_keyword_find(each.file_path, task_keywords)
-            UploadFile.objects.filter(file_id=each.file_id).update(is_checked=True) # 更新is_checked字段
+            try:
+                result_dict = util_keyword_find(each.file_path, task_keywords)
+                UploadFile.objects.filter(file_id=each.file_id).update(is_checked=True)  # mark the file as checked
+
+            except Exception:  # best effort per file: record the failure and keep processing the other attachments
+                logging.getLogger(__name__).exception('keyword search failed for file %s', each.file_name)
+                result_dict = {'file_name': each.file_name, 'find_list': ['该文件检查程序出错,请联系管理员'], 'paragraph_keyword': ['出错']}
             KeywordParagraph.objects.bulk_create(
                 [KeywordParagraph(related_file=each, keyword=para_keyword, paragraph=paragraph) for paragraph, para_keyword in
@@ -24,4 +30,26 @@ def start_retriever_job(task_id):
         elif each.is_checked:
             continue
     RetrieverTask.objects.filter(task_uuid=task_id).update(task_status=True)
+
+    # Schedule the cleanup as its own delayed task instead of sleeping here: a
+    # sleeping task would keep this worker slot occupied for the full ten minutes.
+    cleanup_retriever_job.apply_async(args=[task_id], countdown=600)
     return task_id
+
+
+@shared_task
+def cleanup_retriever_job(task_id):
+    """Delete a finished retriever task together with its uploaded files.
+
+    Args:
+        task_id: ``task_uuid`` of the RetrieverTask to remove.
+
+    Returns:
+        The same ``task_id``, so the Celery result names the cleaned-up task.
+    """
+    for attachment in UploadFile.objects.filter(related_task__task_uuid=task_id):
+        # save=False: the row itself is removed by the cascading delete below,
+        # so there is no point in writing the emptied file field back first.
+        attachment.file.delete(save=False)
+    RetrieverTask.objects.filter(task_uuid=task_id).delete()
+    return task_id
diff --git a/retriever/templates/task_viewer.html b/retriever/templates/task_viewer.html
index 315ec47..de4bc54 100644
--- a/retriever/templates/task_viewer.html
+++ b/retriever/templates/task_viewer.html
@@ -1,6 +1,10 @@
{% extends 'base.html' %}
{% load unicorn %}
-
+{% block navbar_item %}
+
+ 返回首页
+
+{% endblock %}
{% block content %}
@@ -22,11 +26,16 @@
{{ task_file.file_name }}
-
检索到的敏感词:{{ task_file.file_keyword_str }}
-
- {% for paragraph in task_file.keyword_paragraph.all %}
-
{{ forloop.counter }}.{{ paragraph.paragraph | safe }}
- {% endfor %}
+ {% if task_file.file_keyword_str %}
+
检索到的敏感词:{{ task_file.file_keyword_str }}
+
+ {% for paragraph in task_file.keyword_paragraph.all %}
+
{{ forloop.counter }}.{{ paragraph.paragraph | safe }}
+ {% endfor %}
+ {% else %}
+ {# Reached when file_keyword_str is empty, i.e. no keyword was found in this file; else avoids evaluating the property a second time. #}
+
未检索到敏感词
+ {% endif %}
diff --git a/templates/base.html b/templates/base.html
index 03d16b3..fa31961 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -11,7 +11,7 @@
寻章智搜
-
+
{% unicorn_scripts %}
@@ -25,8 +25,12 @@
@@ -62,13 +66,13 @@
- Rent Template 2020 © Aldi Duzha
Terms of use and
Privacy policy.
-
Fair Housing
+ Rent Template 2020 © Aldi Duzha
diff --git a/templates/index.html b/templates/index.html
index f79bdbd..030181c 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -2,27 +2,32 @@
{% load static %}
{% load unicorn %}
-
+{% block navbar_item %}
+
+ 敏感词库维护
+
+{% endblock %}
{% block content %}
- Search hundreds of thousands of apartments, condos and houses for rent.
+ Sensitive Word Detection Tool for Documents
- 文件敏感词检测工具
+ 敏感词检测工具
-
批量上传
+
可批量上传,支持 .docx, .doc, .dot, .pptx, .ppt, .pdf, .xls, .xlsx, .txt(UTF-8) 格式文件
+
+{% endblock %}
+
+{% block script %}
+
+{# show file name after file selected#}
+
+ // 获取文件选择输入框和文件名显示容器
+ const fileInput = document.getElementById('file-input');
+ const fileListDiv = document.getElementById('file-list');
+
+ // 添加事件监听器来处理文件选择
+ fileInput.addEventListener('change', function() {
+ // 获取所选文件列表
+ const selectedFiles = fileInput.files;
+
+ // 创建一个用于显示文件名的字符串
+ let fileListText = '所选文件:
';
+
+ // 遍历文件列表并添加文件名到字符串
+ for (let i = 0; i < selectedFiles.length; i++) {
+ const fileName = selectedFiles[i].name;
+ fileListText += `${fileName}
`;
+ }
+
+ // 将文件名字符串插入到页面上的容器中
+ fileListDiv.innerHTML = fileListText;
+ });
+
{% endblock %}
\ No newline at end of file