prepare for offline env
This commit is contained in:
parent
ddc9efceff
commit
ef9c416eda
|
@ -11,6 +11,7 @@ https://docs.djangoproject.com/en/4.2/ref/settings/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import os
|
||||||
|
|
||||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
||||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||||
|
@ -123,6 +124,7 @@ STATIC_URL = 'static/'
|
||||||
STATICFILES_DIRS = [
|
STATICFILES_DIRS = [
|
||||||
BASE_DIR / 'static',
|
BASE_DIR / 'static',
|
||||||
]
|
]
|
||||||
|
STATIC_ROOT = os.path.join(BASE_DIR, 'static', 'static_root')
|
||||||
|
|
||||||
# Default primary key field type
|
# Default primary key field type
|
||||||
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
|
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
|
||||||
|
|
|
@ -24,3 +24,5 @@ urlpatterns = [
|
||||||
path("unicorn/", include("django_unicorn.urls")),
|
path("unicorn/", include("django_unicorn.urls")),
|
||||||
path('', include('retriever.urls')),
|
path('', include('retriever.urls')),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
admin.site.site_header = '敏感词维护'
|
|
@ -1,7 +1,15 @@
|
||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
|
from django.contrib.auth.models import Group, User
|
||||||
|
from django_celery_results.models import TaskResult, GroupResult
|
||||||
|
|
||||||
# Register your models here.
|
# Register your models here.
|
||||||
|
|
||||||
from retriever.models import RetrieverTask, UploadFile, KeywordParagraph, Keywords
|
from retriever.models import RetrieverTask, UploadFile, KeywordParagraph, Keywords
|
||||||
|
|
||||||
admin.site.register(Keywords)
|
admin.site.register(Keywords)
|
||||||
|
|
||||||
|
# hide the user and group models
|
||||||
|
admin.site.unregister(Group)
|
||||||
|
admin.site.unregister(User)
|
||||||
|
admin.site.unregister(TaskResult)
|
||||||
|
admin.site.unregister(GroupResult)
|
||||||
|
|
|
@ -12,4 +12,4 @@ class SpaceSeparatedField(forms.CharField):
|
||||||
|
|
||||||
class UploadForm(forms.Form):
|
class UploadForm(forms.Form):
|
||||||
attachments = MultiFileField(min_num=1, max_num=10, max_file_size=1024 * 1024 * 64,
|
attachments = MultiFileField(min_num=1, max_num=10, max_file_size=1024 * 1024 * 64,
|
||||||
attrs={'class': 'file-input is-primary', 'accept': '.docx, .doc, .dot, .pptx, .ppt, .pdf, .xls'})
|
attrs={'class': 'file-input is-primary', 'accept': '.docx, .doc, .dot, .pptx, .ppt, .pdf, .xls, .xlsx, .txt', 'id': 'file-input'})
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
# Generated by Django 4.2.4 on 2023-08-25 14:14
|
||||||
|
|
||||||
|
import django.core.validators
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
initial = True
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='Keywords',
|
||||||
|
fields=[
|
||||||
|
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||||
|
('keyword', models.CharField(max_length=64, unique=True, verbose_name='敏感词')),
|
||||||
|
('is_active', models.BooleanField(default=True, verbose_name='是否启用')),
|
||||||
|
('keyword_created', models.DateTimeField(auto_now_add=True)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'verbose_name': '敏感词',
|
||||||
|
'verbose_name_plural': '敏感词',
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='RetrieverTask',
|
||||||
|
fields=[
|
||||||
|
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||||
|
('task_uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
|
||||||
|
('task_keywords', models.CharField(max_length=1024)),
|
||||||
|
('task_status', models.BooleanField(default=False)),
|
||||||
|
('task_started', models.BooleanField(default=False)),
|
||||||
|
('task_created', models.DateTimeField(auto_now_add=True)),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='UploadFile',
|
||||||
|
fields=[
|
||||||
|
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||||
|
('file_id', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
|
||||||
|
('file_name', models.CharField(max_length=100)),
|
||||||
|
('is_checked', models.BooleanField(default=False)),
|
||||||
|
('file', models.FileField(upload_to='uploads/', validators=[django.core.validators.FileExtensionValidator(allowed_extensions=['docx'])])),
|
||||||
|
('related_task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='attachment', to='retriever.retrievertask')),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='KeywordParagraph',
|
||||||
|
fields=[
|
||||||
|
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||||
|
('keyword', models.CharField(max_length=1024)),
|
||||||
|
('paragraph', models.TextField()),
|
||||||
|
('related_file', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='keyword_paragraph', to='retriever.uploadfile')),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
]
|
|
@ -37,7 +37,7 @@ class UploadFile(models.Model):
|
||||||
# 以列表形式返回文件所属的所有段落的keyword
|
# 以列表形式返回文件所属的所有段落的keyword
|
||||||
keyword_in_paragraph = self.keyword_paragraph.all().values_list('keyword', flat=True)
|
keyword_in_paragraph = self.keyword_paragraph.all().values_list('keyword', flat=True)
|
||||||
flat_keyword = [item.strip("[]' ") for sublist in keyword_in_paragraph for item in sublist.split(',')]
|
flat_keyword = [item.strip("[]' ") for sublist in keyword_in_paragraph for item in sublist.split(',')]
|
||||||
print(flat_keyword)
|
# print(flat_keyword)
|
||||||
# 排除重复的keyword 并返回以逗号分隔的字符串
|
# 排除重复的keyword 并返回以逗号分隔的字符串
|
||||||
return ','.join(set(flat_keyword))
|
return ','.join(set(flat_keyword))
|
||||||
# return self.keyword_paragraph.all().values_list('keyword', flat=True)
|
# return self.keyword_paragraph.all().values_list('keyword', flat=True)
|
||||||
|
@ -53,8 +53,8 @@ class KeywordParagraph(models.Model):
|
||||||
|
|
||||||
|
|
||||||
class Keywords(models.Model):
|
class Keywords(models.Model):
|
||||||
keyword = models.CharField(max_length=64)
|
keyword = models.CharField(max_length=64, unique=True, blank=False, null=False, verbose_name='敏感词')
|
||||||
is_active = models.BooleanField(default=True)
|
is_active = models.BooleanField(default=True, verbose_name='是否启用')
|
||||||
keyword_created = models.DateTimeField(auto_now_add=True)
|
keyword_created = models.DateTimeField(auto_now_add=True)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
@ -64,3 +64,7 @@ class Keywords(models.Model):
|
||||||
def active_keyword_list(self):
|
def active_keyword_list(self):
|
||||||
# 以列表形式返回所有is_active=True的keyword
|
# 以列表形式返回所有is_active=True的keyword
|
||||||
return Keywords.objects.filter(is_active=True).values_list('keyword', flat=True)
|
return Keywords.objects.filter(is_active=True).values_list('keyword', flat=True)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
verbose_name = '敏感词'
|
||||||
|
verbose_name_plural = '敏感词'
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# app/tasks.py, 可以复用的task 改这个文件记得重启 Celery!!!
|
# app/tasks.py, 可以复用的task 改这个文件记得重启 Celery!!!
|
||||||
import ast
|
import ast
|
||||||
|
import time
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
|
|
||||||
from .models import RetrieverTask, UploadFile, KeywordParagraph
|
from .models import RetrieverTask, UploadFile, KeywordParagraph
|
||||||
|
@ -15,8 +16,13 @@ def start_retriever_job(task_id):
|
||||||
|
|
||||||
for each in current_task.attachment.all():
|
for each in current_task.attachment.all():
|
||||||
if not each.is_checked:
|
if not each.is_checked:
|
||||||
result_dict = util_keyword_find(each.file_path, task_keywords)
|
try:
|
||||||
UploadFile.objects.filter(file_id=each.file_id).update(is_checked=True) # 更新is_checked字段
|
result_dict = util_keyword_find(each.file_path, task_keywords)
|
||||||
|
UploadFile.objects.filter(file_id=each.file_id).update(is_checked=True) # 更新is_checked字段
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
result_dict = {'file_name': each.file_name, 'find_list': ['该文件检查程序出错,请联系管理员'], 'paragraph_keyword': ['出错']}
|
||||||
|
|
||||||
KeywordParagraph.objects.bulk_create(
|
KeywordParagraph.objects.bulk_create(
|
||||||
[KeywordParagraph(related_file=each, keyword=para_keyword, paragraph=paragraph) for paragraph, para_keyword in
|
[KeywordParagraph(related_file=each, keyword=para_keyword, paragraph=paragraph) for paragraph, para_keyword in
|
||||||
|
@ -24,4 +30,10 @@ def start_retriever_job(task_id):
|
||||||
elif each.is_checked:
|
elif each.is_checked:
|
||||||
continue
|
continue
|
||||||
RetrieverTask.objects.filter(task_uuid=task_id).update(task_status=True)
|
RetrieverTask.objects.filter(task_uuid=task_id).update(task_status=True)
|
||||||
|
|
||||||
|
time.sleep(600)
|
||||||
|
# delete task and related files
|
||||||
|
for each in current_task.attachment.all():
|
||||||
|
each.file.delete(save=True)
|
||||||
|
RetrieverTask.objects.filter(task_uuid=task_id).delete()
|
||||||
return task_id
|
return task_id
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
{% extends 'base.html' %}
|
{% extends 'base.html' %}
|
||||||
{% load unicorn %}
|
{% load unicorn %}
|
||||||
|
{% block navbar_item %}
|
||||||
|
<a href="/" class="button is-primary">
|
||||||
|
返回首页
|
||||||
|
</a>
|
||||||
|
{% endblock %}
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<div class="content">
|
<div class="content">
|
||||||
|
@ -22,11 +26,16 @@
|
||||||
<span class="tag is-info is-medium">
|
<span class="tag is-info is-medium">
|
||||||
{{ task_file.file_name }}
|
{{ task_file.file_name }}
|
||||||
</span>
|
</span>
|
||||||
<b>检索到的敏感词:{{ task_file.file_keyword_str }}</b>
|
{% if task_file.file_keyword_str %}
|
||||||
<br>
|
<b>检索到的敏感词:{{ task_file.file_keyword_str }}</b>
|
||||||
{% for paragraph in task_file.keyword_paragraph.all %}
|
<br>
|
||||||
<p>{{ forloop.counter }}.{{ paragraph.paragraph | safe }}</p>
|
{% for paragraph in task_file.keyword_paragraph.all %}
|
||||||
{% endfor %}
|
<p><b>{{ forloop.counter }}.</b>{{ paragraph.paragraph | safe }}</p>
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{% if not task_file.file_keyword_str %}
|
||||||
|
<b>未检索到敏感词</b>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
<meta name="description" content="Search houses and apartments for rent anywhere within the US. View floorplans, pricing, images and more. Find your perfect rental.">
|
<meta name="description" content="Search houses and apartments for rent anywhere within the US. View floorplans, pricing, images and more. Find your perfect rental.">
|
||||||
<meta name="keywords" content="bulma, rent, template, apartments, page, website, free, awesome">
|
<meta name="keywords" content="bulma, rent, template, apartments, page, website, free, awesome">
|
||||||
<title>寻章智搜</title>
|
<title>寻章智搜</title>
|
||||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@5.13.0/css/all.min.css">
|
<link rel="stylesheet" href="{% static 'fontawesomecss/css/all.min.css' %}">
|
||||||
<link rel="stylesheet" type="text/css" href="{% static 'bulma.min.css' %}">
|
<link rel="stylesheet" type="text/css" href="{% static 'bulma.min.css' %}">
|
||||||
{% unicorn_scripts %}
|
{% unicorn_scripts %}
|
||||||
</head>
|
</head>
|
||||||
|
@ -25,8 +25,12 @@
|
||||||
<div class="navbar-brand">
|
<div class="navbar-brand">
|
||||||
<a class="navbar-item" href="/">
|
<a class="navbar-item" href="/">
|
||||||
{# <img src="https://bulma.io/images/bulma-logo.png" alt="Bulma Rent" width="80" height="20">#}
|
{# <img src="https://bulma.io/images/bulma-logo.png" alt="Bulma Rent" width="80" height="20">#}
|
||||||
<span class="is-size-3 has-text-weight-semibold">寻章智搜</span>
|
<span class="is-size-3 has-text-weight-semibold">寻章智搜 • </span>
|
||||||
|
<span class="is-size-3 has-text-weight-light" style="color: gray">直升机所保密办</span>
|
||||||
|
|
||||||
|
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navbarTopMain">
|
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navbarTopMain">
|
||||||
<span aria-hidden="true"></span>
|
<span aria-hidden="true"></span>
|
||||||
<span aria-hidden="true"></span>
|
<span aria-hidden="true"></span>
|
||||||
|
@ -35,12 +39,12 @@
|
||||||
</div>
|
</div>
|
||||||
<div class="navbar-menu" id="navbarTopMain">
|
<div class="navbar-menu" id="navbarTopMain">
|
||||||
<div class="navbar-end">
|
<div class="navbar-end">
|
||||||
|
|
||||||
<a href="#" class="navbar-item has-text-weight-semibold">Post a listing</a>
|
|
||||||
<div class="navbar-item">
|
<div class="navbar-item">
|
||||||
<a href="https://aldi.github.io/awesome-bulma-templates/templates/login/login.html" class="button is-primary">
|
{% block navbar_item %}
|
||||||
登录
|
<a href="/admin" class="button is-primary">
|
||||||
|
敏感词库维护
|
||||||
</a>
|
</a>
|
||||||
|
{% endblock %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -62,13 +66,13 @@
|
||||||
<div class="columns m-t-10">
|
<div class="columns m-t-10">
|
||||||
<div class="column">
|
<div class="column">
|
||||||
<nav class="has-text-grey-light">
|
<nav class="has-text-grey-light">
|
||||||
<a href="#" class="has-text-primary">About</a> •
|
<a href="#" class="has-text-primary">About</a>
|
||||||
|
{# •#}
|
||||||
</nav>
|
</nav>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="b-t m-t-30 p-t-30 has-text-grey-light is-size-7">
|
<div class="b-t m-t-30 p-t-30 has-text-grey-light is-size-7">
|
||||||
Rent Template 2020 © Aldi Duzha <br> <a href="#" class="has-text-primary">Terms of use</a> and <a class="has-text-primary" href="#">Privacy policy</a>.
|
Rent Template 2020 © Aldi Duzha <br>
|
||||||
<a href="#" class="has-text-primary">Fair Housing</a>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -2,27 +2,32 @@
|
||||||
{% load static %}
|
{% load static %}
|
||||||
{% load unicorn %}
|
{% load unicorn %}
|
||||||
|
|
||||||
|
{% block navbar_item %}
|
||||||
|
<a href="/admin" class="button is-primary">
|
||||||
|
敏感词库维护
|
||||||
|
</a>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="hero-body p-b-30 ">
|
<div class="hero-body p-b-30 ">
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<h2 class="subtitle">
|
<h2 class="subtitle">
|
||||||
<span class="has-text-centered is-block">
|
<span class="has-text-centered is-block">
|
||||||
Search hundreds of thousands of apartments, condos and houses for rent.
|
Sensitive Word Detection Tool for Documents
|
||||||
</span>
|
</span>
|
||||||
</h2>
|
</h2>
|
||||||
<h1 class="title">
|
<h1 class="title">
|
||||||
<span class="is-size-2 has-text-centered is-block">文件敏感词检测工具</span>
|
<span class="is-size-2 has-text-centered is-block">敏感词检测工具</span>
|
||||||
</h1>
|
</h1>
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<div class="notification">
|
<div class="notification">
|
||||||
<span class="has-text-centered is-block">批量上传</span>
|
<span class="has-text-centered is-block">可批量上传,支持 .docx, .doc, .dot, .pptx, .ppt, .pdf, .xls, .xlsx, .txt(UTF-8) 格式文件</span>
|
||||||
|
<br>
|
||||||
<form method="POST" enctype="multipart/form-data">
|
<form method="POST" enctype="multipart/form-data">
|
||||||
{% csrf_token %}
|
{% csrf_token %}
|
||||||
<div class="columns">
|
<div class="columns">
|
||||||
<div class="column has-text-centered">
|
<div class="column has-text-centered">
|
||||||
<div class="file is-boxed">
|
<div class="file is-boxed is-centered">
|
||||||
<label class="file-label">
|
<label class="file-label">
|
||||||
{{ form.attachments }}
|
{{ form.attachments }}
|
||||||
<span class="file-cta">
|
<span class="file-cta">
|
||||||
|
@ -30,16 +35,28 @@
|
||||||
<i class="fas fa-upload"></i>
|
<i class="fas fa-upload"></i>
|
||||||
</span>
|
</span>
|
||||||
<span class="file-label">
|
<span class="file-label">
|
||||||
选择多个文件
|
选择文件
|
||||||
</span>
|
</span>
|
||||||
</span>
|
</span>
|
||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="columns">
|
||||||
|
<div class="column"></div>
|
||||||
|
<div class="is-centered column" id="file-list">
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div class="column"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
<input type="submit" value="提交" class="button is-link is-medium">
|
<div class="is-centered" style="text-align:center">
|
||||||
|
<input type="submit" value="提交" class="button is-link is-medium is-center">
|
||||||
|
</div>
|
||||||
|
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
@ -49,4 +66,32 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block script %}
|
||||||
|
|
||||||
|
{# show file name after file selected#}
|
||||||
|
|
||||||
|
// 获取文件选择输入框和文件名显示容器
|
||||||
|
const fileInput = document.getElementById('file-input');
|
||||||
|
const fileListDiv = document.getElementById('file-list');
|
||||||
|
|
||||||
|
// 添加事件监听器来处理文件选择
|
||||||
|
fileInput.addEventListener('change', function() {
|
||||||
|
// 获取所选文件列表
|
||||||
|
const selectedFiles = fileInput.files;
|
||||||
|
|
||||||
|
// 创建一个用于显示文件名的字符串
|
||||||
|
let fileListText = '所选文件:<br>';
|
||||||
|
|
||||||
|
// 遍历文件列表并添加文件名到字符串
|
||||||
|
for (let i = 0; i < selectedFiles.length; i++) {
|
||||||
|
const fileName = selectedFiles[i].name;
|
||||||
|
fileListText += `${fileName}<br>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将文件名字符串插入到页面上的容器中
|
||||||
|
fileListDiv.innerHTML = fileListText;
|
||||||
|
});
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
Loading…
Reference in New Issue