"""Django models for retriever tasks, their uploaded .docx files and keywords."""
import time
import uuid

from django.core.validators import FileExtensionValidator
from django.db import models


class RetrieverTask(models.Model):
    """A retrieval task identified by a UUID and described by its keywords."""

    task_uuid = models.UUIDField(default=uuid.uuid4, unique=True, editable=False)
    task_keywords = models.CharField(max_length=1024, blank=False, null=False)
    task_status = models.BooleanField(default=False)
    # Whether the task has already been started; used to avoid running it twice.
    task_started = models.BooleanField(default=False)
    task_created = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        """Return the task UUID in its canonical string form."""
        return str(self.task_uuid)


class UploadFile(models.Model):
    """A .docx file uploaded as an attachment of a ``RetrieverTask``."""

    file_id = models.UUIDField(default=uuid.uuid4, unique=True, editable=False)
    # Deleting the task cascades to its attachments.
    related_task = models.ForeignKey(
        'RetrieverTask',
        related_name='attachment',
        on_delete=models.CASCADE,
    )
    file_name = models.CharField(max_length=100)
    is_checked = models.BooleanField(default=False)
    # Only files with a ``.docx`` extension pass validation.
    file = models.FileField(
        upload_to='uploads/',
        validators=[FileExtensionValidator(allowed_extensions=['docx'])],
    )

    def __str__(self):
        """Return the stored file name."""
        return self.file_name

    @property
    def file_path(self):
        """Return the ``path`` attribute of the underlying ``file`` field."""
        return self.file.path

    @property
    def file_keyword_str(self):
        """Return the distinct keywords of this file as a comma-separated string.

        Every related ``KeywordParagraph.keyword`` value is split on commas and
        each fragment is stripped of square brackets, single quotes and spaces
        (NOTE(review): this looks like it undoes a stringified Python list such
        as ``"['a', 'b']"`` -- confirm against the code that writes the field).

        Duplicates are dropped while keeping first-seen order, so the result is
        stable between calls and processes. Fragments that are empty after
        stripping are skipped so the result never contains stray commas.
        """
        raw_values = self.keyword_paragraph.all().values_list('keyword', flat=True)
        # A dict is used as an insertion-ordered set.
        distinct_keywords = {}
        for raw_value in raw_values:
            for fragment in raw_value.split(','):
                keyword = fragment.strip("[]' ")
                if keyword:
                    distinct_keywords[keyword] = None
        return ','.join(distinct_keywords)


class KeywordParagraph(models.Model):
    """A paragraph of an uploaded file together with its keyword text."""

    # Deleting the file cascades to its paragraphs.
    related_file = models.ForeignKey(
        'UploadFile',
        related_name='keyword_paragraph',
        on_delete=models.CASCADE,
    )
    keyword = models.CharField(max_length=1024)
    paragraph = models.TextField()

    def __str__(self):
        """Return the keyword text."""
        return self.keyword


class Keywords(models.Model):
    """A single keyword that can be switched on or off via ``is_active``."""

    keyword = models.CharField(max_length=64)
    is_active = models.BooleanField(default=True)
    keyword_created = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        """Return the keyword text."""
        return self.keyword

    @property
    def active_keyword_list(self):
        """Return the ``keyword`` values of all rows with ``is_active=True``.

        The query covers the whole table and does not depend on this instance.
        The result is a flat ``values_list`` QuerySet rather than a ``list``;
        wrap it in ``list(...)`` if a concrete list is required.
        """
        return Keywords.objects.filter(is_active=True).values_list('keyword', flat=True)