Dynavera/apps/knowledge/models.py
2026-03-11 14:33:39 +00:00

75 lines
No EOL
2.8 KiB
Python

import os
from django.conf import settings
from django.db import transaction
from django.db.models import CASCADE, BooleanField, CharField, FileField, ForeignKey, IntegerField, JSONField, Model, TextField
from django.db.models.signals import post_delete, post_save
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from pgvector.django import VectorField
from apps.accounts.mixins import IdentifierMixin, TimeStampMixin
from apps.accounts.models import Role, User
class TrainingFile(IdentifierMixin, TimeStampMixin, Model):
    """A file uploaded for a role, together with details about the upload.

    Each row keeps the stored file, descriptive details (name, size, type,
    free-text description) and two progress markers: ``status`` and
    ``is_processed``.
    """

    # Values accepted by ``status``, as (stored value, display label) pairs.
    STATUS_CHOICES = [
        ('ingesting', 'Ingesting'),
        ('chunked', 'Chunked'),
        ('embedded', 'Embedded'),
        ('failed', 'Failed'),
    ]

    # Ownership: deleting the role or the uploading user cascades to this row.
    role = ForeignKey(Role, related_name="training_files", on_delete=CASCADE)
    uploaded_by = ForeignKey(
        User,
        related_name="uploaded_training_files",
        on_delete=CASCADE,
    )

    # The stored file, placed under a dated sub-directory of the storage root.
    file = FileField(upload_to='training_files/%Y/%m/%d/')

    # Descriptive details about the upload.
    file_name = CharField(max_length=255)
    file_size = IntegerField()  # presumably a byte count -- TODO confirm
    file_type = CharField(max_length=50)
    description = TextField(default='', blank=True)

    # Progress markers; new rows start as 'ingesting' and not processed.
    status = CharField(
        max_length=20,
        default='ingesting',
        choices=STATUS_CHOICES,
    )
    is_processed = BooleanField(default=False)

    class Meta:
        # Descending ``created_at``; that column is not declared here and is
        # presumably contributed by TimeStampMixin.
        ordering = ['-created_at']
        verbose_name = _("Training File")
        verbose_name_plural = _("Training Files")

    def __str__(self) -> str:
        """Return the file name followed by the owning role's name."""
        label = self.file_name
        owner = self.role.name
        return f"{label} ({owner})"
class RoleRagDocument(IdentifierMixin, TimeStampMixin, Model):
    """A piece of text content stored for a role, with an optional embedding.

    A document may reference a ``TrainingFile``; that link is optional, and
    the file exposes its linked documents through the ``chunks`` reverse
    accessor.
    """

    # Relations: both cascade, so removing the role or the referenced
    # training file removes this row as well.
    role = ForeignKey(Role, related_name='rag_documents', on_delete=CASCADE)
    training_file = ForeignKey(
        TrainingFile,
        related_name='chunks',
        on_delete=CASCADE,
        blank=True,
        null=True,
    )

    # The text itself plus an indexed hash column.
    content = TextField()
    # 64 characters wide -- presumably a SHA-256 hex digest; TODO confirm.
    content_hash = CharField(max_length=64, db_index=True)

    # Nullable vector; its width is read from settings.EMBEDDING_DIMENSIONS.
    embedding = VectorField(
        dimensions=settings.EMBEDDING_DIMENSIONS,
        blank=True,
        null=True,
    )

    metadata = JSONField(blank=True, default=dict)
    chunk_index = IntegerField(default=0)
    is_active = BooleanField(default=True)

    class Meta:
        verbose_name = _("Role RAG Document")
        verbose_name_plural = _("Role RAG Documents")

    def __str__(self) -> str:
        """Return the role's name followed by this document's chunk index."""
        owner = self.role.name
        position = self.chunk_index
        return f"{owner} - Chunk {position}"
@receiver(post_delete, sender=TrainingFile)
def delete_physical_file(sender, instance, **kwargs):
    """Remove the stored upload after its ``TrainingFile`` row is deleted.

    The deletion goes through the field's storage backend rather than
    ``os.remove(instance.file.path)``. ``FieldFile.path`` is only available
    on storages backed by the local filesystem and raises
    ``NotImplementedError`` on others, and a separate ``os.path.isfile``
    check followed by ``os.remove`` can race with another process removing
    the same file.

    Args:
        sender: The model class that sent the signal.
        instance: The ``TrainingFile`` that was just deleted.
        **kwargs: Remaining signal arguments (unused).
    """
    stored = instance.file
    # A FieldFile with no name is falsy: there is nothing to delete.
    if not stored:
        return
    # Call the storage directly (not FieldFile.delete) so the in-memory
    # instance is left untouched for any other post_delete receivers.
    stored.storage.delete(stored.name)
@receiver(post_save, sender=TrainingFile)
def trigger_ingestion(sender, instance, created, **kwargs):
    """Queue the ingestion task for a newly created ``TrainingFile``.

    Saves of an existing row are ignored. For a new row the task is handed
    to ``transaction.on_commit``, so it is only dispatched once the
    surrounding transaction has committed.

    Args:
        sender: The model class that sent the signal.
        instance: The ``TrainingFile`` that was saved.
        created: True when the save inserted a new row.
        **kwargs: Remaining signal arguments (unused).
    """
    if not created:
        return

    def _queue_ingestion_task():
        # Imported here rather than at module level to avoid a circular
        # import between this module and the tasks module.
        from apps.knowledge.tasks import ingest_training_file_task

        ingest_training_file_task.delay(str(instance.uuid))

    transaction.on_commit(_queue_ingestion_task)