Dynavera/apps/knowledge/models.py

81 lines
3.2 KiB
Python
Raw Normal View History

import os
from django.conf import settings
2026-03-08 13:10:49 +00:00
from django.db import transaction
2026-03-15 22:19:12 +00:00
from django.db.models import CASCADE, SET_NULL, BooleanField, CharField, FileField, ForeignKey, IntegerField, JSONField, Model, TextField
from django.db.models.signals import post_delete, post_save
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from pgvector.django import VectorField
from apps.accounts.mixins import IdentifierMixin, TimeStampMixin
2026-03-15 22:19:12 +00:00
from apps.accounts.models import Organization, Role, User
class TrainingFile(IdentifierMixin, TimeStampMixin, Model):
    """An uploaded file belonging to an organization, optionally tied to a role.

    A row whose ``role`` is empty is labelled "Organization-wide" by
    ``__str__``. ``status`` holds one of the ``STATUS_CHOICES`` keys and
    starts at ``'ingesting'``.
    """

    # Allowed values for ``status`` as (stored value, display label) pairs.
    STATUS_CHOICES = [
        ('ingesting', 'Ingesting'),
        ('chunked', 'Chunked'),
        ('embedded', 'Embedded'),
        ('failed', 'Failed'),
    ]

    # --- Ownership -------------------------------------------------------
    organization = ForeignKey(
        Organization,
        on_delete=CASCADE,
        related_name="training_files",
    )
    # Optional: no role means the file is treated as organization-wide.
    role = ForeignKey(
        Role,
        on_delete=CASCADE,
        related_name="training_files",
        null=True,
        blank=True,
    )
    uploaded_by = ForeignKey(
        User,
        on_delete=CASCADE,
        related_name="uploaded_training_files",
    )

    # --- Stored file and its descriptive attributes ------------------------
    file = FileField(upload_to='training_files/%Y/%m/%d/')
    file_name = CharField(max_length=255)
    file_size = IntegerField()  # presumably bytes -- TODO confirm with the uploader
    file_type = CharField(max_length=50)

    # --- Processing state --------------------------------------------------
    description = TextField(blank=True, default='')
    status = CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='ingesting',
    )
    is_processed = BooleanField(default=False)

    class Meta:
        verbose_name = _("Training File")
        verbose_name_plural = _("Training Files")
        # Newest first; ``created_at`` is presumably supplied by TimeStampMixin.
        ordering = ['-created_at']

    def __str__(self) -> str:
        """Return the file name followed by its role or organization scope."""
        # ``role_id`` is checked (not ``role``) so no query runs for role-less rows.
        scope = (
            self.role.name
            if self.role_id
            else f"{self.organization.name} - Organization-wide"
        )
        return f"{self.file_name} ({scope})"
class RoleRagDocument(IdentifierMixin, TimeStampMixin, Model):
    """One chunk of text content with an optional embedding vector.

    Each row belongs to an organization and may additionally reference a
    role and the ``TrainingFile`` it came from (reachable from the file as
    ``training_file.chunks``).
    """

    # --- Ownership / provenance -------------------------------------------
    organization = ForeignKey(
        Organization,
        on_delete=CASCADE,
        related_name='rag_documents',
    )
    # NOTE(review): SET_NULL means deleting a role leaves its chunks behind
    # with no role, and ``__str__`` then labels them "Organization-wide".
    # Confirm that retrieval code does not treat such rows as visible to the
    # whole organization unintentionally.
    role = ForeignKey(
        Role,
        on_delete=SET_NULL,
        related_name='rag_documents',
        null=True,
        blank=True,
    )
    training_file = ForeignKey(
        TrainingFile,
        on_delete=CASCADE,
        related_name='chunks',
        null=True,
        blank=True,
    )

    # --- Chunk payload ----------------------------------------------------
    content = TextField()
    # Indexed for lookups; 64 chars fits a hex SHA-256 digest (assumed -- TODO confirm).
    content_hash = CharField(max_length=64, db_index=True)

    # Vector width comes from settings; nullable, so a row can exist without one.
    embedding = VectorField(
        dimensions=settings.EMBEDDING_DIMENSIONS,
        null=True,
        blank=True,
    )

    # --- Bookkeeping --------------------------------------------------------
    metadata = JSONField(default=dict, blank=True)
    chunk_index = IntegerField(default=0)
    is_active = BooleanField(default=True)

    class Meta:
        verbose_name = _("Role RAG Document")
        verbose_name_plural = _("Role RAG Documents")

    def __str__(self) -> str:
        """Return the owning role (or organization) and the chunk index."""
        # ``role_id`` is checked (not ``role``) so no query runs for role-less rows.
        owner = (
            self.role.name
            if self.role_id
            else f"{self.organization.name} (Organization-wide)"
        )
        return f"{owner} - Chunk {self.chunk_index}"
@receiver(post_delete, sender=TrainingFile)
def delete_physical_file(sender, instance, **kwargs):
    """Remove the stored file once its ``TrainingFile`` row has been deleted.

    Args:
        sender: The model class that sent the signal (``TrainingFile``).
        instance: The ``TrainingFile`` instance that was deleted.
        **kwargs: Remaining ``post_delete`` signal arguments (unused).
    """
    stored_file = instance.file
    if not stored_file:
        # No file was ever attached; nothing to clean up.
        return

    # Capture storage and name now so the deferred callback does not depend
    # on the state of ``instance`` at commit time.
    storage = stored_file.storage
    name = stored_file.name

    # Delete through the storage API rather than ``os.remove(file.path)``:
    # ``.path`` raises NotImplementedError on storages without local paths,
    # and the built-in filesystem storage treats an already-missing file as
    # a no-op, which removes the check-then-remove race.
    #
    # Defer until commit so that a rolled-back transaction does not leave a
    # restored database row pointing at a file that no longer exists. Outside
    # a transaction the callback runs immediately.
    transaction.on_commit(lambda: storage.delete(name))
@receiver(post_save, sender=TrainingFile)
def trigger_ingestion(sender, instance, created, **kwargs):
    """Queue the ingestion task for a newly created ``TrainingFile``.

    Does nothing when an existing row is saved again. The task is enqueued
    from an ``on_commit`` callback, so it is dispatched only after the
    surrounding transaction commits.

    Args:
        sender: The model class that sent the signal (``TrainingFile``).
        instance: The ``TrainingFile`` instance that was saved.
        created: True when the save inserted a new row.
        **kwargs: Remaining ``post_save`` signal arguments (unused).
    """
    if not created:
        return

    def _dispatch_ingestion():
        # Imported here rather than at module level because a top-level
        # import of the tasks module creates a circular import.
        from apps.knowledge.tasks import ingest_training_file_task

        # The task receives the UUID as a plain string.
        file_uuid = str(instance.uuid)
        ingest_training_file_task.delay(file_uuid)

    transaction.on_commit(_dispatch_ingestion)