Wiped all files

This commit is contained in:
Viswamedha Nalabotu 2026-02-26 00:09:52 +00:00
parent 4c50c9fb79
commit af1ca55611
128 changed files with 0 additions and 21107 deletions

View file

@ -1,37 +0,0 @@
*.sqlite3
__pycache__/
*.pyc
*.pyo
*.pyd
*.db
*.log
*.pot
*.mo
*.swp
*.yml
.DS_Store
.env
.vscode/
.idea/
.git/
.github/
.gitignore
.editorconfig
.prettierrc
.prettierignore
.nx/
venv/
env/
ENV/
.venv/
node_modules/
build/
dist/
*.egg-info/
celerybeat-schedule
*.md
*.bat
notebooks/
documents/
models/
eslint.config.mjs

View file

@ -1,26 +0,0 @@
root = true
[*.{yml,yaml}]
charset = utf-8
end_of_line = lf
insert_final_newline = true
[compose/**.yml]
indent_style = space
indent_size = 2
[compose/**.yaml]
indent_style = space
indent_size = 2
[docker-compose*.yml]
indent_style = space
indent_size = 2
[*.{js,jsx,mjs,cjs,ts,tsx,mts,cts,vue,css,scss,sass,less,styl}]
charset = utf-8
indent_size = 4
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
end_of_line = lf
max_line_length = 100

View file

@ -1,54 +0,0 @@
# Django .env template file
# Compose
COMPOSE_PROJECT_NAME=dynavera
# Directories
DJANGO_FRONT_DIR=front
DJANGO_MODEL_DIR=model
# Django core
DJANGO_SECRET_KEY=change-me-secure-key
DJANGO_DEBUG=False
DJANGO_DOMAIN_NAME=localhost
DJANGO_ALLOWED_HOSTS=localhost,127.0.0.1
# Celery
DJANGO_CELERY_BROKER_URL=redis://localhost:6379/0
# Static & Media paths
DJANGO_STATIC_URL=/static/
DJANGO_MEDIA_URL=/media/
DJANGO_STATIC_ROOT=static
DJANGO_MEDIA_ROOT=media
# Database
DJANGO_DB_ENGINE=django.db.backends.sqlite3
DJANGO_POSTGRES_DB=postgres_db_name
DJANGO_POSTGRES_USER=postgres_user
DJANGO_POSTGRES_PASSWORD=postgres_password
DJANGO_POSTGRES_HOST=localhost
DJANGO_POSTGRES_PORT=5432
POSTGRES_DB=postgres_db_name
POSTGRES_USER=postgres_user
POSTGRES_PASSWORD=postgres_password
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
# MCP Server
MCP_SERVER_HOST=localhost
MCP_SERVER_PORT=8001
# Production YAML
FYP_DJANGO_IMAGE=dynavera-django:prod
FYP_CELERY_IMAGE=dynavera-celery:prod
DJANGO_ENTRYPOINT=websecure
CERTRESOLVER=myresolver
DJANGO_PORT=8000
GITLAB_USER=yourgitlabuser
GITLAB_PASS=yourgitlabpass
GITLAB_SERVER_URL=https://gitlab.com/
GITLAB_RUNNER_REGISTRATION_TOKEN=your_registration_token
GITLAB_RUNNER_DOCKER_IMAGE=python:3.10-slim
GITLAB_RUNNER_IMAGE_TAG=latest

1
.gitattributes vendored
View file

@ -1 +0,0 @@
* text=auto eol=lf

271
.gitignore vendored
View file

@ -1,271 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# poetry.lock
# poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# Redis
*.rdb
*.aof
*.pid
# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/
# ActiveMQ
activemq-data/
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# .idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# Streamlit
.streamlit/secrets.toml
# Build
build/
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
.DS_Store
dist
dist-ssr
coverage
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
*.tsbuildinfo
.eslintcache
# Cypress
/cypress/videos/
/cypress/screenshots/
# Vitest
__screenshots__/
# Local batch files
*.local.bat
# Static files
static/
# Media files
media/
# Models
model/
# GitHub files
.github/

View file

@ -1,57 +0,0 @@
stages:
- test
- lint
- build
run_tests:
stage: test
image: python:3.12
variables:
DJANGO_SECRET_KEY: 'random_secret_key_for_ci'
before_script:
- python -m pip install --upgrade pip
- pip install --no-cache-dir -r requirements/django.txt
script:
- python manage.py test --verbosity=2
rules:
- if: $CI_COMMIT_BRANCH == "main"
check_node_syntax:
stage: lint
image: node:20-alpine
before_script:
- npm ci
script:
- npm run type-check
rules:
- if: $CI_COMMIT_BRANCH == "main"
# Builds the Django and Celery production images and pushes them to the
# registry given by REGISTRY_URL / REGISTRY_USERNAME / REGISTRY_PASSWORD.
build_and_push:
  stage: build
  image: docker:24.0.7
  variables:
    DOCKER_HOST: tcp://docker:2375
    DOCKER_TLS_CERTDIR: ''
  services:
    - name: docker:24.0.7-dind
      alias: docker
      command: ['--tls=false', '--host=tcp://0.0.0.0:2375']
  before_script:
    - apk add --no-cache git
  script:
    - echo "Waiting for Docker daemon..."
    # Poll for up to ~30s until the dind service answers.
    - for i in $(seq 1 30); do docker info && break || sleep 1; done
    - echo "Logging in to registry ${REGISTRY_URL}"
    - echo "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USERNAME" --password-stdin "$REGISTRY_URL"
    - export DJANGO_IMAGE_NAME="${REGISTRY_URL}/${DJANGO_IMAGE_PATH}:${IMAGE_TAG}"
    - echo "Building image ${DJANGO_IMAGE_NAME}"
    - docker build -t "$DJANGO_IMAGE_NAME" -f ./compose/prod/django/Dockerfile --no-cache .
    - echo "Pushing image ${DJANGO_IMAGE_NAME}"
    - docker push "$DJANGO_IMAGE_NAME"
    - export CELERY_IMAGE_NAME="${REGISTRY_URL}/${CELERY_IMAGE_PATH}:${IMAGE_TAG}"
    - echo "Building Celery image ${CELERY_IMAGE_NAME}"
    - docker build -t "$CELERY_IMAGE_NAME" -f ./compose/prod/celery/Dockerfile --no-cache .
    - echo "Pushing Celery image ${CELERY_IMAGE_NAME}"
    - docker push "$CELERY_IMAGE_NAME"
  # Use the same gate as run_tests. Without it this job is added to pipelines
  # for every branch while the job it needs only exists on main, which makes
  # pipeline creation fail (and would otherwise push images from any branch).
  rules:
    - if: $CI_COMMIT_BRANCH == "main"
  needs:
    - run_tests

View file

@ -1,5 +0,0 @@
# Ignore docker-compose and other compose YAML files from Prettier formatting
docker-compose*.yml
docker-compose*.yaml
compose/**/*.yml
compose/**/*.yaml

View file

@ -1,9 +0,0 @@
{
"$schema": "https://json.schemastore.org/prettierrc",
"semi": false,
"useTabs": false,
"tabWidth": 4,
"singleQuote": true,
"printWidth": 100,
"htmlWhitespaceSensitivity": "ignore"
}

View file

@ -1,8 +0,0 @@
{
"recommendations": [
"Vue.volar",
"dbaeumer.vscode-eslint",
"EditorConfig.EditorConfig",
"prettier.prettier-vscode"
]
}

View file

@ -1,57 +0,0 @@
# An Agentic Approach to Role-Specific Trainers - Dynavera
A proof-of-concept platform for **automating the induction and support of new hires or team members** into a role using **AI agents**. This project demonstrates a reusable workflow that combines a modern full-stack application with AI-driven guidance and assessment.
---
## Table of Contents
- [Project Goals](#project-goals)
- [Tech Stack](#tech-stack)
- [Features](#features)
- [Usage](#usage)
---
## Project Goals
The main objectives of this project are:
1. **Reusable Workflow:** Create a pipeline that can automatically onboard and guide new hires or team members in a specific role.
2. **AI Agent Integration:** Use intelligent agents to provide guidance, monitor progress, and adapt learning to individual users.
3. **Real-World Testing:** Evaluate the suitability and effectiveness of the tool in realistic onboarding scenarios.
4. **Role-Specific Trainers:** Support the creation of trainers specialized for different roles, fields, or industries.
---
## Tech Stack
- **Backend:** [Django](https://www.djangoproject.com/)
- **Frontend:** [Vue 3](https://vuejs.org/) + [Vite](https://vitejs.dev/)
- **AI Agents:** Python-based agents using websockets and potentially LangChain
- **APIs:** RESTful APIs using [Django REST Framework](https://www.django-rest-framework.org/)
- **Containerization:** [Docker](https://www.docker.com/) + [Docker Compose](https://docs.docker.com/compose/)
- **Database:** [PostgreSQL](https://www.postgresql.org/)
- **Websockets:** [Django Channels](https://channels.readthedocs.io/en/stable/)
- **State Management:** [Pinia](https://pinia.vuejs.org/)
- **Authentication:** JWT / OAuth2 / Django
---
## Features
- Automated onboarding workflow for new hires.
- Role-specific AI training modules.
- Adaptive guidance and personalized learning paths.
- Dashboard for tracking user progress and feedback.
- Modular AI agent integration (Python/JS).
---
## Usage
1. Navigate to the frontend URL (hosted at `https://fyp.viswamedha.com`).
2. Register a new user or login.
3. Select the role to train in.
4. Follow the guided AI-assisted onboarding workflow.
5. Track progress and view recommendations on the dashboard.

View file

View file

@ -1,66 +0,0 @@
from django.contrib import admin
from django.contrib.admin import ModelAdmin, TabularInline
from apps.mlstore.models import AgentModel, AgentRun, Agent, AgentEvent, RoleRagDocument
class AgentInline(TabularInline):
    """Tabular inline listing Agent rows; used by AgentModelAdmin."""

    model = Agent
    # Render the `model` FK as a raw id input instead of a select box.
    raw_id_fields = ("model",)
    # Do not render blank extra rows.
    extra = 0
class AgentRunInline(TabularInline):
    """Tabular inline listing AgentRun rows; used by AgentAdmin."""

    model = AgentRun
    # Render both FKs as raw id inputs instead of select boxes.
    raw_id_fields = ("agent", "user")
    # Do not render blank extra rows.
    extra = 0
class AgentEventInline(TabularInline):
    """Tabular inline listing AgentEvent rows; used by AgentRunAdmin."""

    model = AgentEvent
    # Render the `execution` FK as a raw id input instead of a select box.
    raw_id_fields = ("execution",)
    # Do not render blank extra rows.
    extra = 0
@admin.register(AgentModel)
class AgentModelAdmin(ModelAdmin):
    """Admin page for AgentModel, with its Agent rows shown inline."""

    # Change-list configuration.
    list_display = ("id", "uuid", "name", "version")
    search_fields = ("name", "version")

    # Change-form configuration.
    readonly_fields = ("uuid",)
    inlines = (AgentInline,)
@admin.register(Agent)
class AgentAdmin(ModelAdmin):
    """Admin page for Agent, with its AgentRun rows shown inline."""

    # Change-list configuration.
    list_display = (
        "id",
        "uuid",
        "model",
        "status",
        "started_at",
        "completed_at",
        "organization",
    )
    list_filter = ("status",)
    search_fields = ("model__name", "uuid")

    # Change-form configuration.
    raw_id_fields = ("model",)
    readonly_fields = ("uuid", "started_at", "completed_at")
    inlines = (AgentRunInline,)
@admin.register(AgentRun)
class AgentRunAdmin(ModelAdmin):
    """Admin page for AgentRun, with its AgentEvent rows shown inline."""

    # Change-list configuration.
    list_display = (
        "id",
        "uuid",
        "agent",
        "user",
        "status",
        "started_at",
        "completed_at",
    )
    list_filter = ("status",)
    search_fields = ("uuid", "agent__model__name", "user__email_address")

    # Change-form configuration.
    raw_id_fields = ("agent", "user")
    readonly_fields = ("uuid", "started_at", "completed_at")
    inlines = (AgentEventInline,)
@admin.register(AgentEvent)
class AgentEventAdmin(ModelAdmin):
    """Admin page for individual AgentEvent rows."""

    # Change-list configuration.
    list_display = ("id", "event_type", "execution", "timestamp")
    list_filter = ("event_type",)
    search_fields = (
        "event_type",
        "execution__uuid",
        "execution__agent__model__name",
    )

    # Change-form configuration.
    raw_id_fields = ("execution",)
@admin.register(RoleRagDocument)
class RoleRagDocumentAdmin(ModelAdmin):
    """Admin page for RoleRagDocument rows."""

    # Change-list configuration.
    list_display = (
        "id",
        "uuid",
        "role",
        "training_file",
        "chunk_index",
        "is_active",
        "created_at",
    )
    list_filter = ("is_active", "created_at")
    search_fields = ("role__name", "training_file__file_name")

    # Change-form configuration.
    raw_id_fields = ("role", "training_file")
    readonly_fields = ("uuid", "created_at", "updated_at")

View file

@ -1,6 +0,0 @@
from django.apps import AppConfig
class MlstoreConfig(AppConfig):
    """Django application configuration for the `apps.mlstore` package."""

    name = "apps.mlstore"
    # Primary-key type for models in this app that do not declare one.
    default_auto_field = "django.db.models.BigAutoField"

View file

@ -1,234 +0,0 @@
import json
import logging

from channels.db import database_sync_to_async
from channels.generic.websocket import AsyncWebsocketConsumer
from django.core.exceptions import ValidationError
from django.utils import timezone

from .models import Agent, AgentRun, AgentEvent
from .tasks import start_fine_tune_run_task, infer_run_task
class MLStoreConsumer(AsyncWebsocketConsumer):
    """WebSocket consumer for the run stream of a single mlstore agent.

    The agent is identified by the ``agent_id`` URL kwarg. After a successful
    connect the socket joins the channel-layer group
    ``mlstore_agent_<agent_id>``. Incoming frames must be JSON objects whose
    ``action`` key selects a handler: ``fine_tune``, ``infer``,
    ``onboarding_progress`` or ``stop_agent``/``stop``.

    NOTE(review): only authentication is checked here; nothing verifies that
    the user may access this agent or the executions it references. Confirm
    whether that is enforced elsewhere.
    """

    async def connect(self):
        """Accept the socket for authenticated users connecting to an existing agent.

        Closes the connection without accepting it when the user is anonymous
        or the agent cannot be found (including a malformed ``agent_id``).
        """
        self.user = self.scope["user"]
        self.agent_id = self.scope["url_route"]["kwargs"].get("agent_id")
        # Set before any early return so disconnect() can always read it.
        self.room_group_name = f"mlstore_agent_{self.agent_id}"
        if not self.user.is_authenticated:
            await self.close()
            return
        agent = await self.get_agent(self.agent_id)
        if not agent:
            await self.close()
            return
        await self.channel_layer.group_add(self.room_group_name, self.channel_name)
        await self.accept()
        await self.send(json.dumps({
            "type": "connection",
            "message": "Connected to mlstore agent stream",
            "agent_id": str(self.agent_id)
        }))

    async def disconnect(self, close_code):
        """Leave the agent's channel-layer group."""
        await self.channel_layer.group_discard(self.room_group_name, self.channel_name)

    async def receive(self, text_data=None, bytes_data=None):
        """Parse one client frame and dispatch it on its ``action`` key.

        ``bytes_data`` is accepted because Channels passes binary frames as
        ``receive(bytes_data=...)``; a text-only signature would raise
        ``TypeError`` for them. Binary frames and JSON values that are not
        objects are answered with an ``error`` frame. Any exception raised by
        a handler is logged and reported to the client as an ``error`` frame.
        """
        if text_data is None:
            await self._send_error("Binary frames are not supported; send JSON text")
            return
        handlers = {
            "fine_tune": self.handle_fine_tune,
            "infer": self.handle_infer,
            "onboarding_progress": self.handle_onboarding_progress,
            "stop_agent": self.handle_stop,
            "stop": self.handle_stop,
        }
        try:
            data = json.loads(text_data)
            if not isinstance(data, dict):
                # Handlers call data.get(); anything but an object is unusable.
                await self._send_error("Message must be a JSON object")
                return
            action = data.get("action")
            handler = handlers.get(action) if isinstance(action, str) else None
            if handler is None:
                await self._send_error(f"Unknown action: {action}")
                return
            await handler(data)
        except json.JSONDecodeError:
            await self._send_error("Invalid JSON")
        except Exception as e:
            # Keep the socket alive, but leave a server-side trace.
            logging.getLogger(__name__).exception("mlstore consumer: action failed")
            await self._send_error(str(e))

    async def _send_error(self, message):
        """Send an ``{"type": "error", "message": message}`` frame to this client."""
        await self.send(json.dumps({
            "type": "error",
            "message": message
        }))

    async def handle_fine_tune(self, data):
        """Create an AgentRun from ``data["input_data"]`` and queue the fine-tune task."""
        agent = await self.get_agent(self.agent_id)
        if not agent:
            await self._send_error("Agent not found")
            return
        input_data = data.get("input_data") or {}
        execution = await self.create_run(agent, self.user, input_data)
        await self.send(json.dumps({
            "type": "execution_started",
            "execution_id": str(execution.uuid),
            "agent_id": str(agent.uuid),
            "message": f"Fine-tune run {execution.uuid} queued"
        }))
        start_fine_tune_run_task.delay(str(execution.uuid))

    async def handle_infer(self, data):
        """Create an AgentRun and queue the inference task.

        ``role_uuid`` must be present either directly in ``input_data`` or in
        ``input_data["options"]``; otherwise an error frame is sent and no run
        is created.
        """
        agent = await self.get_agent(self.agent_id)
        if not agent:
            await self._send_error("Agent not found")
            return
        input_data = data.get("input_data") or {}
        role_uuid = input_data.get("role_uuid")
        if not role_uuid:
            options = input_data.get("options") or {}
            role_uuid = options.get("role_uuid")
        if not role_uuid:
            await self._send_error("role_uuid is required for inference to enable RAG")
            return
        execution = await self.create_run(agent, self.user, input_data)
        await self.send(json.dumps({
            "type": "execution_started",
            "execution_id": str(execution.uuid),
            "agent_id": str(agent.uuid),
            "message": f"Inference run {execution.uuid} queued"
        }))
        infer_run_task.delay(str(execution.uuid))

    async def handle_stop(self, data):
        """Mark the execution named by ``data["execution_id"]`` as failed.

        Only the database status is changed here; this method does not itself
        signal any queued or running task.
        """
        execution_id = data.get("execution_id")
        if not execution_id:
            await self._send_error("execution_id required to stop")
            return
        execution = await self.get_execution(execution_id)
        if not execution:
            await self._send_error("Execution not found")
            return
        await self.update_execution_status(execution, "failed")
        await self.send(json.dumps({
            "type": "execution_error",
            "execution_id": str(execution.uuid),
            "error_message": "Execution stopped by user"
        }))

    async def handle_onboarding_progress(self, data):
        """Store a ``progress`` AgentEvent and broadcast it to the agent's group.

        The event content is taken from ``data["content"]``, falling back to
        ``data["progress"]`` and then to an empty dict.
        """
        execution_id = data.get("execution_id")
        if not execution_id:
            await self._send_error("execution_id required for onboarding_progress")
            return
        execution = await self.get_execution(execution_id)
        if not execution:
            await self._send_error("Execution not found")
            return
        content = data.get("content") or data.get("progress") or {}
        await self.create_event(execution, "progress", content)
        await self.channel_layer.group_send(
            self.room_group_name,
            {
                "type": "mlstore_event",
                "event_type": "progress",
                "content": content,
                "timestamp": timezone.now().isoformat(),
            }
        )

    async def mlstore_event(self, event):
        """Relay a group message of type ``mlstore_event`` to this client."""
        await self.send(json.dumps({
            "type": "mlstore_event",
            "event_type": event["event_type"],
            "content": event["content"],
            "timestamp": event["timestamp"]
        }))

    async def mlstore_completed(self, event):
        """Relay a ``mlstore_completed`` group message as ``execution_completed``.

        NOTE(review): the sender of this message type is not in this class;
        presumably the background tasks. Confirm.
        """
        await self.send(json.dumps({
            "type": "execution_completed",
            "execution_id": event["execution_id"],
            "output_data": event["output_data"],
            "message": "Execution completed"
        }))

    async def mlstore_error(self, event):
        """Relay a ``mlstore_error`` group message as ``execution_error``."""
        await self.send(json.dumps({
            "type": "execution_error",
            "execution_id": event["execution_id"],
            "error_message": event["error_message"]
        }))

    @database_sync_to_async
    def get_agent(self, agent_id):
        """Return the Agent with this uuid, or ``None`` if missing or malformed."""
        try:
            return Agent.objects.get(uuid=agent_id)
        except (Agent.DoesNotExist, ValidationError, ValueError):
            # The id comes from a `<str:...>` URL kwarg, so it may not be a
            # UUID at all; treat that the same as "not found".
            return None

    @database_sync_to_async
    def create_run(self, agent, user, input_data):
        """Insert and return a new AgentRun for ``agent`` and ``user``."""
        return AgentRun.objects.create(
            agent=agent,
            user=user,
            input_data=input_data,
        )

    @database_sync_to_async
    def get_execution(self, execution_id):
        """Return the AgentRun with this uuid, or ``None`` if missing or malformed."""
        try:
            return AgentRun.objects.get(uuid=execution_id)
        except (AgentRun.DoesNotExist, ValidationError, ValueError):
            # execution_id is client-supplied JSON and may not be a UUID.
            return None

    @database_sync_to_async
    def update_execution_status(self, execution, status):
        """Set ``status`` and ``completed_at`` on the run, then on its agent.

        Updating the agent is best-effort: a failure there is logged and does
        not prevent the already-saved run from being returned.
        """
        execution.status = status
        execution.completed_at = timezone.now()
        execution.save()
        try:
            agent = execution.agent
            agent.status = status
            agent.completed_at = timezone.now()
            agent.save()
        except Exception:
            logging.getLogger(__name__).exception(
                "mlstore consumer: could not update agent status for run %s",
                execution.uuid,
            )
        return execution

    @database_sync_to_async
    def create_event(self, execution, event_type, content):
        """Insert and return an AgentEvent attached to ``execution``."""
        return AgentEvent.objects.create(
            execution=execution,
            event_type=event_type,
            content=content,
        )

View file

@ -1,127 +0,0 @@
import django.db.models.deletion
import uuid
from django.conf import settings
from django.db import migrations, models
from pgvector.django import VectorField
def _create_vector_extension(apps, schema_editor):
if schema_editor.connection.vendor != 'postgresql':
return
with schema_editor.connection.cursor() as cursor:
cursor.execute('CREATE EXTENSION IF NOT EXISTS vector')
def _drop_vector_extension(apps, schema_editor):
if schema_editor.connection.vendor != 'postgresql':
return
with schema_editor.connection.cursor() as cursor:
cursor.execute('DROP EXTENSION IF EXISTS vector')
# Initial migration for the mlstore app. It ensures the `vector` extension
# exists (PostgreSQL only) and then creates the AgentModel, Agent, AgentRun,
# AgentEvent and RoleRagDocument tables, in that order.
class Migration(migrations.Migration):
initial = True
# Needs the orgs tables (organization, role, trainingfile) and the user model.
dependencies = [
('orgs', '0001_initial'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
# Runs first so the VectorField column below can be created.
migrations.RunPython(
code=_create_vector_extension,
reverse_code=_drop_vector_extension,
),
migrations.CreateModel(
name='AgentModel',
fields=[
('id', models.BigAutoField(primary_key=True, serialize=False)),
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
('name', models.CharField(max_length=255)),
('version', models.CharField(max_length=50)),
('path', models.CharField(blank=True, default='', max_length=1024)),
],
options={
'verbose_name': 'Model',
'verbose_name_plural': 'Models',
},
),
migrations.CreateModel(
name='Agent',
fields=[
('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
('id', models.BigAutoField(primary_key=True, serialize=False)),
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
('status', models.CharField(choices=[('idle', 'Idle'), ('running', 'Running'), ('paused', 'Paused'), ('completed', 'Completed'), ('failed', 'Failed')], default='idle', max_length=20)),
('description', models.TextField(blank=True, default='')),
('started_at', models.DateTimeField(blank=True, null=True)),
('completed_at', models.DateTimeField(blank=True, null=True)),
('organization', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='agents', to='orgs.organization')),
('model', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='agents', to='mlstore.agentmodel')),
],
options={
'verbose_name': 'Agent Instance',
'verbose_name_plural': 'Agent Instances',
},
),
migrations.CreateModel(
name='AgentRun',
fields=[
('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
('id', models.BigAutoField(primary_key=True, serialize=False)),
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
('status', models.CharField(choices=[('queued', 'Queued'), ('running', 'Running'), ('completed', 'Completed'), ('failed', 'Failed')], default='queued', max_length=20)),
('input_data', models.JSONField(blank=True, default=dict)),
('output_data', models.JSONField(blank=True, default=dict)),
('error_message', models.TextField(blank=True, default='')),
('started_at', models.DateTimeField(blank=True, null=True)),
('completed_at', models.DateTimeField(blank=True, null=True)),
('agent', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='runs', to='mlstore.agent')),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='agent_runs', to=settings.AUTH_USER_MODEL)),
],
options={
'verbose_name': 'Agent Run',
'verbose_name_plural': 'Agent Runs',
},
),
migrations.CreateModel(
name='AgentEvent',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
('event_type', models.CharField(choices=[('started', 'Started'), ('message', 'Message'), ('progress', 'Progress'), ('completed', 'Completed'), ('error', 'Error'), ('step', 'Step')], max_length=20)),
('content', models.JSONField()),
('timestamp', models.DateTimeField(auto_now_add=True)),
('execution', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='events', to='mlstore.agentrun')),
],
options={
'verbose_name': 'Agent Event',
'verbose_name_plural': 'Agent Events',
'ordering': ['timestamp'],
},
),
migrations.CreateModel(
name='RoleRagDocument',
fields=[
('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
('id', models.BigAutoField(primary_key=True, serialize=False)),
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
('content', models.TextField()),
('content_hash', models.CharField(db_index=True, max_length=64)),
# 1536-dimension pgvector column; nullable.
('embedding', VectorField(blank=True, dimensions=1536, null=True)),
('metadata', models.JSONField(blank=True, default=dict)),
('chunk_index', models.IntegerField(default=0)),
('is_active', models.BooleanField(default=True)),
('role', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='rag_documents', to='orgs.role')),
('training_file', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='rag_documents', to='orgs.trainingfile')),
],
options={
'verbose_name': 'Role RAG Document',
'verbose_name_plural': 'Role RAG Documents',
},
),
]

View file

@ -1,125 +0,0 @@
from django.db.models import BigAutoField, BooleanField, CASCADE, CharField, DateTimeField, ForeignKey, JSONField, Model, TextField, UUIDField, IntegerField
from pgvector.django import VectorField
from apps.users.mixins import TimeStampMixin
from apps.users.models import User
from apps.orgs.models import Organization, Role, TrainingFile
from uuid import uuid4
class AgentModel(Model):
    """A named, versioned model record that Agent rows point at."""

    id = BigAutoField(primary_key=True)
    uuid = UUIDField(default=uuid4, unique=True, editable=False)
    name = CharField(max_length=255)
    version = CharField(max_length=50)
    # Optional; stored as an empty string when not set.
    path = CharField(max_length=1024, blank=True, default='')

    class Meta:
        verbose_name = 'Model'
        verbose_name_plural = 'Models'

    def __str__(self):
        """Display the model by its name."""
        return self.name
class Agent(TimeStampMixin, Model):
    """An agent instance built on an AgentModel, optionally tied to an organization."""

    STATUS_CHOICES = [
        ('idle', 'Idle'),
        ('running', 'Running'),
        ('paused', 'Paused'),
        ('completed', 'Completed'),
        ('failed', 'Failed'),
    ]

    id = BigAutoField(primary_key=True)
    uuid = UUIDField(default=uuid4, unique=True, editable=False)
    model = ForeignKey(AgentModel, on_delete=CASCADE, related_name='agents')
    # Nullable: an agent may exist without an organization.
    organization = ForeignKey(
        Organization,
        on_delete=CASCADE,
        related_name='agents',
        null=True,
        blank=True,
    )
    status = CharField(max_length=20, choices=STATUS_CHOICES, default='idle')
    description = TextField(blank=True, default='')
    started_at = DateTimeField(null=True, blank=True)
    completed_at = DateTimeField(null=True, blank=True)

    class Meta:
        verbose_name = 'Agent Instance'
        verbose_name_plural = 'Agent Instances'

    def __str__(self):
        """Display as '<model name> - <uuid>'."""
        return f'{self.model.name} - {self.uuid}'
class AgentRun(TimeStampMixin, Model):
    """One execution of an Agent on behalf of a user, with its input and output."""

    RUN_CHOICES = [
        ('queued', 'Queued'),
        ('running', 'Running'),
        ('completed', 'Completed'),
        ('failed', 'Failed'),
    ]

    id = BigAutoField(primary_key=True)
    uuid = UUIDField(default=uuid4, editable=False, unique=True)
    agent = ForeignKey(Agent, on_delete=CASCADE, related_name='runs')
    user = ForeignKey(User, on_delete=CASCADE, related_name='agent_runs')
    status = CharField(max_length=20, choices=RUN_CHOICES, default='queued')
    # JSON payloads; both default to an empty dict.
    input_data = JSONField(default=dict, blank=True)
    output_data = JSONField(default=dict, blank=True)
    error_message = TextField(blank=True, default="")
    started_at = DateTimeField(null=True, blank=True)
    completed_at = DateTimeField(null=True, blank=True)

    class Meta:
        verbose_name = "Agent Run"
        verbose_name_plural = "Agent Runs"

    def __str__(self) -> str:
        """Display as 'Execution <uuid> - <agent> (<status>)'."""
        return f"Execution {self.uuid} - {self.agent} ({self.status})"
class AgentEvent(Model):
    """A timestamped event attached to an AgentRun; default ordering is by timestamp."""

    EVENT_TYPES = [
        ('started', 'Started'),
        ('message', 'Message'),
        ('progress', 'Progress'),
        ('completed', 'Completed'),
        ('error', 'Error'),
        ('step', 'Step'),
    ]

    uuid = UUIDField(default=uuid4, editable=False, unique=True)
    execution = ForeignKey(AgentRun, on_delete=CASCADE, related_name='events')
    event_type = CharField(max_length=20, choices=EVENT_TYPES)
    # Required JSON payload (no default).
    content = JSONField()
    timestamp = DateTimeField(auto_now_add=True)

    class Meta:
        verbose_name = "Agent Event"
        verbose_name_plural = "Agent Events"
        ordering = ['timestamp']

    def __str__(self) -> str:
        """Display as '<id> - <event_type> - <agent of the run>'."""
        return f"{self.id} - {self.event_type} - {self.execution.agent}"
class RoleRagDocument(TimeStampMixin, Model):
    """A text chunk belonging to a Role, with an optional 1536-dimension embedding."""

    id = BigAutoField(primary_key=True)
    uuid = UUIDField(default=uuid4, editable=False, unique=True)
    role = ForeignKey(Role, on_delete=CASCADE, related_name='rag_documents')
    # Nullable: a document may exist without a source training file.
    training_file = ForeignKey(
        TrainingFile,
        on_delete=CASCADE,
        related_name='rag_documents',
        null=True,
        blank=True,
    )
    content = TextField()
    # Indexed 64-character string; presumably a hex digest of `content` (TODO confirm).
    content_hash = CharField(max_length=64, db_index=True)
    embedding = VectorField(dimensions=1536, null=True, blank=True)
    metadata = JSONField(default=dict, blank=True)
    chunk_index = IntegerField(default=0)
    is_active = BooleanField(default=True)

    class Meta:
        verbose_name = "Role RAG Document"
        verbose_name_plural = "Role RAG Documents"

    def __str__(self) -> str:
        """Display as '<role name> - chunk <chunk_index>'."""
        return f"{self.role.name} - chunk {self.chunk_index}"

View file

@ -1,6 +0,0 @@
from django.urls import path
from . import consumers
# WebSocket routes for the mlstore app.
# `agent_id` is captured as a plain string and passed to MLStoreConsumer as a
# URL kwarg.
websocket_urlpatterns = [
path("ws/mlstore/agents/<str:agent_id>/", consumers.MLStoreConsumer.as_asgi()),
]

View file

@ -1,54 +0,0 @@
from rest_framework.serializers import ModelSerializer
from .models import AgentModel, Agent, AgentRun, AgentEvent
from apps.orgs.serializers import OrganizationSerializer
class AgentModelSerializer(ModelSerializer):
    """Serializes AgentModel rows; `id` and `uuid` are read-only."""

    class Meta:
        model = AgentModel
        fields = [
            'id',
            'uuid',
            'name',
            'version',
            'path',
        ]
        read_only_fields = [
            'id',
            'uuid',
        ]
class AgentSerializer(ModelSerializer):
    """Serializes Agent rows with `model` and `organization` nested read-only."""

    # Nested representations; neither can be written through this serializer.
    model = AgentModelSerializer(read_only=True)
    organization = OrganizationSerializer(read_only=True)

    class Meta:
        model = Agent
        fields = [
            'id', 'uuid',
            'model', 'organization',
            'status', 'description',
            'started_at', 'completed_at',
        ]
        read_only_fields = [
            'id', 'uuid',
            'started_at', 'completed_at',
        ]
class AgentRunSerializer(ModelSerializer):
    """Serializes AgentRun rows; ids and the start/complete timestamps are read-only."""

    class Meta:
        model = AgentRun
        fields = [
            'id', 'uuid',
            'agent', 'user',
            'status',
            'input_data', 'output_data',
            'error_message',
            'started_at', 'completed_at',
        ]
        read_only_fields = [
            'id', 'uuid',
            'started_at', 'completed_at',
        ]
class AgentEventSerializer(ModelSerializer):
    """Serializes AgentEvent rows; `id`, `uuid` and `timestamp` are read-only."""

    class Meta:
        model = AgentEvent
        fields = [
            'id',
            'uuid',
            'execution',
            'event_type',
            'content',
            'timestamp',
        ]
        read_only_fields = [
            'id',
            'uuid',
            'timestamp',
        ]

View file

@ -1,405 +0,0 @@
import asyncio
import logging
import os
import re
from typing import Any, Dict, List, Optional, Tuple
from django.conf import settings
from mcp_agent.mcp_client import MCPClient
from .models import AgentModel, RoleRagDocument
logger = logging.getLogger(__name__)
# Resolve BASE_MODEL_CACHE: prefer the directory exported by the MCP server
# module; if that module cannot be imported, fall back to
# "<three directories above this file>/model/base-model".
try:
from mcp_agent.mcp_server import BASE_MODEL_CACHE_DIR
BASE_MODEL_CACHE = BASE_MODEL_CACHE_DIR
except ImportError:
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
BASE_MODEL_CACHE = os.path.join(project_root, "model", "base-model")
logger.info(f"Base model cache directory reference: {BASE_MODEL_CACHE}")
async def _call_mcp(tool: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Invoke one MCP tool through a short-lived MCPClient and return its response.

    The server URL is read from ``settings.MCP_AGENT_URL``. Any exception from
    the call is logged and re-raised; the client is closed in every case.
    """
    url = settings.MCP_AGENT_URL
    mcp = MCPClient(url)
    logger.info(f"MCP: Calling tool '{tool}' on {url}")
    logger.debug(f"MCP: Arguments for '{tool}': {arguments}")
    try:
        response = await mcp.send(tool, arguments)
        logger.info(f"MCP: Tool '{tool}' completed successfully")
        logger.debug(f"MCP: Response from '{tool}': {response}")
        return response
    except Exception as exc:
        logger.error(f"MCP: Tool '{tool}' failed with error: {exc!s}")
        raise
    finally:
        # Always release the client, whether the call succeeded or failed.
        await mcp.close()
def fine_tune_model(
    base_model: str,
    training_files: List[str],
    hyperparams: Dict[str, Any],
    name: str,
    version: str,
) -> Dict[str, Any]:
    """Run the MCP ``fine_tune`` tool synchronously and report the outcome.

    The tool receives ``{base_model, training_files, hyperparams, name, version}``.

    Returns:
        The tool's response on success. If the call raises, a dict with
        ``status="failed"``, ``error`` and ``error_type`` is returned instead of
        propagating the exception.
    """
    logger.info(f"Fine-tuning model: name={name}, version={version}, base_model={base_model}")
    logger.info(f"Training files count: {len(training_files)}")
    logger.debug(f"Training files: {training_files}")

    payload = {
        "base_model": base_model,
        "training_files": training_files,
        "hyperparams": hyperparams,
        "name": name,
        "version": version,
    }
    try:
        logger.info("Calling MCP fine_tune tool...")
        outcome = asyncio.run(_call_mcp("fine_tune", payload))
        logger.info(f"Fine-tune completed: status={outcome.get('status')}")
        logger.debug(f"Fine-tune result: {outcome}")
        return outcome
    except Exception as exc:
        # Some exceptions stringify to "", so fall back to the class name.
        error_msg = str(exc) or f"Unknown error: {type(exc).__name__}"
        logger.error(f"Fine-tune failed: {error_msg}", exc_info=True)
        return {
            "status": "failed",
            "error": error_msg,
            "error_type": type(exc).__name__,
        }
def load_model_for_inference(model_path: str) -> Dict[str, Any]:
    """Ask the MCP server to load a model via its ``load_model`` tool.

    Args:
        model_path: Path identifying the model, sent as ``{"model_path": ...}``.

    Returns:
        The response produced by the MCP tool.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    logger.info(f"Loading model for inference: {model_path}")
    request = {"model_path": model_path}
    try:
        response = asyncio.run(_call_mcp("load_model", request))
    except Exception as exc:
        logger.error(f"Failed to load model: {exc}", exc_info=True)
        raise
    logger.info("Model loaded successfully")
    return response
def infer_with_model(model_path: str, prompt: str, options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Run inference through the MCP ``infer`` tool.

    Args:
        model_path: Path identifying the model to use.
        prompt: Prompt text sent to the model.
        options: Optional generation options; an empty dict is sent when omitted.

    Returns:
        The response produced by the MCP tool.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    logger.info(f"Running inference with model: {model_path}")
    logger.debug(f"Prompt length: {len(prompt)} characters")
    logger.debug(f"Inference options: {options}")

    request = {
        "model_path": model_path,
        "prompt": prompt,
        "options": options or {},
    }
    try:
        response = asyncio.run(_call_mcp("infer", request))
    except Exception as exc:
        logger.error(f"Inference failed: {exc}", exc_info=True)
        raise
    logger.info("Inference completed successfully")
    logger.debug(f"Inference result keys: {list(response.keys()) if isinstance(response, dict) else 'not a dict'}")
    return response
def register_model_in_db(name: str, version: str, model_path: str) -> AgentModel:
    """Create an ``AgentModel`` row for the given name, version and path and return it.

    NOTE: migrations are required after the model field change prior to using this in production.
    """
    record = AgentModel.objects.create(
        name=name,
        version=version,
        path=model_path,
    )
    return record
def embed_texts(texts: List[str]) -> List[List[float]]:
    """Generate embeddings for texts using the MCP ``embed`` tool.

    Falls back to a local sentence-transformers model (``all-MiniLM-L6-v2``) when
    the MCP call fails or returns a number of vectors different from ``len(texts)``.

    Args:
        texts: List of text strings to embed.

    Returns:
        One embedding vector (list of floats) per input text, in input order.
        An empty input yields an empty list without contacting any backend.

    Raises:
        RuntimeError: If both MCP and local embedding fail. The local failure is
            attached as ``__cause__``.
    """
    # Nothing to embed: avoid a network round trip and loading the local model.
    if not texts:
        return []

    logger.info(f"Embedding {len(texts)} texts")
    try:
        result = asyncio.run(_call_mcp("embed", {"texts": texts}))
        embeddings = result.get("embeddings", [])
        if embeddings and len(embeddings) == len(texts):
            logger.info(f"Successfully embedded {len(texts)} texts via MCP")
            return embeddings
        # Make the reason for the fallback visible instead of falling through silently.
        logger.warning(
            f"MCP returned {len(embeddings) if embeddings else 0} embeddings "
            f"for {len(texts)} texts, trying local fallback"
        )
    except Exception as e:
        logger.warning(f"MCP embedding failed, trying local fallback: {e}")

    try:
        # Imported lazily so the dependency is only needed when the fallback is used.
        from sentence_transformers import SentenceTransformer

        model = SentenceTransformer("all-MiniLM-L6-v2")
        embeddings = model.encode(texts).tolist()
        logger.info(f"Successfully embedded {len(texts)} texts via local model")
        return embeddings
    except Exception as e:
        logger.error(f"Local embedding also failed: {e}")
        raise RuntimeError(f"Failed to embed texts: {e}") from e
def embed_text(text: str) -> List[float]:
    """Embed one string by delegating to ``embed_texts`` with a single-item list.

    Args:
        text: Text string to embed.

    Returns:
        The embedding vector (list of floats) for ``text``.
    """
    vectors = embed_texts([text])
    return vectors[0]
def search_similar_documents(
    query: str,
    role_uuid: str,
    top_k: int = 5,
    similarity_threshold: float = 0.0,
) -> List[Tuple[RoleRagDocument, float]]:
    """Search for documents similar to the query using vector similarity.

    The query is embedded, then ranked against the stored embeddings of the role's
    documents with ``1 - (embedding <=> query_vector)`` in raw SQL.

    Args:
        query: Query text to embed and search for.
        role_uuid: UUID of role to scope search.
        top_k: Number of top results to return.
        similarity_threshold: Minimum similarity score to include results.

    Returns:
        List of (RoleRagDocument, similarity_score) tuples, ordered by similarity DESC.

    Raises:
        ValueError: If the role is not found or embedding the query fails.
    """
    # Imported lazily, as in the rest of this module, to avoid import cycles at load time.
    from django.db import connection
    from apps.orgs.models import Role

    try:
        query_embedding = embed_text(query)
        logger.info(f"Embedded query: '{query[:50]}...' to {len(query_embedding)}D vector")
    except Exception as e:
        logger.error(f"Failed to embed query: {e}")
        raise ValueError(f"Failed to embed query: {e}") from e

    try:
        role = Role.objects.get(uuid=role_uuid)
    except Role.DoesNotExist:
        raise ValueError(f"Role with UUID {role_uuid} not found")

    # Only rows that actually carry an embedding can take part in the search.
    queryset = RoleRagDocument.objects.filter(
        role=role,
        embedding__isnull=False,
    )
    if not queryset.exists():
        logger.warning(f"No documents with embeddings found for role {role.uuid}")
        return []

    # Send the vector as its text form ("[v1,v2,...]") so the "%s::vector" cast
    # does not depend on how the database driver adapts Python lists.
    vector_literal = "[" + ",".join(str(float(value)) for value in query_embedding) + "]"

    with connection.cursor() as cursor:
        query_sql = """
            SELECT id, 1 - (embedding <=> %s::vector) as similarity
            FROM mlstore_roleragdocument
            WHERE role_id = %s AND embedding IS NOT NULL
            ORDER BY similarity DESC
            LIMIT %s
        """
        # One parameter per placeholder, in order: query vector, role id, limit.
        cursor.execute(query_sql, [vector_literal, role.pk, top_k])
        doc_ids_with_scores = cursor.fetchall()

    if not doc_ids_with_scores:
        logger.info(f"No similar documents found for query in role {role.uuid}")
        return []

    filtered_docs = [
        (doc_id, score)
        for doc_id, score in doc_ids_with_scores
        if score >= similarity_threshold
    ][:top_k]
    if not filtered_docs:
        logger.info(
            f"No documents met similarity threshold {similarity_threshold}"
        )
        return []

    doc_scores = {doc_id: score for doc_id, score in filtered_docs}
    documents = RoleRagDocument.objects.filter(id__in=list(doc_scores))
    results = [
        (doc, doc_scores[doc.id])
        for doc in documents
        if doc.id in doc_scores
    ]
    # The ORM fetch does not preserve the SQL ranking, so restore it here.
    results.sort(key=lambda pair: pair[1], reverse=True)

    logger.info(
        f"Found {len(results)} similar documents for query "
        f"(threshold={similarity_threshold}, top_k={top_k})"
    )
    return results
def batch_embed_documents(
    documents: List[RoleRagDocument],
    batch_size: int = 32,
    force_reembed: bool = False,
) -> Tuple[int, int]:
    """Batch embed documents that don't have embeddings yet.

    Args:
        documents: List of RoleRagDocument instances to embed.
        batch_size: Number of documents to embed per ``embed_texts`` call.
            Values below 1 are treated as 1.
        force_reembed: If True, re-embed documents that already have embeddings.

    Returns:
        Tuple of (num_embedded, num_failed). A document is counted as embedded
        only after its batch has been written with ``bulk_update``; every
        document of a batch that raises is counted as failed.

    Note:
        Updates documents in-place with embedding values. If the database write
        of a batch fails, the in-memory instances of that batch still hold the
        new (unsaved) vectors.
    """

    def _has_embedding(doc) -> bool:
        # Explicit None/length test: truthiness of an array-like vector is ambiguous.
        value = doc.embedding
        return value is not None and len(value) > 0

    to_embed = [
        doc for doc in documents
        if force_reembed or not _has_embedding(doc)
    ]
    if not to_embed:
        logger.info("No documents to embed")
        return 0, 0

    # range() rejects a zero step and yields nothing for a negative one.
    step = max(1, batch_size)
    num_embedded = 0
    num_failed = 0

    for start in range(0, len(to_embed), step):
        batch = to_embed[start : start + step]
        logger.info(
            f"Embedding batch {start // step + 1} "
            f"({len(batch)} documents)"
        )
        try:
            texts = [doc.content for doc in batch]
            embeddings = embed_texts(texts)
            # zip() would silently drop documents if fewer vectors came back.
            if len(embeddings) != len(batch):
                raise RuntimeError(
                    f"expected {len(batch)} embeddings, got {len(embeddings)}"
                )
            for doc, embedding in zip(batch, embeddings):
                doc.embedding = embedding
            RoleRagDocument.objects.bulk_update(batch, ["embedding"], batch_size=500)
            # Count only once the write has succeeded, so a failed write is not
            # reported as both embedded and failed.
            num_embedded += len(batch)
            logger.info(f"Successfully embedded {len(batch)} documents")
        except Exception as e:
            logger.error(f"Failed to embed batch: {e}")
            num_failed += len(batch)

    logger.info(
        f"Embedding complete: {num_embedded} embedded, {num_failed} failed"
    )
    return num_embedded, num_failed
def get_context_for_query(
    query: str,
    role_uuid: str,
    top_k: int = 5,
    similarity_threshold: float = 0.5,
) -> str:
    """Build a prompt-ready context string from the documents most similar to a query.

    Args:
        query: Query text.
        role_uuid: UUID of role to search within.
        top_k: Number of top results to include.
        similarity_threshold: Minimum similarity score.

    Returns:
        Cleaned document chunks, each prefixed with its source file name and
        similarity, joined by ``---`` separators. An empty string is returned
        when the search raises or yields nothing.
    """

    def _clean_chunk_text(text: str) -> str:
        """Drop boilerplate lines and repeated paragraphs from one chunk."""
        if not text:
            return ""

        # Remove inline "[Answer: ...]" annotations, including multi-line ones.
        text = re.sub(r"\[\s*Answer\s*:.*?\]", "", text, flags=re.IGNORECASE | re.DOTALL)

        noise_markers = (
            "do you have any questions",
            "feel free to ask",
            "references",
            "sources",
            "wikipedia",
        )
        kept: List[str] = []
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                # Blank lines are kept because they delimit paragraphs below.
                kept.append("")
                continue
            lowered = line.lower()
            is_noise = line.startswith("#") or any(marker in lowered for marker in noise_markers)
            if not is_noise:
                kept.append(line)

        paragraphs = [
            block.strip()
            for block in re.split(r"\n\s*\n+", "\n".join(kept))
            if block.strip()
        ]
        # dict.fromkeys keeps the first occurrence of each paragraph, in order.
        return "\n\n".join(dict.fromkeys(paragraphs))

    try:
        matches = search_similar_documents(
            query=query,
            role_uuid=role_uuid,
            top_k=top_k,
            similarity_threshold=similarity_threshold,
        )
    except Exception as exc:
        logger.warning(f"Failed to retrieve context: {exc}")
        return ""

    if not matches:
        return ""

    sections = []
    for doc, similarity in matches:
        cleaned = _clean_chunk_text(doc.content)
        if not cleaned:
            continue
        source = doc.training_file.file_name if doc.training_file else "unknown"
        sections.append(
            f"[Source: {source}, Similarity: {similarity:.2%}]\n{cleaned}\n"
        )
    return "\n---\n".join(sections)

View file

@ -1,657 +0,0 @@
import logging
import os
import re
import time
import traceback
from hashlib import sha256
from asgiref.sync import async_to_sync
from celery import shared_task
from channels.layers import get_channel_layer
from django.utils import timezone
from django.db import transaction
from apps.orgs.models import TrainingFile, Role
from . import services
from .models import Agent, AgentEvent, AgentModel, AgentRun, RoleRagDocument
logger = logging.getLogger(__name__)
def _get_mem_info() -> str:
try:
with open('/proc/self/status', 'r', encoding='utf-8') as f:
lines = f.read().splitlines()
mem = {line.split(':', 1)[0]: line.split(':', 1)[1].strip() for line in lines if ':' in line}
return f"VmRSS={mem.get('VmRSS','?')}, VmHWM={mem.get('VmHWM','?')}, VmSize={mem.get('VmSize','?')}"
except Exception:
return "mem_info_unavailable"
def _estimate_tokens(text: str) -> int:
if not text:
return 0
return len(re.findall(r"\w+|[^\s\w]", text))
def _split_semantic_units(text: str) -> list[str]:
paragraphs = [p.strip() for p in re.split(r"\n\s*\n+", text) if p.strip()]
units: list[str] = []
for para in paragraphs:
sentences = re.split(r"(?<=[.!?])\s+", para)
for sent in sentences:
sent = sent.strip()
if sent:
units.append(sent)
return units or paragraphs
def _chunk_text(text: str, max_tokens: int = 400, overlap_tokens: int = 50) -> list[str]:
if not text:
return []
units = _split_semantic_units(text)
logger.info(
"Semantic chunking units=%s max_tokens=%s overlap_tokens=%s mem=%s",
len(units),
max_tokens,
overlap_tokens,
_get_mem_info(),
)
chunks: list[str] = []
current: list[str] = []
current_tokens = 0
for unit in units:
unit_tokens = _estimate_tokens(unit)
if unit_tokens == 0:
continue
if current_tokens + unit_tokens > max_tokens and current:
chunk = " ".join(current).strip()
if chunk:
chunks.append(chunk)
if overlap_tokens > 0:
overlap: list[str] = []
overlap_count = 0
for prev in reversed(current):
prev_tokens = _estimate_tokens(prev)
if overlap_count + prev_tokens > overlap_tokens:
break
overlap.insert(0, prev)
overlap_count += prev_tokens
current = overlap
current_tokens = overlap_count
else:
current = []
current_tokens = 0
current.append(unit)
current_tokens += unit_tokens
if current:
chunk = " ".join(current).strip()
if chunk:
chunks.append(chunk)
return chunks
def _extract_text_from_file(file_path: str, file_type: str | None) -> str:
file_type = (file_type or '').lower()
if file_type in {'txt', 'md', 'csv', 'json'}:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
return f.read()
if file_type == 'pdf':
try:
from PyPDF2 import PdfReader
except Exception as e:
raise RuntimeError('PyPDF2 is required to parse PDF files') from e
reader = PdfReader(file_path)
return "\n".join(page.extract_text() or "" for page in reader.pages)
if file_type in {'docx', 'doc'}:
try:
import docx
except Exception as e:
raise RuntimeError('python-docx is required to parse DOCX files') from e
doc = docx.Document(file_path)
return "\n".join(p.text for p in doc.paragraphs)
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
return f.read()
def _send_group_event(room_group_name: str, event_type: str, content: dict):
    """Broadcast an ``mlstore_event`` message to a channel-layer group.

    The message carries ``event_type``, ``content`` and the current time as an
    ISO-8601 string; the async ``group_send`` is driven synchronously.
    """
    layer = get_channel_layer()
    message = {
        "type": "mlstore_event",
        "event_type": event_type,
        "content": content,
        "timestamp": timezone.now().isoformat(),
    }
    send = async_to_sync(layer.group_send)
    send(room_group_name, message)
def _persist_event(execution: AgentRun, event_type: str, content: dict):
    """Store one ``AgentEvent`` row linked to the given execution."""
    event_fields = {
        "execution": execution,
        "event_type": event_type,
        "content": content,
    }
    AgentEvent.objects.create(**event_fields)
def _update_agent_status(agent: Agent, status: str):
    """Set the agent's status, stamp the matching timestamp, and save it.

    ``"running"`` stamps ``started_at``; ``"completed"`` and ``"failed"`` stamp
    ``completed_at``; any other status changes no timestamp.
    """
    timestamp_field_by_status = {
        "running": "started_at",
        "completed": "completed_at",
        "failed": "completed_at",
    }
    agent.status = status
    field_name = timestamp_field_by_status.get(status)
    if field_name is not None:
        setattr(agent, field_name, timezone.now())
    agent.save()
def _next_model_version(existing_versions) -> str:
    """Return the ``v<N>`` label following the highest ``v<digits>`` label given.

    Labels are compared numerically, so "v10" ranks above "v9". Labels that are
    not of the form ``v<digits>`` are ignored; with no usable label the result is "v1".
    """
    highest = 0
    for label in existing_versions:
        match = re.fullmatch(r"v(\d+)", str(label or ""))
        if match:
            highest = max(highest, int(match.group(1)))
    return f"v{highest + 1}"


def _fail_fine_tune_run(execution, agent, room_group_name, error_message, event_error):
    """Mark a fine-tune run and its agent as failed and publish the error."""
    execution.status = "failed"
    execution.error_message = error_message
    execution.completed_at = timezone.now()
    execution.save()
    _update_agent_status(agent, "failed")
    payload = {"execution_id": str(execution.uuid), "error": event_error}
    _send_group_event(room_group_name, "error", payload)
    _persist_event(execution, "error", payload)
    async_to_sync(get_channel_layer().group_send)(
        room_group_name,
        {
            "type": "mlstore_error",
            "execution_id": str(execution.uuid),
            "error_message": error_message,
        },
    )


@shared_task
def start_fine_tune_run_task(execution_id: str):
    """Celery task: run a fine-tune for an ``AgentRun`` and record the outcome.

    Steps: load the run, mark run and agent as running, resolve parameters from
    ``input_data`` (falling back to the agent's model name, the organization's
    unprocessed training files and the next free ``v<N>`` version), call
    ``services.fine_tune_model``, then either register the new ``AgentModel`` and
    mark the run completed, or mark it failed. Progress is persisted as
    ``AgentEvent`` rows and broadcast to the ``mlstore_agent_<uuid>`` group.

    Args:
        execution_id: UUID of the ``AgentRun`` to process.

    Returns:
        A dict with ``status`` of ``"completed"``, ``"failed"`` (the tool did not
        report completion) or ``"error"`` (run not found, or an exception occurred).
    """
    logger.info(f"Fine-tune run task started for execution: {execution_id}")
    try:
        execution = AgentRun.objects.get(uuid=execution_id)
    except AgentRun.DoesNotExist:
        logger.error(f"Execution not found: {execution_id}")
        return {"status": "error", "error": "execution_not_found", "execution_id": execution_id}

    agent = execution.agent
    room_group_name = f"mlstore_agent_{agent.uuid}"
    logger.info(f"Agent: {agent.uuid}, User: {execution.user.email_address}")

    execution.status = "running"
    execution.started_at = timezone.now()
    execution.save()
    _update_agent_status(agent, "running")
    logger.info(f"Execution {execution_id} status updated to 'running'")

    from apps.mlstore.services import BASE_MODEL_CACHE
    logger.info(f"Base model cache directory: {BASE_MODEL_CACHE}")

    # Everything after the run is marked "running" happens inside this try, so
    # any failure (including bad input) ends with the run marked "failed".
    try:
        input_data = execution.input_data or {}
        base_model = input_data.get("base_model") or agent.model.name
        training_files = input_data.get("training_files") or []
        org_training_files = []
        role_uuid = input_data.get("role_uuid")

        if not training_files and agent.organization:
            training_files_qs = TrainingFile.objects.filter(
                role__organization=agent.organization,
                is_processed=False
            ).select_related('uploaded_by', 'role')
            if role_uuid:
                try:
                    role = Role.objects.get(uuid=role_uuid, organization=agent.organization)
                    training_files_qs = training_files_qs.filter(role=role)
                except Role.DoesNotExist:
                    logger.warning(f"Role {role_uuid} not found for organization {agent.organization.name}")
            org_training_files = list(training_files_qs)
            training_files = [tf.file.path for tf in org_training_files if tf.file]
            logger.info(f"Fetched {len(training_files)} training files from organization {agent.organization.name}")

        hyperparams = input_data.get("hyperparams") or {}
        name = input_data.get("name") or agent.model.name
        # Compare stored versions numerically: a lexicographic ORDER BY would put
        # "v9" above "v10" and produce a duplicate "v10".
        version = input_data.get("version") or _next_model_version(
            AgentModel.objects.filter(name=name).values_list('version', flat=True)
        )
        logger.info(f"Fine-tune parameters: base_model={base_model}, name={name}, version={version}")

        started_payload = {"execution_id": str(execution.uuid), "action": "fine_tune"}
        _send_group_event(room_group_name, "started", started_payload)
        _persist_event(execution, "started", started_payload)

        result = services.fine_tune_model(base_model, training_files, hyperparams, name, version)
        result_status = result.get("status") if isinstance(result, dict) else None
        logger.info(f"Fine-tune result received: {result_status}")
        logger.debug(f"Full fine-tune result: {result}")

        if result_status == "completed":
            model_path = result.get("model_path") or result.get("path") or ""
            model_version = result.get("version") or version
            new_model = AgentModel.objects.create(name=name, version=model_version, path=model_path)
            agent.model = new_model
            agent.save()
            logger.info(f"Fine-tune completed. New model created: {new_model.uuid} at {model_path}")

            if org_training_files:
                file_ids = [tf.id for tf in org_training_files]
                TrainingFile.objects.filter(id__in=file_ids).update(is_processed=True)
                logger.info(f"Marked {len(org_training_files)} training files as processed")

            execution.status = "completed"
            execution.output_data = {
                "result": result,
                "model_id": new_model.id,
                "model_uuid": str(new_model.uuid),
            }
            execution.completed_at = timezone.now()
            execution.save()
            _update_agent_status(agent, "completed")
            logger.info(f"Execution {execution_id} completed successfully")

            completed_payload = {
                "execution_id": str(execution.uuid),
                "model_id": new_model.id,
                "model_path": model_path,
            }
            _send_group_event(room_group_name, "completed", completed_payload)
            _persist_event(execution, "completed", completed_payload)
            async_to_sync(get_channel_layer().group_send)(
                room_group_name,
                {
                    "type": "mlstore_completed",
                    "execution_id": str(execution.uuid),
                    "output_data": execution.output_data,
                },
            )
            return {"status": "completed", "execution_id": execution_id, "model_id": new_model.id}

        logger.warning(f"Fine-tune did not complete successfully. Status: {result_status}")
        _fail_fine_tune_run(execution, agent, room_group_name, str(result), result)
        return {"status": "failed", "execution_id": execution_id, "result": result}
    except Exception as e:
        # exc_info=True already records the traceback through the logging system.
        logger.error(f"Fine-tune task failed with exception for execution {execution_id}: {str(e)}", exc_info=True)
        _fail_fine_tune_run(execution, agent, room_group_name, str(e), str(e))
        return {"status": "error", "execution_id": execution_id, "error": str(e)}
@shared_task
def ingest_training_file_task(training_file_uuid: str):
    """Celery task: extract, chunk and store a training file as RAG documents.

    Existing ``RoleRagDocument`` rows of the file are deleted and replaced inside
    one transaction. A chunk is skipped when its SHA-256 ``content_hash`` already
    exists for the role or has already been seen earlier in the same file. On
    success the file is marked ``chunked``/processed and
    ``embed_training_file_task`` is enqueued.

    Args:
        training_file_uuid: UUID of the ``TrainingFile`` to ingest.

    Returns:
        ``{"status": "completed", "chunks": <chunk count>}`` on success,
        ``{"status": "skipped", ...}`` if the file was already processed, or
        ``{"status": "error", "error": ...}`` on failure.
    """
    logger.info(f"Ingest task started for training_file_uuid={training_file_uuid}")
    started_at = time.time()
    try:
        training_file = TrainingFile.objects.select_related('role').get(uuid=training_file_uuid)
    except TrainingFile.DoesNotExist:
        logger.error(f"Training file not found: {training_file_uuid}")
        return {"status": "error", "error": "training_file_not_found"}

    if training_file.is_processed:
        logger.info(f"Training file already processed: {training_file_uuid}")
        return {"status": "skipped", "reason": "already_processed"}
    if not training_file.file:
        logger.error(f"Training file has no file attached: {training_file_uuid}")
        return {"status": "error", "error": "file_missing"}

    # Diagnostic logging only: a failure here must not abort the ingest.
    try:
        file_path = training_file.file.path
        file_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0
        logger.info(
            "Ingesting file: name=%s type=%s size_bytes=%s role=%s path=%s",
            training_file.file_name,
            training_file.file_type,
            file_size,
            training_file.role_id,
            file_path,
        )
    except Exception as e:
        logger.warning(f"Failed to stat training file for {training_file_uuid}: {str(e)}")

    try:
        training_file.status = 'ingesting'
        training_file.save(update_fields=['status'])

        extract_started = time.time()
        text = _extract_text_from_file(training_file.file.path, training_file.file_type)
        logger.info(
            "Extracted text length=%s for training_file_uuid=%s in %.2fs mem=%s",
            len(text),
            training_file_uuid,
            time.time() - extract_started,
            _get_mem_info(),
        )

        chunk_started = time.time()
        chunks = _chunk_text(text)
        logger.info(
            "Chunked text into %s chunks in %.2fs (sample lengths: %s) mem=%s",
            len(chunks),
            time.time() - chunk_started,
            [len(c) for c in chunks[:5]],
            _get_mem_info(),
        )
        if not chunks:
            raise RuntimeError("No text extracted from file")

        with transaction.atomic():
            logger.info("Clearing existing RAG docs for training_file_uuid=%s mem=%s", training_file_uuid, _get_mem_info())
            RoleRagDocument.objects.filter(training_file=training_file).delete()

            logger.info("Preparing %s RAG docs for bulk_create mem=%s", len(chunks), _get_mem_info())
            seen_hashes = set(
                RoleRagDocument.objects.filter(role=training_file.role)
                .values_list('content_hash', flat=True)
            )
            documents = []
            skipped = 0
            for index, chunk in enumerate(chunks):
                content_hash = sha256(chunk.encode('utf-8')).hexdigest()
                if content_hash in seen_hashes:
                    skipped += 1
                    continue
                # Remember the hash so a chunk repeated later in this same file
                # is skipped as well, not only chunks already stored for the role.
                seen_hashes.add(content_hash)
                documents.append(
                    RoleRagDocument(
                        role=training_file.role,
                        training_file=training_file,
                        content=chunk,
                        embedding=None,
                        content_hash=content_hash,
                        metadata={
                            "file_name": training_file.file_name,
                            "file_type": training_file.file_type,
                            "chunk_size": len(chunk),
                            "source": "training_file",
                        },
                        chunk_index=index,
                    )
                )
            logger.info("Skipped %s duplicate chunks based on content_hash", skipped)
            logger.info("Bulk creating RAG docs count=%s mem=%s", len(documents), _get_mem_info())
            RoleRagDocument.objects.bulk_create(documents, batch_size=500)

            training_file.status = 'chunked'
            training_file.is_processed = True
            training_file.save(update_fields=['status', 'is_processed'])

        elapsed = time.time() - started_at
        logger.info(
            "Ingested training file %s into %s RAG chunks in %.2fs",
            training_file_uuid,
            len(chunks),
            elapsed,
        )
        logger.info(f"Enqueueing embedding task for training_file_uuid={training_file_uuid}")
        embed_training_file_task.delay(training_file_uuid)
        return {"status": "completed", "chunks": len(chunks)}
    except Exception as e:
        elapsed = time.time() - started_at
        logger.error(f"Failed to ingest training file {training_file_uuid}: {str(e)}", exc_info=True)
        logger.error(f"Ingest task failed after {elapsed:.2f}s for training_file_uuid={training_file_uuid}")
        try:
            TrainingFile.objects.filter(uuid=training_file_uuid).update(status='failed')
        except Exception:
            # Best effort: still return the original error, but leave a trace.
            logger.warning(
                f"Could not mark training file {training_file_uuid} as failed",
                exc_info=True,
            )
        return {"status": "error", "error": str(e)}
@shared_task
def embed_training_file_task(training_file_uuid: str):
    """Celery task: generate embeddings for all RAG documents of a training file.

    Embedding is delegated to ``services.batch_embed_documents`` in batches of 32.
    The file's status becomes ``embedded`` unless every attempted document failed,
    in which case it becomes ``failed``.

    Args:
        training_file_uuid: UUID of the ``TrainingFile`` whose documents are embedded.

    Returns:
        A dict whose ``status`` is ``"completed"`` (with counts and elapsed
        seconds), ``"skipped"`` (no documents) or ``"error"``.
    """
    logger.info(f"Embedding task started for training_file_uuid={training_file_uuid}")
    started_at = time.time()
    try:
        training_file = TrainingFile.objects.select_related('role').get(uuid=training_file_uuid)
    except TrainingFile.DoesNotExist:
        logger.error(f"Training file not found: {training_file_uuid}")
        return {"status": "error", "error": "training_file_not_found"}

    try:
        documents = list(RoleRagDocument.objects.filter(training_file=training_file))
        if not documents:
            logger.warning(f"No RAG documents found for training_file_uuid={training_file_uuid}")
            return {"status": "skipped", "reason": "no_documents"}

        logger.info(
            f"Starting to embed {len(documents)} documents for training_file_uuid={training_file_uuid} "
            f"mem={_get_mem_info()}"
        )
        num_embedded, num_failed = services.batch_embed_documents(documents, batch_size=32)

        # Total failure: something failed and nothing succeeded.
        if num_failed and not num_embedded:
            training_file.status = 'failed'
            training_file.save(update_fields=['status'])
            logger.error(f"Failed to embed any documents for training_file_uuid={training_file_uuid}")
            return {"status": "error", "error": "embedding_failed", "num_failed": num_failed}

        # Full and partial success both leave the file in the 'embedded' state.
        training_file.status = 'embedded'
        training_file.save(update_fields=['status'])
        if num_failed == 0:
            logger.info(f"Successfully embedded all documents for training_file_uuid={training_file_uuid}")
        else:
            logger.warning(
                f"Partially embedded {num_embedded} documents, {num_failed} failed "
                f"for training_file_uuid={training_file_uuid}"
            )

        elapsed = time.time() - started_at
        logger.info(
            f"Embedding task completed for {training_file_uuid}: "
            f"embedded={num_embedded}, failed={num_failed}, time={elapsed:.2f}s"
        )
        return {
            "status": "completed",
            "num_embedded": num_embedded,
            "num_failed": num_failed,
            "elapsed": elapsed,
        }
    except Exception as e:
        elapsed = time.time() - started_at
        logger.error(
            f"Failed to embed training file {training_file_uuid}: {str(e)}",
            exc_info=True
        )
        try:
            TrainingFile.objects.filter(uuid=training_file_uuid).update(status='failed')
        except Exception:
            # Best effort: still return the original error, but leave a trace.
            logger.warning(
                f"Could not mark training file {training_file_uuid} as failed",
                exc_info=True,
            )
        return {"status": "error", "error": str(e), "elapsed": elapsed}
def _coerce_rag_setting(raw, caster, default):
    """Convert a user-supplied RAG setting with ``caster``; use ``default`` if it cannot be converted."""
    try:
        return caster(raw)
    except (TypeError, ValueError):
        return default


def _fail_infer_run(execution, agent, room_group_name, error_message):
    """Mark an inference run and its agent as failed and publish the error."""
    execution.status = "failed"
    execution.error_message = error_message
    execution.completed_at = timezone.now()
    execution.save()
    _update_agent_status(agent, "failed")
    payload = {"execution_id": str(execution.uuid), "error": error_message}
    _send_group_event(room_group_name, "error", payload)
    _persist_event(execution, "error", payload)
    async_to_sync(get_channel_layer().group_send)(
        room_group_name,
        {
            "type": "mlstore_error",
            "execution_id": str(execution.uuid),
            "error_message": error_message,
        },
    )


@shared_task
def infer_run_task(execution_id: str):
    """Celery task: run one inference for an ``AgentRun``, optionally augmented with RAG context.

    The prompt comes from ``input_data["prompt"]`` (or ``"query"``). A
    ``role_uuid`` is required; when context is found for it, the prompt is
    wrapped in an instruction template containing that context. The result is
    stored in ``output_data``; progress is persisted as ``AgentEvent`` rows and
    broadcast to the ``mlstore_agent_<uuid>`` group.

    Args:
        execution_id: UUID of the ``AgentRun`` to process.

    Returns:
        A dict with ``status`` of ``"completed"``, ``"failed"`` (missing input or
        an exception during inference) or ``"error"`` (run not found).
    """
    logger.info(f"Inference run task started for execution: {execution_id}")
    try:
        execution = AgentRun.objects.get(uuid=execution_id)
    except AgentRun.DoesNotExist:
        logger.error(f"Execution not found: {execution_id}")
        return {"status": "error", "error": "execution_not_found", "execution_id": execution_id}

    agent = execution.agent
    room_group_name = f"mlstore_agent_{agent.uuid}"
    logger.info(f"Agent: {agent.uuid}, User: {execution.user.email_address}")

    execution.status = "running"
    execution.started_at = timezone.now()
    execution.save()
    _update_agent_status(agent, "running")
    logger.info(f"Execution {execution_id} status updated to 'running'")

    input_data = execution.input_data or {}
    prompt = input_data.get("prompt") or input_data.get("query") or ""
    options = dict(input_data.get("options") or {})
    role_uuid = input_data.get("role_uuid") or options.get("role_uuid")
    # User-supplied values: an unparsable one falls back to the default instead
    # of crashing the task and leaving the run stuck in "running".
    rag_top_k = _coerce_rag_setting(input_data.get("rag_top_k", 5), int, 5)
    rag_similarity_threshold = _coerce_rag_setting(
        input_data.get("rag_similarity_threshold", 0.5), float, 0.5
    )
    options.setdefault("temperature", 0.2)
    options.setdefault("top_p", 0.9)
    options.setdefault("max_tokens", 200)
    options.setdefault("stop", ["\n\n", "References:", "Sources:"])
    logger.info(f"Prompt length: {len(prompt)} characters")

    if not role_uuid:
        logger.warning(f"No role_uuid provided for inference run {execution_id}")
        _fail_infer_run(execution, agent, room_group_name, "role_uuid_required")
        return {"status": "failed", "execution_id": execution_id, "error": "role_uuid_required"}

    if not prompt:
        logger.warning(f"No prompt provided for inference run {execution_id}")
        _fail_infer_run(execution, agent, room_group_name, "prompt_required")
        return {"status": "failed", "execution_id": execution_id, "error": "prompt_required"}

    # Retrieval is best effort: on any failure the raw prompt is used as-is.
    try:
        context = services.get_context_for_query(
            query=prompt,
            role_uuid=str(role_uuid),
            top_k=rag_top_k,
            similarity_threshold=rag_similarity_threshold,
        )
        if context:
            logger.info(f"RAG context retrieved for role={role_uuid} (top_k={rag_top_k})")
            prompt = (
                "You are a technical assistant.\n\n"
                "Answer the question using ONLY the information in the context.\n"
                "Do NOT:\n"
                "- ask follow-up questions\n"
                "- include hashtags\n"
                "- include references or sources\n"
                "- repeat the question\n"
                "- add headings or sections\n"
                "- add information not present in the context\n\n"
                "Answer in 3-6 concise sentences.\n"
                "If the context is insufficient, say: \"The context does not provide enough information.\"\n\n"
                "Context:\n"
                f"{context}\n\n"
                "Question:\n"
                f"{prompt}\n\n"
                "Answer:"
            )
        else:
            logger.info(f"No RAG context found for role={role_uuid}")
    except Exception as e:
        logger.warning(f"RAG context retrieval failed for role={role_uuid}: {e}")

    started_payload = {"execution_id": str(execution.uuid), "action": "infer"}
    _send_group_event(room_group_name, "started", started_payload)
    _persist_event(execution, "started", started_payload)

    try:
        # Preloading is an optimisation only; inference is attempted regardless.
        try:
            logger.info(f"Loading model: {agent.model.path}")
            services.load_model_for_inference(agent.model.path)
        except Exception as e:
            logger.warning(f"Failed to preload model: {str(e)}")

        logger.info(f"Starting inference with model: {agent.model.path}")
        result = services.infer_with_model(agent.model.path, prompt, options)

        execution.status = "completed"
        execution.output_data = {"result": result}
        execution.completed_at = timezone.now()
        execution.save()
        _update_agent_status(agent, "completed")
        logger.info(f"Inference execution {execution_id} completed successfully")

        completed_payload = {"execution_id": str(execution.uuid), "result": result}
        _send_group_event(room_group_name, "completed", completed_payload)
        _persist_event(execution, "completed", completed_payload)
        async_to_sync(get_channel_layer().group_send)(
            room_group_name,
            {
                "type": "mlstore_completed",
                "execution_id": str(execution.uuid),
                "output_data": execution.output_data,
            },
        )
        return {"status": "completed", "execution_id": execution_id}
    except Exception as e:
        # exc_info=True already records the traceback through the logging system.
        logger.error(f"Inference task failed with exception for execution {execution_id}: {str(e)}", exc_info=True)
        _fail_infer_run(execution, agent, room_group_name, str(e))
        return {"status": "failed", "execution_id": execution_id, "error": str(e)}

View file

@ -1,91 +0,0 @@
from unittest.mock import patch
from django.contrib.auth import get_user_model
from django.test import TestCase
from rest_framework.test import APIRequestFactory, force_authenticate
from rest_framework.status import HTTP_200_OK, HTTP_400_BAD_REQUEST, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
from apps.orgs.models import Organization, Role
from apps.mlstore.models import AgentModel, Agent, AgentRun, AgentEvent, RoleRagDocument
from apps.mlstore.viewsets import AgentViewSet, AgentRunViewSet
User = get_user_model()
class MLStoreAPITests(TestCase):
    """Request-level tests for ``AgentViewSet`` and ``AgentRunViewSet``."""

    def setUp(self):
        """Create the request factory and the users/org/role/model/agent shared by every test."""
        self.factory = APIRequestFactory()
        make_user = User.objects.create_user
        self.user = make_user(email_address='user@example.com', password='pass')
        self.other = make_user(email_address='other@example.com', password='pass')
        self.manager = make_user(email_address='manager@example.com', password='pass', is_manager=True)
        self.org = Organization.objects.create(name='Org', owner=self.manager)
        self.role = Role.objects.create(name='Engineer', organization=self.org)
        self.model = AgentModel.objects.create(name='test-model', version='v1', path='model.gguf')
        self.agent = Agent.objects.create(model=self.model, organization=self.org)

    def _dispatch(self, viewset, actions, request, user=None, **view_kwargs):
        """Run ``request`` through ``viewset`` bound to ``actions``, authenticating first if ``user`` is given."""
        if user is not None:
            force_authenticate(request, user=user)
        handler = viewset.as_view(actions)
        return handler(request, **view_kwargs)

    def test_agents_list_requires_auth(self):
        # No force_authenticate: the anonymous request must be rejected.
        response = self._dispatch(AgentViewSet, {'get': 'list'}, self.factory.get('/'))
        self.assertEqual(response.status_code, HTTP_403_FORBIDDEN)

    def test_agents_list_authenticated(self):
        response = self._dispatch(
            AgentViewSet, {'get': 'list'}, self.factory.get('/'), user=self.user
        )
        self.assertEqual(response.status_code, HTTP_200_OK)

    def test_agent_runs_scoped_to_user(self):
        # One run per user; the listing for self.user must only contain its own run.
        for owner in (self.user, self.other):
            AgentRun.objects.create(agent=self.agent, user=owner)
        response = self._dispatch(
            AgentRunViewSet, {'get': 'list'}, self.factory.get('/'), user=self.user
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        self.assertEqual(len(response.data), 1)

    def test_agent_run_events(self):
        run = AgentRun.objects.create(agent=self.agent, user=self.user)
        AgentEvent.objects.create(execution=run, event_type='message', content={'msg': 'hi'})
        response = self._dispatch(
            AgentRunViewSet,
            {'get': 'events'},
            self.factory.get('/'),
            user=self.user,
            uuid=str(run.uuid),
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        self.assertEqual(len(response.data), 1)

    def test_retrieve_context_missing_params(self):
        response = self._dispatch(
            AgentRunViewSet,
            {'post': 'retrieve_context'},
            self.factory.post('/', {}),
            user=self.user,
        )
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)

    def test_retrieve_context_role_not_found(self):
        # Well-formed UUID that matches no Role row.
        body = {'query': 'q', 'role_uuid': '00000000-0000-0000-0000-000000000000'}
        response = self._dispatch(
            AgentRunViewSet,
            {'post': 'retrieve_context'},
            self.factory.post('/', body),
            user=self.user,
        )
        self.assertEqual(response.status_code, HTTP_404_NOT_FOUND)

    @patch('apps.mlstore.viewsets.services.search_similar_documents')
    @patch('apps.mlstore.viewsets.services.get_context_for_query')
    def test_retrieve_context_success(self, mock_context, mock_search):
        # Both service calls are mocked, so only the view's response assembly is exercised.
        doc = RoleRagDocument.objects.create(
            role=self.role,
            content='chunk',
            content_hash='hash',
            chunk_index=0,
        )
        mock_search.return_value = [(doc, 0.9)]
        mock_context.return_value = 'context text'
        payload = {'query': 'hello', 'role_uuid': str(self.role.uuid)}
        response = self._dispatch(
            AgentRunViewSet,
            {'post': 'retrieve_context'},
            self.factory.post('/', payload, format='json'),
            user=self.user,
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        self.assertEqual(response.data.get('num_results'), 1)
        self.assertEqual(response.data.get('context'), 'context text')

View file

@ -1,41 +0,0 @@
from django.test import TestCase
from django.contrib.auth import get_user_model
from apps.orgs.models import Organization, Role
from apps.mlstore.models import AgentModel, Agent, AgentRun, AgentEvent, RoleRagDocument
User = get_user_model()
class MLStoreModelTests(TestCase):
    """Checks the ``__str__`` output of the mlstore models."""

    def setUp(self):
        """Create a user, a manager-owned organization, a role, a model and an agent."""
        make_user = User.objects.create_user
        self.user = make_user(email_address='user@example.com', password='pass')
        self.manager = make_user(email_address='manager@example.com', password='pass', is_manager=True)
        self.org = Organization.objects.create(name='Org', owner=self.manager)
        self.role = Role.objects.create(name='Engineer', organization=self.org)
        self.model = AgentModel.objects.create(name='test-model', version='v1', path='model.gguf')
        self.agent = Agent.objects.create(model=self.model, organization=self.org)

    def test_agent_model_str(self):
        rendered = str(self.model)
        self.assertEqual(rendered, 'test-model')

    def test_agent_str(self):
        rendered = str(self.agent)
        self.assertIn(self.model.name, rendered)

    def test_agent_run_str(self):
        run = AgentRun.objects.create(agent=self.agent, user=self.user)
        # The run's text must mention both its own UUID and its agent.
        self.assertIn(str(run.uuid), str(run))
        self.assertIn(str(self.agent), str(run))

    def test_agent_event_str(self):
        run = AgentRun.objects.create(agent=self.agent, user=self.user)
        evt = AgentEvent.objects.create(
            execution=run,
            event_type='message',
            content={'msg': 'hi'},
        )
        self.assertIn('message', str(evt))

    def test_role_rag_document_str(self):
        doc_kwargs = dict(role=self.role, content='chunk', content_hash='hash', chunk_index=0)
        doc = RoleRagDocument.objects.create(**doc_kwargs)
        self.assertIn(self.role.name, str(doc))

View file

@ -1,139 +0,0 @@
from rest_framework.viewsets import ModelViewSet
from rest_framework.permissions import IsAuthenticated
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework import status
from .models import Agent, AgentRun, AgentEvent
from .serializers import AgentSerializer, AgentRunSerializer, AgentEventSerializer
from . import services
from apps.orgs.models import Role
class AgentViewSet(ModelViewSet):
    """Model viewset over all ``Agent`` rows; authenticated users only, addressed by ``uuid``."""

    lookup_field = 'uuid'
    permission_classes = [IsAuthenticated]
    serializer_class = AgentSerializer
    queryset = Agent.objects.all()
class AgentRunViewSet(ModelViewSet):
    """Model viewset for agent runs, limited to runs owned by the requesting user.

    Besides the standard ``ModelViewSet`` actions it exposes:

    * ``events`` (GET, detail, url_path ``events``): events of one run, ordered by timestamp.
    * ``retrieve_context`` (POST, list, url_path ``retrieve-context``): RAG lookup for a role.
    """

    queryset = AgentRun.objects.all()
    serializer_class = AgentRunSerializer
    permission_classes = [IsAuthenticated]
    lookup_field = 'uuid'

    def get_queryset(self):
        """Return only the runs whose ``user`` is the requesting user."""
        return AgentRun.objects.filter(user=self.request.user)

    @action(detail=True, methods=['get'], url_path='events')
    def events(self, request, uuid=None):
        """Return the serialized events of the addressed run, oldest first."""
        run = self.get_object()
        events = AgentEvent.objects.filter(execution=run).order_by('timestamp')
        serializer = AgentEventSerializer(events, many=True)
        return Response(serializer.data)

    @action(detail=False, methods=['post'], url_path='retrieve-context')
    def retrieve_context(self, request):
        """Retrieve context documents from RAG using semantic similarity.

        Request body:
        {
            "query": "search query text",
            "role_uuid": "role-uuid",
            "top_k": 5,                   # optional, default 5
            "similarity_threshold": 0.5   # optional, default 0.5
        }

        Returns (HTTP 200):
        {
            "query": "search query text",
            "role_uuid": "role-uuid",
            "num_results": 1,
            "context": "formatted context string with sources",
            "documents": [
                {
                    "id": 123,
                    "uuid": "document-uuid",
                    "content": "chunk text",
                    "similarity": 0.87,
                    "source": "filename.pdf",   # "unknown" when no training file
                    "chunk_index": 0,
                    "metadata": {...}
                },
                ...
            ]
        }

        Error responses:
            400 - ``query`` / ``role_uuid`` missing, blank or not a string;
                  ``role_uuid`` not a well-formed UUID; ``top_k`` or
                  ``similarity_threshold`` not numeric; or a ``ValueError``
                  raised while searching.
            404 - no ``Role`` with the given UUID.
            500 - any other failure (logged with traceback).
        """
        # Local import: the module's import block does not provide ``uuid``.
        from uuid import UUID

        payload = request.data
        raw_query = payload.get('query', '')
        raw_role_uuid = payload.get('role_uuid', '')
        # JSON bodies can carry null or numbers here; calling .strip() on those
        # used to raise AttributeError outside the try block and yield a 500.
        query = raw_query.strip() if isinstance(raw_query, str) else ''
        role_uuid = raw_role_uuid.strip() if isinstance(raw_role_uuid, str) else ''

        if not query:
            return Response(
                {"error": "query is required"},
                status=status.HTTP_400_BAD_REQUEST
            )
        if not role_uuid:
            return Response(
                {"error": "role_uuid is required"},
                status=status.HTTP_400_BAD_REQUEST
            )

        # A malformed UUID makes the ORM lookup raise Django's ValidationError
        # (not ValueError), which previously surfaced as a 500. Reject it up front.
        try:
            UUID(role_uuid)
        except ValueError:
            return Response(
                {"error": "role_uuid is not a valid UUID"},
                status=status.HTTP_400_BAD_REQUEST
            )

        # Form-encoded requests deliver these as strings; coerce so the service
        # layer always receives an int and a float.
        try:
            top_k = int(payload.get('top_k', 5))
            similarity_threshold = float(payload.get('similarity_threshold', 0.5))
        except (TypeError, ValueError):
            return Response(
                {"error": "top_k must be an integer and similarity_threshold must be a number"},
                status=status.HTTP_400_BAD_REQUEST
            )

        try:
            # Validate that the role exists (raises Role.DoesNotExist otherwise).
            # NOTE(review): nothing here checks that the requesting user may
            # access this role's documents - confirm whether that is intended.
            Role.objects.get(uuid=role_uuid)

            # Search for similar documents
            results = services.search_similar_documents(
                query=query,
                role_uuid=role_uuid,
                top_k=top_k,
                similarity_threshold=similarity_threshold
            )

            # Format response
            documents = [
                {
                    "id": doc.id,
                    "uuid": str(doc.uuid),
                    "content": doc.content,
                    "similarity": float(similarity),
                    "source": doc.training_file.file_name if doc.training_file else "unknown",
                    "chunk_index": doc.chunk_index,
                    "metadata": doc.metadata,
                }
                for doc, similarity in results
            ]

            # Get formatted context string
            # NOTE(review): this may repeat the similarity search done above;
            # verify against services.get_context_for_query.
            context = services.get_context_for_query(
                query=query,
                role_uuid=role_uuid,
                top_k=top_k,
                similarity_threshold=similarity_threshold
            )

            return Response({
                "query": query,
                "role_uuid": role_uuid,
                "num_results": len(documents),
                "context": context,
                "documents": documents,
            })
        except Role.DoesNotExist:
            return Response(
                {"error": f"Role with UUID {role_uuid} not found"},
                status=status.HTTP_404_NOT_FOUND
            )
        except ValueError as e:
            return Response(
                {"error": str(e)},
                status=status.HTTP_400_BAD_REQUEST
            )
        except Exception as e:
            import logging
            logging.exception("Error retrieving context")
            # NOTE(review): "detail" exposes the raw exception text to the
            # client; kept for response-shape compatibility, consider removing.
            return Response(
                {"error": "Failed to retrieve context", "detail": str(e)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR
            )

View file

@ -1,47 +0,0 @@
from django.contrib import admin
from django.contrib.admin import ModelAdmin, TabularInline
from .models import OnboardingFlow, OnboardingPage, OnboardingField, OnboardingSession
class OnboardingPageInline(TabularInline):
    """Tabular inline for ``OnboardingPage`` rows, with no extra blank forms."""

    extra = 0
    model = OnboardingPage
class OnboardingFieldInline(TabularInline):
    """Tabular inline for ``OnboardingField`` rows, with no extra blank forms."""

    extra = 0
    model = OnboardingField
@admin.register(OnboardingFlow)
class OnboardingFlowAdmin(ModelAdmin):
    """Admin for onboarding flows; pages are edited inline and ``uuid`` is read-only."""

    readonly_fields = ('uuid',)
    inlines = (OnboardingPageInline,)
    list_display = (
        'id',
        'uuid',
        'title',
        'role',
        'status',
    )
    list_filter = ('status',)
    search_fields = ('title', 'role__name')
@admin.register(OnboardingPage)
class OnboardingPageAdmin(ModelAdmin):
    """Admin for onboarding pages; fields are edited inline and ``uuid`` is read-only."""

    readonly_fields = ('uuid',)
    inlines = (OnboardingFieldInline,)
    list_display = (
        'id',
        'uuid',
        'title',
        'flow',
        'order',
    )
    list_filter = ('flow',)
    search_fields = ('title', 'flow__title')
@admin.register(OnboardingField)
class OnboardingFieldAdmin(ModelAdmin):
    """Admin for individual onboarding form fields; ``uuid`` is read-only."""

    readonly_fields = ('uuid',)
    list_display = (
        'id',
        'uuid',
        'label',
        'page',
        'field_type',
        'required',
    )
    list_filter = ('field_type',)
    search_fields = ('label', 'page__title')
@admin.register(OnboardingSession)
class OnboardingSessionAdmin(ModelAdmin):
    """Admin for user onboarding sessions; ``uuid`` is read-only."""

    readonly_fields = ('uuid',)
    list_display = (
        'id',
        'uuid',
        'flow',
        'user',
        'status',
        'current_page_order',
    )
    list_filter = ('status',)
    search_fields = ('flow__title', 'user__email_address')

View file

@ -1,6 +0,0 @@
from django.apps import AppConfig
class OnboardingConfig(AppConfig):
    """Application configuration for the ``apps.onboarding`` Django app."""

    name = 'apps.onboarding'
    default_auto_field = 'django.db.models.BigAutoField'

View file

@ -1,100 +0,0 @@
from django.db import migrations, models
import django.db.models.deletion
import uuid
class Migration(migrations.Migration):
    """Initial schema for the onboarding app.

    Creates four tables: OnboardingFlow, OnboardingPage, OnboardingField and
    OnboardingSession. Every model carries an auto ``id``, ``created_at`` /
    ``updated_at`` timestamps and a unique, non-editable ``uuid``.
    """

    initial = True
    # Needed because the models below hold foreign keys into these apps
    # (orgs.role, mlstore.agent / mlstore.agentrun, users.user).
    dependencies = [
        ('orgs', '0001_initial'),
        ('mlstore', '0001_initial'),
        ('users', '0001_initial'),
    ]
    operations = [
        # A flow belongs to a role and may optionally reference an agent.
        migrations.CreateModel(
            name='OnboardingFlow',
            fields=[
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('title', models.CharField(max_length=255)),
                ('description', models.TextField(blank=True, default='')),
                ('status', models.CharField(choices=[('draft', 'Draft'), ('published', 'Published'), ('archived', 'Archived')], default='draft', max_length=20)),
                ('agent', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='onboarding_flows', to='mlstore.agent')),
                ('role', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='onboarding_flows', to='orgs.role')),
            ],
            options={
                'verbose_name': 'Onboarding Flow',
                'verbose_name_plural': 'Onboarding Flows',
                'ordering': ['-created_at'],
            },
        ),
        # Pages of a flow, sorted by ``order`` then creation time.
        migrations.CreateModel(
            name='OnboardingPage',
            fields=[
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('order', models.IntegerField(default=0)),
                ('title', models.CharField(max_length=255)),
                ('body', models.TextField(blank=True, default='')),
                ('meta', models.JSONField(blank=True, default=dict)),
                ('flow', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='pages', to='onboarding.onboardingflow')),
            ],
            options={
                'verbose_name': 'Onboarding Page',
                'verbose_name_plural': 'Onboarding Pages',
                'ordering': ['order', 'created_at'],
            },
        ),
        # Form fields of a page; ``key`` is unique within its page.
        migrations.CreateModel(
            name='OnboardingField',
            fields=[
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('order', models.IntegerField(default=0)),
                ('key', models.CharField(max_length=120)),
                ('label', models.CharField(max_length=255)),
                ('field_type', models.CharField(choices=[('text', 'Text'), ('textarea', 'Textarea'), ('select', 'Select'), ('multiselect', 'Multi Select'), ('number', 'Number'), ('boolean', 'Boolean'), ('date', 'Date')], default='text', max_length=30)),
                ('required', models.BooleanField(default=False)),
                ('help_text', models.TextField(blank=True, default='')),
                ('placeholder', models.CharField(blank=True, default='', max_length=255)),
                ('options', models.JSONField(blank=True, default=list)),
                ('default_value', models.JSONField(blank=True, null=True, default=None)),
                ('validation', models.JSONField(blank=True, default=dict)),
                ('page', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='fields', to='onboarding.onboardingpage')),
            ],
            options={
                'verbose_name': 'Onboarding Field',
                'verbose_name_plural': 'Onboarding Fields',
                'ordering': ['order', 'created_at'],
                'unique_together': {('page', 'key')},
            },
        ),
        # One user's pass through a flow, optionally linked to an agent run.
        migrations.CreateModel(
            name='OnboardingSession',
            fields=[
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('status', models.CharField(choices=[('in_progress', 'In Progress'), ('completed', 'Completed'), ('abandoned', 'Abandoned')], default='in_progress', max_length=20)),
                ('current_page_order', models.IntegerField(default=0)),
                ('responses', models.JSONField(blank=True, default=dict)),
                ('completed_at', models.DateTimeField(blank=True, null=True)),
                ('agent_run', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='onboarding_sessions', to='mlstore.agentrun')),
                ('flow', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='sessions', to='onboarding.onboardingflow')),
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='onboarding_sessions', to='users.user')),
            ],
            options={
                'verbose_name': 'Onboarding Session',
                'verbose_name_plural': 'Onboarding Sessions',
                'ordering': ['-created_at'],
            },
        ),
    ]

View file

@ -1,121 +0,0 @@
from uuid import uuid4
from django.db.models import (
BigAutoField,
BooleanField,
CASCADE,
CharField,
DateTimeField,
ForeignKey,
IntegerField,
JSONField,
Model,
TextField,
UUIDField,
)
from apps.users.mixins import TimeStampMixin
from apps.users.models import User
from apps.orgs.models import Role
from apps.mlstore.models import Agent, AgentRun
class OnboardingFlow(TimeStampMixin, Model):
    """An onboarding flow defined for a role, optionally tied to an agent.

    Default ordering is newest first (``-created_at``).
    """

    # Allowed values for ``status``; new flows start as 'draft'.
    STATUS_CHOICES = [
        ('draft', 'Draft'),
        ('published', 'Published'),
        ('archived', 'Archived'),
    ]
    id = BigAutoField(primary_key=True)
    # Public identifier; generated automatically and not editable.
    uuid = UUIDField(default=uuid4, editable=False, unique=True)
    # Deleting the role deletes its flows.
    role = ForeignKey(Role, on_delete=CASCADE, related_name='onboarding_flows')
    # Optional; note that CASCADE means deleting the agent also deletes the flow.
    agent = ForeignKey(Agent, on_delete=CASCADE, related_name='onboarding_flows', null=True, blank=True)
    title = CharField(max_length=255)
    description = TextField(blank=True, default='')
    status = CharField(max_length=20, choices=STATUS_CHOICES, default='draft')

    class Meta:
        verbose_name = 'Onboarding Flow'
        verbose_name_plural = 'Onboarding Flows'
        ordering = ['-created_at']

    def __str__(self) -> str:
        """Return ``"<title> (<role name>)"``."""
        return f'{self.title} ({self.role.name})'
class OnboardingPage(TimeStampMixin, Model):
    """A single page of an onboarding flow.

    Default ordering is by ``order`` and then ``created_at``.
    """

    id = BigAutoField(primary_key=True)
    # Public identifier; generated automatically and not editable.
    uuid = UUIDField(default=uuid4, editable=False, unique=True)
    # Reverse accessor on the flow is ``flow.pages``.
    flow = ForeignKey(OnboardingFlow, on_delete=CASCADE, related_name='pages')
    order = IntegerField(default=0)
    title = CharField(max_length=255)
    body = TextField(blank=True, default='')
    # Free-form JSON object; defaults to an empty dict.
    meta = JSONField(default=dict, blank=True)

    class Meta:
        verbose_name = 'Onboarding Page'
        verbose_name_plural = 'Onboarding Pages'
        ordering = ['order', 'created_at']

    def __str__(self) -> str:
        """Return ``"<flow title> - <page title>"``."""
        return f'{self.flow.title} - {self.title}'
class OnboardingField(TimeStampMixin, Model):
    """A form field shown on an onboarding page.

    ``key`` is unique per page (``unique_together``); default ordering is by
    ``order`` and then ``created_at``.
    """

    # Allowed values for ``field_type``; the default is 'text'.
    FIELD_TYPES = [
        ('text', 'Text'),
        ('textarea', 'Textarea'),
        ('select', 'Select'),
        ('multiselect', 'Multi Select'),
        ('number', 'Number'),
        ('boolean', 'Boolean'),
        ('date', 'Date'),
    ]
    id = BigAutoField(primary_key=True)
    # Public identifier; generated automatically and not editable.
    uuid = UUIDField(default=uuid4, editable=False, unique=True)
    # Reverse accessor on the page is ``page.fields``.
    page = ForeignKey(OnboardingPage, on_delete=CASCADE, related_name='fields')
    order = IntegerField(default=0)
    key = CharField(max_length=120)
    label = CharField(max_length=255)
    field_type = CharField(max_length=30, choices=FIELD_TYPES, default='text')
    required = BooleanField(default=False)
    help_text = TextField(blank=True, default='')
    placeholder = CharField(max_length=255, blank=True, default='')
    # JSON list, empty by default. Presumably the choices for select-type fields - TODO confirm.
    options = JSONField(default=list, blank=True)
    # Any JSON value, or NULL when no default is set.
    default_value = JSONField(null=True, blank=True, default=None)
    # JSON object, empty by default; its schema is not defined in this file.
    validation = JSONField(default=dict, blank=True)

    class Meta:
        verbose_name = 'Onboarding Field'
        verbose_name_plural = 'Onboarding Fields'
        ordering = ['order', 'created_at']
        unique_together = [['page', 'key']]

    def __str__(self) -> str:
        """Return ``"<page title> - <field label>"``."""
        return f'{self.page.title} - {self.label}'
class OnboardingSession(TimeStampMixin, Model):
    """A user's progress through an onboarding flow.

    Default ordering is newest first (``-created_at``).
    """

    # Allowed values for ``status``; new sessions start as 'in_progress'.
    STATUS_CHOICES = [
        ('in_progress', 'In Progress'),
        ('completed', 'Completed'),
        ('abandoned', 'Abandoned'),
    ]
    id = BigAutoField(primary_key=True)
    # Public identifier; generated automatically and not editable.
    uuid = UUIDField(default=uuid4, editable=False, unique=True)
    flow = ForeignKey(OnboardingFlow, on_delete=CASCADE, related_name='sessions')
    user = ForeignKey(User, on_delete=CASCADE, related_name='onboarding_sessions')
    # Optional; CASCADE means deleting the agent run also deletes the session.
    agent_run = ForeignKey(AgentRun, on_delete=CASCADE, related_name='onboarding_sessions', null=True, blank=True)
    status = CharField(max_length=20, choices=STATUS_CHOICES, default='in_progress')
    current_page_order = IntegerField(default=0)
    # JSON object, empty by default. Presumably keyed by page UUID - verify against the submit view.
    responses = JSONField(default=dict, blank=True)
    # NULL until a completion time is recorded.
    completed_at = DateTimeField(null=True, blank=True)

    class Meta:
        verbose_name = 'Onboarding Session'
        verbose_name_plural = 'Onboarding Sessions'
        ordering = ['-created_at']

    def __str__(self) -> str:
        """Return ``"<user email address> - <flow title>"``."""
        return f'{self.user.email_address} - {self.flow.title}'

View file

@ -1,105 +0,0 @@
from rest_framework import serializers
from .models import OnboardingFlow, OnboardingPage, OnboardingField, OnboardingSession
from apps.orgs.models import Role
from apps.mlstore.models import Agent
class OnboardingFieldSerializer(serializers.ModelSerializer):
    """Serializer for ``OnboardingField``; the parent page is referenced by its UUID."""

    page = serializers.SlugRelatedField(
        queryset=OnboardingPage.objects.all(),
        slug_field='uuid',
    )

    class Meta:
        model = OnboardingField
        read_only_fields = ['id', 'uuid']
        fields = [
            'id', 'uuid', 'page', 'order',
            'key', 'label', 'field_type', 'required',
            'help_text', 'placeholder', 'options',
            'default_value', 'validation',
        ]
class OnboardingPageSerializer(serializers.ModelSerializer):
    """Serializer for ``OnboardingPage`` with its form fields nested read-only.

    The parent flow is referenced by its UUID.
    """

    # Declared serializer field named "fields" (the page's related form fields);
    # not to be confused with ``Meta.fields`` below.
    fields = OnboardingFieldSerializer(read_only=True, many=True)
    flow = serializers.SlugRelatedField(
        queryset=OnboardingFlow.objects.all(),
        slug_field='uuid',
    )

    class Meta:
        model = OnboardingPage
        read_only_fields = ['id', 'uuid']
        fields = [
            'id', 'uuid', 'flow', 'order',
            'title', 'body', 'meta', 'fields',
        ]
class OnboardingFlowSerializer(serializers.ModelSerializer):
    """Serializer for ``OnboardingFlow``; role and agent are referenced by UUID.

    ``agent`` is optional and may be null.
    """

    role = serializers.SlugRelatedField(
        queryset=Role.objects.all(),
        slug_field='uuid',
    )
    agent = serializers.SlugRelatedField(
        queryset=Agent.objects.all(),
        slug_field='uuid',
        required=False,
        allow_null=True,
    )

    class Meta:
        model = OnboardingFlow
        # Kept as a list: OnboardingFlowDetailSerializer extends it.
        fields = [
            'id', 'uuid', 'role', 'agent',
            'title', 'description', 'status',
            'created_at', 'updated_at',
        ]
        read_only_fields = ['id', 'uuid', 'created_at', 'updated_at']
class OnboardingFlowDetailSerializer(OnboardingFlowSerializer):
    """Flow serializer that additionally nests the flow's pages (read-only)."""

    pages = OnboardingPageSerializer(read_only=True, many=True)

    class Meta(OnboardingFlowSerializer.Meta):
        # Parent field list plus the nested pages.
        fields = [*OnboardingFlowSerializer.Meta.fields, 'pages']
class OnboardingSessionSerializer(serializers.ModelSerializer):
    """Serializer for ``OnboardingSession``.

    ``flow`` is writable and given by UUID; ``user`` and ``agent_run`` are
    exposed by UUID but read-only.
    """

    flow = serializers.SlugRelatedField(
        queryset=OnboardingFlow.objects.all(),
        slug_field='uuid',
    )
    user = serializers.SlugRelatedField(read_only=True, slug_field='uuid')
    agent_run = serializers.SlugRelatedField(read_only=True, slug_field='uuid')

    class Meta:
        model = OnboardingSession
        fields = [
            'id', 'uuid', 'flow', 'user', 'agent_run',
            'status', 'current_page_order', 'responses',
            'created_at', 'updated_at', 'completed_at',
        ]
        read_only_fields = [
            'id', 'uuid', 'user', 'agent_run',
            'created_at', 'updated_at', 'completed_at',
        ]
class OnboardingSubmissionSerializer(serializers.Serializer):
    """Input payload for submitting one page's responses.

    ``mark_complete`` is optional and defaults to False.
    """

    page_uuid = serializers.CharField()
    responses = serializers.DictField()
    mark_complete = serializers.BooleanField(default=False, required=False)
class OnboardingFeedbackSerializer(serializers.Serializer):
    """Input payload for a feedback request on one page's responses.

    ``question`` is optional, may be blank, and defaults to the empty string.
    """

    page_uuid = serializers.CharField()
    responses = serializers.DictField()
    question = serializers.CharField(default='', allow_blank=True, required=False)

View file

@ -1,124 +0,0 @@
from django.contrib.auth import get_user_model
from django.test import TestCase
from rest_framework.test import APIRequestFactory, force_authenticate
from rest_framework.status import HTTP_200_OK, HTTP_201_CREATED, HTTP_403_FORBIDDEN
from apps.orgs.models import Organization, Role
from apps.mlstore.models import AgentModel, Agent
from apps.onboarding.models import OnboardingFlow, OnboardingPage, OnboardingSession
from apps.onboarding.viewsets import OnboardingFlowViewSet, OnboardingSessionViewSet
User = get_user_model()
class OnboardingAPITests(TestCase):
    """Request-level tests for ``OnboardingFlowViewSet`` and ``OnboardingSessionViewSet``."""

    def setUp(self):
        """Create the request factory and the user/manager/org/role/model/agent fixtures."""
        self.factory = APIRequestFactory()
        make_user = User.objects.create_user
        self.user = make_user(email_address='user@example.com', password='pass')
        self.manager = make_user(email_address='manager@example.com', password='pass', is_manager=True)
        self.org = Organization.objects.create(name='Org', owner=self.manager)
        self.role = Role.objects.create(name='Engineer', organization=self.org)
        self.model = AgentModel.objects.create(name='test-model', version='v1', path='model.gguf')
        self.agent = Agent.objects.create(model=self.model, organization=self.org)

    def _new_flow(self):
        """Create the flow used by most tests (title 'Flow', empty description)."""
        return OnboardingFlow.objects.create(
            role=self.role, agent=self.agent, title='Flow', description=''
        )

    def _dispatch(self, viewset, actions, request, user, **view_kwargs):
        """Authenticate ``request`` as ``user`` and run it through ``viewset`` bound to ``actions``."""
        force_authenticate(request, user=user)
        return viewset.as_view(actions)(request, **view_kwargs)

    def test_create_flow(self):
        data = {
            'role': str(self.role.uuid),
            'agent': str(self.agent.uuid),
            'title': 'Flow',
            'description': 'Desc',
            'status': 'draft',
        }
        response = self._dispatch(
            OnboardingFlowViewSet, {'post': 'create'}, self.factory.post('/', data), self.manager
        )
        self.assertIn(response.status_code, (HTTP_200_OK, HTTP_201_CREATED))
        self.assertTrue(OnboardingFlow.objects.filter(title='Flow').exists())

    def test_pages_action(self):
        flow = self._new_flow()
        OnboardingPage.objects.create(flow=flow, order=0, title='Page 1', body='Body')
        response = self._dispatch(
            OnboardingFlowViewSet,
            {'get': 'pages'},
            self.factory.get('/'),
            self.manager,
            uuid=str(flow.uuid),
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        self.assertEqual(len(response.data.get('pages', [])), 1)

    def test_create_session(self):
        flow = self._new_flow()
        request = self.factory.post('/', {'flow': str(flow.uuid)})
        response = self._dispatch(OnboardingSessionViewSet, {'post': 'create'}, request, self.user)
        self.assertIn(response.status_code, (HTTP_200_OK, HTTP_201_CREATED))
        self.assertTrue(OnboardingSession.objects.filter(flow=flow, user=self.user).exists())

    def test_submit_updates_session(self):
        flow = self._new_flow()
        page = OnboardingPage.objects.create(flow=flow, order=0, title='Page 1', body='Body')
        session = OnboardingSession.objects.create(flow=flow, user=self.user)
        payload = {'page_uuid': str(page.uuid), 'responses': {'q1': 'a'}, 'mark_complete': True}
        response = self._dispatch(
            OnboardingSessionViewSet,
            {'post': 'submit'},
            self.factory.post('/', payload, format='json'),
            self.user,
            uuid=str(session.uuid),
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        session.refresh_from_db()
        self.assertEqual(session.status, 'completed')
        # Responses are expected to be stored under the page's UUID.
        self.assertIn(str(page.uuid), session.responses)

    def test_publish_flow_as_manager(self):
        flow = self._new_flow()
        self.assertEqual(flow.status, 'draft')
        response = self._dispatch(
            OnboardingFlowViewSet,
            {'post': 'publish'},
            self.factory.post('/'),
            self.manager,
            uuid=str(flow.uuid),
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        flow.refresh_from_db()
        self.assertEqual(flow.status, 'published')

    def test_publish_flow_requires_manager(self):
        flow = self._new_flow()
        response = self._dispatch(
            OnboardingFlowViewSet,
            {'post': 'publish'},
            self.factory.post('/'),
            self.user,
            uuid=str(flow.uuid),
        )
        self.assertEqual(response.status_code, HTTP_403_FORBIDDEN)

    def test_get_or_create_session_creates_when_missing(self):
        flow = self._new_flow()
        request = self.factory.post('/', {'flow': str(flow.uuid)})
        response = self._dispatch(
            OnboardingSessionViewSet, {'post': 'get_or_create'}, request, self.user
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        self.assertTrue(OnboardingSession.objects.filter(flow=flow, user=self.user).exists())

    def test_get_or_create_session_reuses_active(self):
        flow = self._new_flow()
        existing = OnboardingSession.objects.create(flow=flow, user=self.user, current_page_order=1)
        request = self.factory.post('/', {'flow': str(flow.uuid)})
        response = self._dispatch(
            OnboardingSessionViewSet, {'post': 'get_or_create'}, request, self.user
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        self.assertEqual(response.data.get('uuid'), str(existing.uuid))
        self.assertEqual(response.data.get('current_page_order'), 1)

    def test_get_or_create_session_creates_after_completion(self):
        flow = self._new_flow()
        completed = OnboardingSession.objects.create(flow=flow, user=self.user, status='completed')
        request = self.factory.post('/', {'flow': str(flow.uuid)})
        response = self._dispatch(
            OnboardingSessionViewSet, {'post': 'get_or_create'}, request, self.user
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        # A completed session must not be handed back; a different one is expected.
        self.assertNotEqual(response.data.get('uuid'), str(completed.uuid))

View file

@ -1,41 +0,0 @@
from django.test import TestCase
from django.contrib.auth import get_user_model
from apps.orgs.models import Organization, Role
from apps.mlstore.models import AgentModel, Agent
from apps.onboarding.models import OnboardingFlow, OnboardingPage, OnboardingField, OnboardingSession
User = get_user_model()
class OnboardingModelTests(TestCase):
    """Model-level tests for the onboarding app: ``__str__`` output and the per-page key constraint."""

    def setUp(self):
        """Create a user, a manager-owned organization, a role, a model and an agent."""
        self.user = User.objects.create_user(email_address='user@example.com', password='pass')
        self.manager = User.objects.create_user(email_address='manager@example.com', password='pass', is_manager=True)
        self.org = Organization.objects.create(name='Org', owner=self.manager)
        self.role = Role.objects.create(name='Engineer', organization=self.org)
        self.model = AgentModel.objects.create(name='test-model', version='v1', path='model.gguf')
        self.agent = Agent.objects.create(model=self.model, organization=self.org)

    def test_flow_str(self):
        flow = OnboardingFlow.objects.create(role=self.role, agent=self.agent, title='Welcome', description='Intro')
        self.assertIn('Welcome', str(flow))
        self.assertIn(self.role.name, str(flow))

    def test_page_and_field_str(self):
        flow = OnboardingFlow.objects.create(role=self.role, agent=self.agent, title='Flow', description='')
        page = OnboardingPage.objects.create(flow=flow, order=0, title='Page 1', body='Body')
        field = OnboardingField.objects.create(page=page, order=0, key='q1', label='Question 1')
        self.assertIn(flow.title, str(page))
        self.assertIn(field.label, str(field))

    def test_field_unique_key_per_page(self):
        """A second field with the same key on the same page must violate the DB constraint."""
        # Local import: this module's import block does not provide these names.
        from django.db import IntegrityError, transaction

        flow = OnboardingFlow.objects.create(role=self.role, agent=self.agent, title='Flow', description='')
        page = OnboardingPage.objects.create(flow=flow, order=0, title='Page 1', body='Body')
        OnboardingField.objects.create(page=page, order=0, key='dup', label='Dup 1')
        # Assert the specific constraint error: a bare ``Exception`` would also
        # pass on unrelated failures such as a typo in the keyword arguments.
        # The inner atomic block keeps the TestCase's outer transaction usable
        # after the database error.
        with self.assertRaises(IntegrityError), transaction.atomic():
            OnboardingField.objects.create(page=page, order=1, key='dup', label='Dup 2')

    def test_session_str(self):
        flow = OnboardingFlow.objects.create(role=self.role, agent=self.agent, title='Flow', description='')
        session = OnboardingSession.objects.create(flow=flow, user=self.user)
        self.assertIn(self.user.email_address, str(session))
        self.assertIn(flow.title, str(session))

View file

@ -1,451 +0,0 @@
import json
import logging
import re
import html
from typing import Any
from django.db import transaction
from django.utils import timezone
from rest_framework import status
from rest_framework.exceptions import PermissionDenied
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.viewsets import ModelViewSet
from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
from apps.mlstore.models import AgentEvent, AgentRun
from apps.mlstore import services as ml_services
from .models import OnboardingFlow, OnboardingPage, OnboardingField, OnboardingSession
from .serializers import (
OnboardingFlowSerializer,
OnboardingFlowDetailSerializer,
OnboardingPageSerializer,
OnboardingFieldSerializer,
OnboardingSessionSerializer,
OnboardingSubmissionSerializer,
OnboardingFeedbackSerializer,
)
logger = logging.getLogger(__name__)
def _extract_json(text: str) -> dict[str, Any]:
if not text:
return {}
try:
return json.loads(text)
except Exception:
pass
# Prefer fenced json blocks
fenced = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, re.IGNORECASE)
if fenced:
try:
return json.loads(fenced.group(1))
except Exception:
return {}
# Fallback: find first balanced JSON object
start = text.find('{')
if start == -1:
return {}
depth = 0
for idx in range(start, len(text)):
char = text[idx]
if char == '{':
depth += 1
elif char == '}':
depth -= 1
if depth == 0:
candidate = text[start:idx + 1]
try:
return json.loads(candidate)
except Exception:
return {}
return {}
def _strip_html(text: str) -> str:
if not text:
return ""
cleaned = re.sub(r"<[^>]+>", " ", text)
cleaned = html.unescape(cleaned)
return re.sub(r"\s+", " ", cleaned).strip()
def _send_agent_progress_event(agent_run: AgentRun, content: dict):
    """Record a 'progress' event for ``agent_run`` and broadcast it to the agent's channel group.

    Best-effort: any failure while saving or sending is logged as a warning
    and swallowed, so callers are never interrupted by it.
    """
    try:
        AgentEvent.objects.create(execution=agent_run, event_type='progress', content=content)
        # Group name is derived from the run's agent UUID.
        group = f"mlstore_agent_{agent_run.agent.uuid}"
        send_to_group = async_to_sync(get_channel_layer().group_send)
        message = {
            "type": "mlstore_event",
            "event_type": "progress",
            "content": content,
            "timestamp": timezone.now().isoformat(),
        }
        send_to_group(group, message)
    except Exception as exc:
        logger.warning("Failed to send progress event: %s", exc)
class OnboardingFlowViewSet(ModelViewSet):
queryset = OnboardingFlow.objects.select_related('role', 'agent').all()
serializer_class = OnboardingFlowSerializer
lookup_field = 'uuid'
def get_queryset(self):
    """Return the base queryset narrowed by the optional ``role`` and ``status`` query parameters.

    ``role`` is matched against the role's UUID and ``status`` against the
    flow status. A ``role`` value that is not a well-formed UUID matches
    nothing (empty queryset) instead of raising inside the ORM lookup.
    """
    # Local import: the module's import block does not provide ``uuid``.
    from uuid import UUID

    qs = super().get_queryset()
    params = self.request.query_params
    role_uuid = params.get('role')
    status_filter = params.get('status')
    if role_uuid:
        # Assumes Role.uuid is a UUIDField (as the uuid columns in this app
        # are) - TODO confirm. For such a field a malformed value makes the
        # lookup raise ValidationError, which would surface as a 500.
        try:
            UUID(role_uuid)
        except ValueError:
            return qs.none()
        qs = qs.filter(role__uuid=role_uuid)
    if status_filter:
        qs = qs.filter(status=status_filter)
    return qs
def get_serializer_class(self):
    """Use the detail serializer for the ``retrieve`` and ``pages`` actions, the default otherwise."""
    detail_actions = ('retrieve', 'pages')
    if self.action not in detail_actions:
        return super().get_serializer_class()
    return OnboardingFlowDetailSerializer
@action(detail=True, methods=['get'])
def pages(self, request, pk=None, uuid=None):
    """Return the addressed flow rendered with the detail serializer (which nests its pages)."""
    detail = OnboardingFlowDetailSerializer(
        self.get_object(),
        context={'request': request},
    )
    return Response(detail.data)
@action(detail=True, methods=['post'])
def generate(self, request, pk=None, uuid=None):
flow = self.get_object()
if not request.user.is_authenticated or not getattr(request.user, 'is_manager', False):
return Response({"error": "permission_denied"}, status=status.HTTP_403_FORBIDDEN)
if not flow.agent or not flow.agent.model or not flow.agent.model.path:
return Response(
{"error": "flow_agent_model_required"},
status=status.HTTP_400_BAD_REQUEST,
)
instructions = request.data.get('instructions') or ''
rag_context = ""
try:
rag_context = ml_services.get_context_for_query(
query=f"Create onboarding content for role {flow.role.name}",
role_uuid=str(flow.role.uuid),
top_k=6,
similarity_threshold=0.35,
)
except Exception as e:
logger.warning("Onboarding generation RAG lookup failed: %s", e)
prompt = (
"You are creating onboarding content as JSON. "
"Return ONLY valid JSON (no prose, no markdown, no code fences).\n"
"Do not include explanations or examples.\n"
"Do not include HTML tags. Use plain text only.\n"
"Each page body must be 3-6 paragraphs, at least 320 words total, and include 1 short list of 3-5 bullets.\n"
"Before writing the body, create a brief outline of the key points to cover and include it in meta.outline.\n"
"The outline should be a short list of 3-6 bullets, not chain-of-thought.\n"
"Do NOT ask about the learner's personal experience. Ask about what someone in the role may encounter.\n"
"Do NOT use any select or multiselect fields. Use only text, textarea, number, boolean, or date.\n"
"Use the provided context for accurate, role-specific content.\n"
"If context is insufficient, make reasonable assumptions without inventing tools or policies.\n"
"JSON shape:\n"
"{\n"
" \"title\": string,\n"
" \"description\": string,\n"
" \"pages\": [\n"
" {\n"
" \"title\": string,\n"
" \"body\": string,\n"
" \"meta\": { \"outline\": [string] },\n"
" \"fields\": [\n"
" {\n"
" \"key\": string,\n"
" \"label\": string,\n"
" \"type\": one of [text, textarea, number, boolean, date],\n"
" \"required\": boolean,\n"
" \"help_text\": string,\n"
" \"placeholder\": string,\n"
" \"options\": []\n"
" }\n"
" ]\n"
" }\n"
" ]\n"
"}\n"
f"Role: {flow.role.name}\n"
f"Role description: {flow.role.description}\n"
f"Flow title: {flow.title}\n"
f"Flow description: {flow.description}\n"
f"Extra instructions: {instructions}\n"
f"Context:\n{rag_context}\n"
)
try:
result = ml_services.infer_with_model(flow.agent.model.path, prompt, {
"max_tokens": 1800,
"temperature": 0.2,
})
except Exception as e:
logger.error("Onboarding generate inference failed: %s", e, exc_info=True)
return Response({"error": "generation_failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
response_text = ''
if isinstance(result, dict):
response_text = result.get('response') or result.get('result') or ''
payload = _extract_json(str(response_text))
if not payload or 'pages' not in payload:
return Response({"error": "invalid_generation_output", "raw": response_text}, status=status.HTTP_400_BAD_REQUEST)
with transaction.atomic():
flow.title = payload.get('title') or flow.title
# Keep existing description on regenerate unless explicitly empty
if not flow.description:
flow.description = payload.get('description') or flow.description
if flow.status != 'draft':
flow.status = 'draft'
flow.save(update_fields=['title', 'description', 'status'])
OnboardingPage.objects.filter(flow=flow).delete()
pages = payload.get('pages') or []
for page_index, page in enumerate(pages):
body_text = _strip_html(page.get('body') or '')
page_obj = OnboardingPage.objects.create(
flow=flow,
order=page_index,
title=page.get('title') or f"Page {page_index + 1}",
body=body_text,
meta=page.get('meta') or {},
)
for field_index, field in enumerate(page.get('fields') or []):
field_type = field.get('type') or 'text'
if field_type not in {"text", "textarea", "number", "boolean", "date"}:
field_type = 'text'
OnboardingField.objects.create(
page=page_obj,
order=field_index,
key=field.get('key') or f"field_{field_index + 1}",
label=field.get('label') or f"Field {field_index + 1}",
field_type=field_type,
required=bool(field.get('required')),
help_text=field.get('help_text') or '',
placeholder=field.get('placeholder') or '',
options=[],
default_value=field.get('default_value') if field.get('default_value') is not None else None,
validation=field.get('validation') or {},
)
serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
return Response(serializer.data)
@action(detail=True, methods=['post'])
def publish(self, request, pk=None, uuid=None):
flow = self.get_object()
if not request.user.is_authenticated or not getattr(request.user, 'is_manager', False):
return Response({"error": "permission_denied"}, status=status.HTTP_403_FORBIDDEN)
if flow.status != 'published':
flow.status = 'published'
flow.save(update_fields=['status'])
serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
return Response(serializer.data)
class OnboardingPageViewSet(ModelViewSet):
    """Model viewset for onboarding pages, addressed by ``uuid``."""

    lookup_field = 'uuid'
    serializer_class = OnboardingPageSerializer
    # Flow is joined and fields are prefetched with the page query.
    queryset = (
        OnboardingPage.objects
        .select_related('flow')
        .prefetch_related('fields')
        .all()
    )
class OnboardingFieldViewSet(ModelViewSet):
    """Model viewset for onboarding form fields, addressed by ``uuid``."""

    lookup_field = 'uuid'
    serializer_class = OnboardingFieldSerializer
    queryset = OnboardingField.objects.select_related('page').all()
class OnboardingSessionViewSet(ModelViewSet):
    """API for a learner's onboarding sessions: create/resume, submit page answers, request feedback.

    Sessions are looked up by ``uuid``. Managers see every session, other
    authenticated users see only their own, and anonymous callers see none.
    """

    queryset = OnboardingSession.objects.select_related('flow', 'user', 'agent_run', 'flow__agent').all()
    serializer_class = OnboardingSessionSerializer
    lookup_field = 'uuid'

    def get_queryset(self):
        """Scope the queryset to what the requester is allowed to see."""
        qs = super().get_queryset()
        user = self.request.user
        if not user or not user.is_authenticated:
            # Previously anonymous callers fell through to the unfiltered
            # queryset and could read every learner's responses.
            return qs.none()
        if not getattr(user, 'is_manager', False):
            qs = qs.filter(user=user)
        return qs

    @staticmethod
    def _require_authenticated(request):
        """Raise PermissionDenied unless the request carries an authenticated user."""
        if not request.user or not request.user.is_authenticated:
            raise PermissionDenied("Authentication required")

    @staticmethod
    def _start_agent_run(flow, user):
        """Create the AgentRun that tracks a session, or return None when the flow has no agent."""
        if not flow or not flow.agent:
            return None
        return AgentRun.objects.create(
            agent=flow.agent,
            user=user,
            input_data={
                "type": "onboarding_session",
                "flow_uuid": str(flow.uuid),
                "role_uuid": str(flow.role.uuid),
            },
        )

    def perform_create(self, serializer):
        """Save a new session owned by the requester, with an AgentRun when the flow has an agent."""
        self._require_authenticated(self.request)
        flow = serializer.validated_data.get('flow')
        agent_run = self._start_agent_run(flow, self.request.user)
        serializer.save(user=self.request.user, agent_run=agent_run)

    @action(detail=False, methods=['post'])
    def get_or_create(self, request):
        """Return the requester's latest non-completed session for ``flow``, creating one if needed.

        Responses: 400 ``flow_required`` when ``flow`` is missing; 404
        ``flow_not_found`` when it does not identify a flow (including a
        malformed UUID); otherwise the serialized session.
        """
        # Local import: querying a UUIDField with a malformed value raises
        # Django's ValidationError, which would otherwise surface as a 500.
        from django.core.exceptions import ValidationError as DjangoValidationError

        self._require_authenticated(request)
        flow_uuid = request.data.get('flow')
        if not flow_uuid:
            return Response({"error": "flow_required"}, status=status.HTTP_400_BAD_REQUEST)
        try:
            flow = OnboardingFlow.objects.get(uuid=flow_uuid)
        except (OnboardingFlow.DoesNotExist, DjangoValidationError, ValueError, TypeError):
            return Response({"error": "flow_not_found"}, status=status.HTTP_404_NOT_FOUND)
        session = (
            OnboardingSession.objects
            .filter(flow=flow, user=request.user)
            .exclude(status='completed')
            .order_by('-updated_at')
            .first()
        )
        if not session:
            session = OnboardingSession.objects.create(
                flow=flow,
                user=request.user,
                agent_run=self._start_agent_run(flow, request.user),
            )
        return Response(OnboardingSessionSerializer(session, context={'request': request}).data)

    @action(detail=True, methods=['post'])
    def submit(self, request, pk=None, uuid=None):
        """Store the answers for one page and advance (or complete) the session.

        The session is marked completed when ``mark_complete`` is set or the
        page's ``order`` is the last index of the flow's pages. When the
        session has an agent run, a progress event is emitted and the run's
        ``output_data['onboarding']`` is refreshed with all responses.
        Returns 404 ``page_not_found`` if the page is not part of the flow.
        """
        session = self.get_object()
        serializer = OnboardingSubmissionSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        page_uuid = serializer.validated_data['page_uuid']
        responses = serializer.validated_data['responses']
        mark_complete = serializer.validated_data.get('mark_complete')
        try:
            page = OnboardingPage.objects.get(flow=session.flow, uuid=page_uuid)
        except OnboardingPage.DoesNotExist:
            return Response({"error": "page_not_found"}, status=status.HTTP_404_NOT_FOUND)

        # Copy so the stored JSON is replaced rather than mutated in place.
        responses_payload = dict(session.responses or {})
        responses_payload[str(page.uuid)] = responses
        session.responses = responses_payload
        session.current_page_order = page.order
        if mark_complete or page.order >= session.flow.pages.count() - 1:
            session.status = 'completed'
            session.completed_at = timezone.now()
        # 'updated_at' is listed because save(update_fields=...) skips auto_now
        # fields that are not named, and get_or_create orders sessions by it.
        # NOTE(review): assumes updated_at is an auto_now field — confirm on the model.
        session.save(update_fields=['responses', 'current_page_order', 'status', 'completed_at', 'updated_at'])

        if session.agent_run:
            progress_payload = {
                "flow_uuid": str(session.flow.uuid),
                "session_uuid": str(session.uuid),
                "page_uuid": str(page.uuid),
                "page_order": page.order,
                "status": session.status,
                "responses": responses,
            }
            _send_agent_progress_event(session.agent_run, progress_payload)
            session.agent_run.output_data = {
                **(session.agent_run.output_data or {}),
                "onboarding": session.responses,
            }
            session.agent_run.save(update_fields=['output_data'])
        return Response(OnboardingSessionSerializer(session, context={'request': request}).data)

    @action(detail=True, methods=['post'])
    def feedback(self, request, pk=None, uuid=None):
        """Ask the flow's model for feedback on one page's answers and store it on the session.

        Feedback is kept under ``responses['__feedback__'][<page uuid>]``.
        If the model output contains no JSON object, the raw text is used as
        the summary. Responses: 404 ``page_not_found``, 400
        ``flow_agent_model_required``, 500 ``feedback_failed``, otherwise
        ``{"feedback": ..., "session": ...}``.
        """
        session = self.get_object()
        serializer = OnboardingFeedbackSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        page_uuid = serializer.validated_data['page_uuid']
        responses = serializer.validated_data['responses']
        question = serializer.validated_data.get('question') or ''
        try:
            page = OnboardingPage.objects.get(flow=session.flow, uuid=page_uuid)
        except OnboardingPage.DoesNotExist:
            return Response({"error": "page_not_found"}, status=status.HTTP_404_NOT_FOUND)
        if not session.flow.agent or not session.flow.agent.model or not session.flow.agent.model.path:
            return Response({"error": "flow_agent_model_required"}, status=status.HTTP_400_BAD_REQUEST)

        prompt = (
            "You are an onboarding assessor. Provide concise feedback addressed directly to the learner using second-person \"You\" statements.\n"
            "Return ONLY valid JSON (no prose, no markdown, no code fences).\n"
            "JSON shape:\n"
            "{\n"
            " \"summary\": string\n"
            "}\n\n"
            f"Page title: {page.title}\n"
            f"Page body: {page.body}\n"
            f"Responses: {json.dumps(responses)}\n"
        )
        if question:
            prompt += f"Learner question: {question}\n"
        try:
            result = ml_services.infer_with_model(session.flow.agent.model.path, prompt, {
                "max_tokens": 900,
                "temperature": 0.2,
            })
        except Exception as e:
            logger.error("Onboarding feedback inference failed: %s", e, exc_info=True)
            return Response({"error": "feedback_failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        feedback_text = ''
        if isinstance(result, dict):
            feedback_text = result.get('response') or result.get('result') or ''
        feedback_text = str(feedback_text).strip()
        feedback_payload = _extract_json(feedback_text)
        if not feedback_payload:
            feedback_payload = {
                "summary": feedback_text or "Feedback generated.",
            }

        responses_payload = dict(session.responses or {})
        feedback_store = dict(responses_payload.get("__feedback__") or {})
        feedback_store[str(page.uuid)] = {
            "feedback": feedback_payload,
            "question": question,
            "updated_at": timezone.now().isoformat(),
        }
        responses_payload["__feedback__"] = feedback_store
        session.responses = responses_payload
        # See submit(): 'updated_at' must be named for auto_now to apply.
        session.save(update_fields=['responses', 'updated_at'])
        return Response({
            "feedback": feedback_payload,
            "session": OnboardingSessionSerializer(session, context={'request': request}).data,
        })

View file

View file

@ -1,61 +0,0 @@
from django.contrib.admin import ModelAdmin, TabularInline, register
from apps.orgs.models import Organization, OrganizationInvitation, OrganizationMembership, Role, RoleMembership, TrainingFile
class OrganizationMembershipInline(TabularInline):
    """Tabular inline for OrganizationMembership rows; no blank extra rows."""

    model = OrganizationMembership
    raw_id_fields = ('user',)
    extra = 0
class RoleInline(TabularInline):
    """Tabular inline for Role rows; no blank extra rows."""

    extra = 0
    model = Role
class RoleMembershipInline(TabularInline):
    """Tabular inline for RoleMembership rows; no blank extra rows."""

    model = RoleMembership
    raw_id_fields = ('user',)
    extra = 0
@register(Organization)
class OrganizationAdmin(ModelAdmin):
    """Admin for organizations, with memberships and roles editable inline."""

    list_display = (
        'id',
        'uuid',
        'name',
        'owner',
        'created_at',
        'updated_at',
    )
    list_filter = ('created_at',)
    search_fields = ('name', 'owner__email_address')
    raw_id_fields = ('owner',)
    readonly_fields = ('uuid', 'created_at', 'updated_at')
    inlines = (OrganizationMembershipInline, RoleInline)
@register(OrganizationMembership)
class OrganizationMembershipAdmin(ModelAdmin):
    """Admin for user-to-organization membership rows."""

    list_display = ('id', 'user', 'organization')
    list_filter = ('created_at',)
    search_fields = (
        'user__email_address',
        'organization__name',
    )
    raw_id_fields = ('user', 'organization')
@register(OrganizationInvitation)
class OrganizationInvitationAdmin(ModelAdmin):
    """Admin for organization invite tokens; the token and creation time are read-only."""

    list_display = (
        'id',
        'token',
        'organization',
        'created_by',
        'is_active',
        'expires_at',
        'max_uses',
        'created_at',
        'uses',
    )
    list_filter = ('is_active',)
    search_fields = (
        'token',
        'organization__name',
        'created_by__email_address',
    )
    raw_id_fields = ('organization', 'created_by')
    readonly_fields = ('token', 'created_at')
@register(Role)
class RoleAdmin(ModelAdmin):
    """Admin for roles, with role memberships editable inline."""

    list_display = ('id', 'name', 'organization', 'uuid')
    search_fields = ('name', 'organization__name')
    readonly_fields = ('uuid',)
    raw_id_fields = ('organization',)
    inlines = (RoleMembershipInline,)
@register(RoleMembership)
class RoleMembershipAdmin(ModelAdmin):
    """Admin for user-to-role membership rows."""

    raw_id_fields = ('user', 'role')
    list_display = ('id', 'user', 'role')
@register(TrainingFile)
class TrainingFileAdmin(ModelAdmin):
    """Admin for uploaded training files, filterable by status and processing state."""

    list_display = (
        'id',
        'uuid',
        'file_name',
        'role',
        'uploaded_by',
        'status',
        'is_processed',
        'created_at',
    )
    list_filter = ('status', 'is_processed', 'created_at')
    search_fields = (
        'file_name',
        'role__name',
        'uploaded_by__email_address',
    )
    raw_id_fields = ('role', 'uploaded_by')
    readonly_fields = ('uuid', 'created_at', 'updated_at')

View file

@ -1,5 +0,0 @@
from django.apps import AppConfig
class OrgsConfig(AppConfig):
    """Application configuration for the ``apps.orgs`` Django app."""

    name = 'apps.orgs'
    default_auto_field = 'django.db.models.BigAutoField'

View file

@ -1,129 +0,0 @@
import django.db.models.deletion
import uuid
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial schema for the orgs app.

    Creates Organization, OrganizationInvitation, OrganizationMembership,
    Role, RoleMembership and TrainingFile, and adds the two ``members``
    many-to-many fields once their through models exist.
    """

    initial = True
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]
    operations = [
        migrations.CreateModel(
            name='Organization',
            fields=[
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('name', models.CharField(max_length=255, unique=True)),
                ('description', models.TextField(blank=True, default='')),
                ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='owned_organizations', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'verbose_name': 'Organization',
                'verbose_name_plural': 'Organizations',
            },
        ),
        migrations.CreateModel(
            name='OrganizationInvitation',
            fields=[
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('token', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('expires_at', models.DateTimeField()),
                ('uses', models.IntegerField(default=0)),
                ('max_uses', models.IntegerField(default=1)),
                ('is_active', models.BooleanField(default=True)),
                ('created_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='created_invites', to=settings.AUTH_USER_MODEL)),
                ('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='invite_tokens', to='orgs.organization')),
            ],
            options={
                'verbose_name': 'Invite Token',
                'verbose_name_plural': 'Invite Tokens',
            },
        ),
        migrations.CreateModel(
            name='OrganizationMembership',
            fields=[
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='memberships', to='orgs.organization')),
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='organization_memberships', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'verbose_name': 'Organization Membership',
                'verbose_name_plural': 'Organization Memberships',
                # A user can hold at most one membership per organization.
                'unique_together': {('user', 'organization')},
            },
        ),
        # Added after OrganizationMembership because it is the through model.
        migrations.AddField(
            model_name='organization',
            name='members',
            field=models.ManyToManyField(related_name='organizations', through='orgs.OrganizationMembership', to=settings.AUTH_USER_MODEL),
        ),
        migrations.CreateModel(
            name='Role',
            fields=[
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                # NOTE(review): unique=True makes role names unique across ALL
                # organizations, not per organization — confirm this is intended.
                ('name', models.CharField(max_length=100, unique=True)),
                ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('description', models.TextField(blank=True, default='')),
                ('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='roles', to='orgs.organization')),
            ],
            options={
                'verbose_name': 'Role',
                'verbose_name_plural': 'Roles',
            },
        ),
        migrations.CreateModel(
            name='RoleMembership',
            fields=[
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('role', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='memberships', to='orgs.role')),
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='role_memberships', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'verbose_name': 'Role Membership',
                'verbose_name_plural': 'Role Memberships',
                # A user can hold a given role at most once.
                'unique_together': {('user', 'role')},
            },
        ),
        # Added after RoleMembership because it is the through model.
        migrations.AddField(
            model_name='role',
            name='members',
            field=models.ManyToManyField(related_name='roles', through='orgs.RoleMembership', to=settings.AUTH_USER_MODEL),
        ),
        migrations.CreateModel(
            name='TrainingFile',
            fields=[
                ('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
                ('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
                ('id', models.BigAutoField(primary_key=True, serialize=False)),
                ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)),
                ('file', models.FileField(upload_to='training_files/%Y/%m/%d/')),
                ('file_name', models.CharField(max_length=255)),
                ('file_size', models.IntegerField()),
                ('file_type', models.CharField(max_length=50)),
                ('description', models.TextField(blank=True, default='')),
                ('status', models.CharField(choices=[('ingesting', 'Ingesting'), ('chunked', 'Chunked'), ('embedded', 'Embedded'), ('failed', 'Failed')], default='ingesting', max_length=20)),
                ('is_processed', models.BooleanField(default=False)),
                ('role', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='training_files', to='orgs.role')),
                ('uploaded_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='uploaded_training_files', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'verbose_name': 'Training File',
                'verbose_name_plural': 'Training Files',
                # Newest uploads first.
                'ordering': ['-created_at'],
            },
        ),
    ]

View file

@ -1,158 +0,0 @@
from datetime import timedelta
from uuid import uuid4
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from django.db.models import BigAutoField, BooleanField, CASCADE, CharField, DateTimeField, ForeignKey, ManyToManyField, Model, TextField, UUIDField, IntegerField, FileField
from django.db.models.signals import post_delete, post_save
from django.db import transaction
from django.dispatch import receiver
from apps.users.mixins import TimeStampMixin
from apps.users.models import User
class Organization(TimeStampMixin, Model):
    """An organization with a single owner; users join through OrganizationMembership."""

    id = BigAutoField(primary_key=True)
    uuid = UUIDField(default=uuid4, unique=True, editable=False)
    name = CharField(max_length=255, unique=True)
    description = TextField(blank=True, default='')
    owner = ForeignKey(User, on_delete=CASCADE, related_name='owned_organizations')
    members = ManyToManyField(User, through='OrganizationMembership', related_name='organizations')

    class Meta:
        verbose_name = _('Organization')
        verbose_name_plural = _('Organizations')

    def __str__(self) -> str:
        """Display the organization by its name."""
        return self.name
class OrganizationMembership(TimeStampMixin, Model):
    """Through model linking a user to an organization (one row per pair)."""

    id = BigAutoField(primary_key=True)
    user = ForeignKey(User, on_delete=CASCADE, related_name='organization_memberships')
    organization = ForeignKey(Organization, on_delete=CASCADE, related_name='memberships')

    class Meta:
        verbose_name = _('Organization Membership')
        verbose_name_plural = _('Organization Memberships')
        unique_together = [['user', 'organization']]

    def __str__(self) -> str:
        """Display as '<user full name> - <organization name>'."""
        member_name = self.user.full_name
        org_name = self.organization.name
        return f'{member_name} - {org_name}'
class OrganizationInvitation(TimeStampMixin, Model):
    """A token that lets users join an organization until it expires, is used up, or is deactivated."""

    id = BigAutoField(primary_key=True)
    token = UUIDField(default=uuid4, unique=True, editable=False)
    organization = ForeignKey(Organization, on_delete=CASCADE, related_name="invite_tokens")
    created_by = ForeignKey(User, on_delete=CASCADE, related_name="created_invites")
    expires_at = DateTimeField()
    uses = IntegerField(default=0)
    max_uses = IntegerField(default=1)
    is_active = BooleanField(default=True)

    class Meta:
        verbose_name = _("Invite Token")
        verbose_name_plural = _("Invite Tokens")

    def save(self, *args, **kwargs):
        """Save the invitation, defaulting ``expires_at`` to seven days from now when unset."""
        if not self.expires_at:
            self.expires_at = timezone.now() + timedelta(days=7)
        super().save(*args, **kwargs)

    def is_valid(self):
        """Truthy while the invite is active, has uses remaining, and has not expired."""
        return (
            self.is_active
            and self.uses < self.max_uses
            and timezone.now() < self.expires_at
        )

    def __str__(self) -> str:
        """Display the organization, the creator and the expiry time."""
        org_name = self.organization.name
        creator = self.created_by.full_name
        return f"Invite for {org_name} by {creator} (expires {self.expires_at})"
class Role(TimeStampMixin, Model):
    """A named role inside an organization; users are attached through RoleMembership."""

    id = BigAutoField(primary_key=True)
    name = CharField(max_length=100, unique=True)
    uuid = UUIDField(default=uuid4, editable=False, unique=True)
    description = TextField(blank=True, default='')
    organization = ForeignKey(Organization, on_delete=CASCADE, related_name="roles")
    members = ManyToManyField(User, through="RoleMembership", related_name="roles")

    class Meta:
        verbose_name = _('Role')
        verbose_name_plural = _('Roles')

    def __str__(self) -> str:
        """Display the role by its name."""
        return self.name
class RoleMembership(TimeStampMixin, Model):
    """Through model linking a user to a role (one row per pair)."""

    id = BigAutoField(primary_key=True)
    user = ForeignKey(User, on_delete=CASCADE, related_name="role_memberships")
    role = ForeignKey(Role, on_delete=CASCADE, related_name="memberships")

    class Meta:
        verbose_name = _("Role Membership")
        verbose_name_plural = _("Role Memberships")
        unique_together = [["user", "role"]]

    def __str__(self) -> str:
        """Display as '<user full name> - <role name>'."""
        member_name = self.user.full_name
        role_name = self.role.name
        return f"{member_name} - {role_name}"
class TrainingFile(TimeStampMixin, Model):
    """A file uploaded for a role, with a status field tracking its ingestion."""

    # Extensions (without the dot) accepted for uploads.
    ALLOWED_EXTENSIONS = ('txt', 'pdf', 'md', 'csv', 'json', 'docx', 'doc')
    STATUS_CHOICES = [
        ('ingesting', 'Ingesting'),
        ('chunked', 'Chunked'),
        ('embedded', 'Embedded'),
        ('failed', 'Failed'),
    ]

    id = BigAutoField(primary_key=True)
    uuid = UUIDField(default=uuid4, unique=True, editable=False)
    role = ForeignKey(Role, on_delete=CASCADE, related_name="training_files")
    uploaded_by = ForeignKey(User, on_delete=CASCADE, related_name="uploaded_training_files")
    file = FileField(upload_to='training_files/%Y/%m/%d/')
    file_name = CharField(max_length=255)
    file_size = IntegerField()
    file_type = CharField(max_length=50)
    description = TextField(blank=True, default='')
    status = CharField(max_length=20, choices=STATUS_CHOICES, default='ingesting')
    is_processed = BooleanField(default=False)

    class Meta:
        verbose_name = _("Training File")
        verbose_name_plural = _("Training Files")
        ordering = ['-created_at']

    def __str__(self) -> str:
        """Display as '<file name> - <role name>'."""
        role_name = self.role.name
        return f"{self.file_name} - {role_name}"
@receiver(post_delete, sender=TrainingFile)
def delete_training_file_on_delete(sender, instance, **kwargs):
    """Remove the stored upload after its TrainingFile row has been deleted.

    Deletion goes through the field's storage backend rather than
    ``os.remove(instance.file.path)``, so it also works for storages that do
    not expose a local filesystem path. Cleanup stays best-effort: a failure
    is logged with its traceback instead of being silently discarded, and it
    never propagates to the code that deleted the row.
    """
    if not instance.file:
        return
    try:
        # save=False: the row is already gone, so only the stored file is removed.
        instance.file.delete(save=False)
    except Exception:
        import logging
        logging.getLogger(__name__).warning(
            "Could not delete stored file for TrainingFile %s",
            instance.uuid,
            exc_info=True,
        )
@receiver(post_save, sender=TrainingFile)
def enqueue_training_file_ingestion(sender, instance, created, **kwargs):
    """Queue the ingestion task for a newly created TrainingFile once the transaction commits.

    Saves of existing rows are ignored.
    """
    if created:
        def _dispatch_ingestion():
            # Imported when the callback runs, not at module import time.
            from apps.mlstore.tasks import ingest_training_file_task
            ingest_training_file_task.delay(str(instance.uuid))

        transaction.on_commit(_dispatch_ingestion)

View file

@ -1,127 +0,0 @@
from rest_framework.serializers import ModelSerializer, SerializerMethodField, IntegerField, UUIDField
from rest_framework.exceptions import ValidationError
from apps.orgs.models import Organization, OrganizationMembership, OrganizationInvitation, Role, RoleMembership, TrainingFile
from apps.users.serializers import UserSerializer
class OrganizationSerializer(ModelSerializer):
    """Serializes an organization with its owner and computed member/role counts."""

    owner = UserSerializer(read_only=True)
    member_count = SerializerMethodField()
    role_count = SerializerMethodField()

    class Meta:
        model = Organization
        fields = [
            'id',
            'uuid',
            'name',
            'description',
            'owner',
            'created_at',
            'updated_at',
            'member_count',
            'role_count',
        ]
        read_only_fields = ['uuid', 'owner', 'created_at', 'updated_at']

    def get_member_count(self, obj):
        """Number of membership rows attached to the organization."""
        memberships = obj.memberships
        return memberships.count()

    def get_role_count(self, obj):
        """Number of roles attached to the organization."""
        roles = obj.roles
        return roles.count()
class OrganizationMembershipSerializer(ModelSerializer):
    """Serializes a membership; the user is nested on read and optionally given as ``user_id`` on write."""

    user = UserSerializer(read_only=True)
    user_id = IntegerField(write_only=True, required=False)

    class Meta:
        model = OrganizationMembership
        fields = ['id', 'user', 'user_id', 'organization', 'created_at', 'updated_at']
        read_only_fields = ['organization', 'created_at', 'updated_at']

    def create(self, validated_data):
        """Create the membership, dropping ``user_id`` when it is missing or falsy."""
        if not validated_data.get('user_id'):
            validated_data.pop('user_id', None)
        return super().create(validated_data)
class OrganizationInvitationSerializer(ModelSerializer):
    """Serializes an invitation with its join URL and current validity."""

    created_by = UserSerializer(read_only=True)
    invite_url = SerializerMethodField()
    is_valid = SerializerMethodField()

    class Meta:
        model = OrganizationInvitation
        fields = [
            'id',
            'token',
            'organization',
            'created_by',
            'expires_at',
            'max_uses',
            'is_active',
            'invite_url',
            'is_valid',
            'created_at',
            'updated_at',
            'uses',
        ]
        read_only_fields = [
            'token',
            'organization',
            'created_by',
            'max_uses',
            'created_at',
            'updated_at',
            'uses',
        ]

    def get_invite_url(self, obj):
        """Absolute invite URL when a request is in the context, otherwise the relative path."""
        request = self.context.get('request')
        if not request:
            return f'/invite/{obj.token}'
        return request.build_absolute_uri(f'/invite/{obj.token}')

    def get_is_valid(self, obj):
        """Delegate to the invitation's own ``is_valid()``."""
        return obj.is_valid()
class RoleMembershipSerializer(ModelSerializer):
    """Serializes a role membership with the user nested read-only."""

    user = UserSerializer(read_only=True)

    class Meta:
        model = RoleMembership
        fields = ['id', 'user', 'role', 'created_at']
        read_only_fields = ['created_at']
class RoleSerializer(ModelSerializer):
    """Serializes a role with its organization nested read-only and a computed member count."""

    organization = OrganizationSerializer(read_only=True)
    member_count = SerializerMethodField()

    class Meta:
        model = Role
        fields = [
            'id',
            'uuid',
            'name',
            'organization',
            'member_count',
            'description',
            'created_at',
            'updated_at',
        ]
        read_only_fields = ['uuid', 'organization', 'created_at', 'updated_at']

    def get_member_count(self, obj):
        """Number of membership rows attached to the role."""
        memberships = obj.memberships
        return memberships.count()
class TrainingFileSerializer(ModelSerializer):
    """Serializes training-file uploads.

    On write the role is given as ``role_uuid``; ``file_size`` and
    ``file_type`` are derived from the uploaded file. On read the role and
    uploader are nested and ``file_url`` carries an absolute URL.
    """

    # Upload size cap enforced by validate_file (50 MiB).
    _MAX_FILE_BYTES = 50 * 1024 * 1024

    uploaded_by = UserSerializer(read_only=True)
    file_url = SerializerMethodField()
    role = RoleSerializer(read_only=True)
    role_uuid = UUIDField(write_only=True, required=True)

    class Meta:
        model = TrainingFile
        fields = ['id', 'uuid', 'role', 'role_uuid', 'uploaded_by', 'file', 'file_name', 'file_size', 'file_type', 'description', 'status', 'is_processed', 'file_url', 'created_at', 'updated_at']
        read_only_fields = ['uuid', 'uploaded_by', 'file_size', 'file_type', 'status', 'is_processed', 'created_at', 'updated_at', 'role']

    @staticmethod
    def _extension_of(filename) -> str:
        """Return the lower-cased extension of ``filename`` without the leading dot."""
        import os
        return os.path.splitext(filename)[1][1:].lower()

    def get_file_url(self, obj):
        """Absolute URL of the stored file, or None without a request or file."""
        request = self.context.get('request')
        if request and obj.file:
            return request.build_absolute_uri(obj.file.url)
        return None

    def validate_file(self, value):
        """Reject missing files, disallowed extensions, and files over 50MB.

        Raises:
            ValidationError: so DRF reports the problem as a 400 field error.
                The previous ``ValueError`` is not handled by DRF and surfaced
                as a server error instead.
        """
        if not value:
            raise ValidationError('File is required')
        file_extension = self._extension_of(value.name)
        if file_extension not in TrainingFile.ALLOWED_EXTENSIONS:
            raise ValidationError(
                f'File type ".{file_extension}" is not allowed. '
                f'Allowed types: {", ".join(TrainingFile.ALLOWED_EXTENSIONS)}'
            )
        if value.size > self._MAX_FILE_BYTES:
            raise ValidationError(f'File size must not exceed 50MB. Current size: {value.size / 1024 / 1024:.2f}MB')
        return value

    def create(self, validated_data):
        """Create the TrainingFile, resolving ``role_uuid`` to a Role and filling size/type.

        Raises:
            ValidationError: when ``role_uuid`` is missing or matches no role.
        """
        role_uuid = validated_data.pop('role_uuid', None)
        file_obj = validated_data.get('file')
        if file_obj:
            validated_data['file_size'] = file_obj.size
            validated_data['file_type'] = self._extension_of(file_obj.name)
        if not role_uuid:
            raise ValidationError({'role_uuid': 'Role is required'})
        try:
            role = Role.objects.get(uuid=role_uuid)
        except Role.DoesNotExist:
            raise ValidationError({'role_uuid': 'Role not found'})
        validated_data['role'] = role
        return super().create(validated_data)

View file

@ -1,256 +0,0 @@
from django.contrib.auth import get_user_model
from django.test import TestCase
from django.utils import timezone
from rest_framework.test import APIRequestFactory, force_authenticate
from rest_framework.status import HTTP_200_OK, HTTP_201_CREATED, HTTP_400_BAD_REQUEST, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
from apps.orgs.viewsets import OrganizationViewSet
from apps.orgs.models import Organization, OrganizationMembership, OrganizationInvitation, RoleMembership
User = get_user_model()
class OrganizationAPITests(TestCase):
def setUp(self):
    # Shared fixtures: a request factory, a regular user and a manager user.
    create_user = User.objects.create_user
    self.factory = APIRequestFactory()
    self.user = create_user(email_address='apiuser@example.com', password='pass')
    self.manager = create_user(
        email_address='manager@example.com',
        password='pass',
        is_manager=True,
    )
def test_create_organization_creates_membership(self):
    # Creating an organization through the API must also enrol the creator as a member.
    payload = {'name': 'API Org', 'description': 'Created via API'}
    create_view = OrganizationViewSet.as_view({'post': 'create'})
    request = self.factory.post('/', payload)
    force_authenticate(request, user=self.user)
    response = create_view(request)
    self.assertIn(response.status_code, (HTTP_201_CREATED, HTTP_200_OK))
    created_org = Organization.objects.get(name='API Org')
    creator_membership = OrganizationMembership.objects.filter(
        organization=created_org,
        user=self.user,
    )
    self.assertTrue(creator_membership.exists())
def test_invite_accept_flow(self):
    # A manager creates an invite; another user joins with its token and becomes a member.
    org = Organization.objects.create(name='InviteOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=org, user=self.manager)

    create_invite = OrganizationViewSet.as_view({'post': 'create_invite'})
    invite_request = self.factory.post('/', {})
    force_authenticate(invite_request, user=self.manager)
    invite_response = create_invite(invite_request, uuid=str(org.uuid))
    self.assertIn(invite_response.status_code, (HTTP_201_CREATED, HTTP_200_OK))
    token = invite_response.data.get('token')

    joiner = User.objects.create_user(email_address='other@example.com', password='pass')
    join = OrganizationViewSet.as_view({'post': 'join'})
    join_request = self.factory.post('/', {})
    force_authenticate(join_request, user=joiner)
    join_response = join(join_request, token=str(token))
    self.assertIn(join_response.status_code, (HTTP_200_OK, HTTP_201_CREATED))
    joined = OrganizationMembership.objects.filter(organization=org, user=joiner).exists()
    self.assertTrue(joined)
def test_members_actions_and_invite_revocation(self):
    # Covers: listing members, refusing to remove the owner, creating and
    # listing invites, and deactivating an invite directly in the database.
    org = Organization.objects.create(name='ActionsOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=org, user=self.manager)
    member = User.objects.create_user(email_address='member@example.com', password='pass')
    OrganizationMembership.objects.create(organization=org, user=member)
    org_uuid = str(org.uuid)

    list_members = OrganizationViewSet.as_view({'get': 'list_members'})
    list_request = self.factory.get('/')
    force_authenticate(list_request, user=self.manager)
    list_response = list_members(list_request, uuid=org_uuid)
    self.assertEqual(list_response.status_code, HTTP_200_OK)
    listed_emails = (entry['email_address'] for entry in list_response.data)
    self.assertTrue(any(email == 'member@example.com' for email in listed_emails))

    member.is_manager = True
    member.save()
    member.refresh_from_db()
    self.assertTrue(member.is_manager)

    remove_member = OrganizationViewSet.as_view({'post': 'remove_member'})
    remove_request = self.factory.post('/')
    force_authenticate(remove_request, user=self.manager)
    remove_response = remove_member(remove_request, uuid=org_uuid, user_id=str(org.owner.id))
    self.assertEqual(remove_response.status_code, HTTP_403_FORBIDDEN)

    invites = OrganizationViewSet.as_view({'post': 'create_invite', 'get': 'list_invites'})
    create_request = self.factory.post('/')
    force_authenticate(create_request, user=self.manager)
    create_response = invites(create_request, uuid=org_uuid)
    self.assertIn(create_response.status_code, (HTTP_201_CREATED, HTTP_200_OK))
    token = create_response.data.get('token')

    invites_request = self.factory.get('/')
    force_authenticate(invites_request, user=self.manager)
    invites_response = invites(invites_request, uuid=org_uuid)
    self.assertEqual(invites_response.status_code, HTTP_200_OK)

    OrganizationInvitation.objects.filter(token=token, organization=org).update(is_active=False)
    still_active = OrganizationInvitation.objects.filter(token=token, is_active=True).exists()
    self.assertFalse(still_active)
def test_non_manager_cannot_create_invite(self):
    """``create_invite`` answers 403 for ``self.user`` even though they own the org.

    ``self.user`` is presumably the non-manager fixture from ``setUp``
    (not visible here); the 403 expectation relies on that.
    """
    organization = Organization.objects.create(name='NoCreateOrg', owner=self.user)
    OrganizationMembership.objects.create(organization=organization, user=self.user)

    request = self.factory.post('/')
    force_authenticate(request, user=self.user)
    create_invite = OrganizationViewSet.as_view({'post': 'create_invite'})
    response = create_invite(request, uuid=str(organization.uuid))

    self.assertEqual(response.status_code, HTTP_403_FORBIDDEN)
def test_role_create_forbidden_for_non_manager(self):
    """POSTing to the ``role`` action as a non-manager is refused and creates nothing.

    The previous body only asserted that the viewset has a ``role``
    attribute, which passes regardless of the permission logic. This version
    performs the request: ``self.user`` owns the organization (so the lookup
    succeeds) but is presumably not a manager (fixture defined in ``setUp``,
    not visible here), so the action must answer 403 and no role may exist
    afterwards.
    """
    org = Organization.objects.create(name='RoleNoCreateOrg', owner=self.user)
    OrganizationMembership.objects.create(organization=org, user=self.user)

    view = OrganizationViewSet.as_view({'post': 'role'})
    req = self.factory.post('/', {'name': 'Blocked'})
    force_authenticate(req, user=self.user)
    resp = view(req, uuid=str(org.uuid))

    self.assertEqual(resp.status_code, HTTP_403_FORBIDDEN)
    # The refusal must happen before any row is written.
    self.assertFalse(org.roles.filter(name='Blocked').exists())
def test_role_members_post_missing_user_id_returns_400(self):
    """Placeholder: pins that the viewset exposes no ``role_members`` attribute.

    Despite the name, no request is made; the fixture rows are created and
    the test only asserts the absence of the attribute.
    """
    organization = Organization.objects.create(name='RoleMissingParamOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=organization, user=self.manager)
    organization.roles.create(name='Ops')

    endpoint_present = hasattr(OrganizationViewSet, 'role_members')
    self.assertFalse(endpoint_present)
def test_role_members_post_non_manager_cannot_add_other_user(self):
    """Placeholder: pins that the viewset exposes no ``role_members`` attribute.

    An organization, two memberships and a role are created, but no request
    is issued; only the absence of the attribute is asserted.
    """
    organization = Organization.objects.create(name='RoleAddForbiddenOrg', owner=self.user)
    OrganizationMembership.objects.create(organization=organization, user=self.user)
    target_user = User.objects.create_user(email_address='target@example.com', password='pass')
    OrganizationMembership.objects.create(organization=organization, user=target_user)
    organization.roles.create(name='Contributor')

    endpoint_present = hasattr(OrganizationViewSet, 'role_members')
    self.assertFalse(endpoint_present)
def test_role_members_get_outsider_returns_404(self):
    """A user outside the organization gets 404 when listing a role's members.

    The previous body only asserted that a ``role_members`` attribute does
    not exist and never issued a request. The GET action for role members is
    ``list_role_members``; it resolves the organization through the viewset
    queryset (owner or member only), so an outsider must receive 404.
    """
    org = Organization.objects.create(name='RoleOutsiderOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=org, user=self.manager)
    role = org.roles.create(name='Viewer')
    outsider = User.objects.create_user(email_address='outsider2@example.com', password='pass')

    view = OrganizationViewSet.as_view({'get': 'list_role_members'})
    req = self.factory.get('/')
    force_authenticate(req, user=outsider)
    resp = view(req, uuid=str(org.uuid), role_uuid=str(role.uuid))

    self.assertEqual(resp.status_code, HTTP_404_NOT_FOUND)
def test_non_member_cannot_view_org(self):
    """Retrieving an organization as a user with no link to it yields 404."""
    stranger = User.objects.create_user(email_address='outside@example.com', password='pass')
    organization = Organization.objects.create(name='HiddenOrg', owner=self.manager)

    request = self.factory.get('/')
    force_authenticate(request, user=stranger)
    retrieve = OrganizationViewSet.as_view({'get': 'retrieve'})
    response = retrieve(request, uuid=str(organization.uuid))

    self.assertEqual(response.status_code, HTTP_404_NOT_FOUND)
def test_owner_sees_org_in_list(self):
    """The list endpoint includes organizations the requester owns."""
    Organization.objects.create(name='OwnerListOrg', owner=self.manager)

    request = self.factory.get('/')
    force_authenticate(request, user=self.manager)
    response = OrganizationViewSet.as_view({'get': 'list'})(request)

    self.assertEqual(response.status_code, HTTP_200_OK)
    self.assertTrue(any(entry['name'] == 'OwnerListOrg' for entry in response.data))
def test_member_sees_org_in_list(self):
    """The list endpoint includes organizations the requester is a member of."""
    member = User.objects.create_user(email_address='member2@example.com', password='pass')
    organization = Organization.objects.create(name='MemberListOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=organization, user=member)

    request = self.factory.get('/')
    force_authenticate(request, user=member)
    response = OrganizationViewSet.as_view({'get': 'list'})(request)

    self.assertEqual(response.status_code, HTTP_200_OK)
    self.assertTrue(any(entry['name'] == 'MemberListOrg' for entry in response.data))
def test_non_member_not_in_list(self):
    """The list endpoint omits organizations the requester has no link to."""
    stranger = User.objects.create_user(email_address='outsider@example.com', password='pass')
    Organization.objects.create(name='HiddenOrg2', owner=self.manager)

    request = self.factory.get('/')
    force_authenticate(request, user=stranger)
    response = OrganizationViewSet.as_view({'get': 'list'})(request)

    self.assertEqual(response.status_code, HTTP_200_OK)
    self.assertFalse(any(entry['name'] == 'HiddenOrg2' for entry in response.data))
def test_roles_visible_to_owner_and_member_but_not_outsider(self):
    """Roles are readable by the owner and members, and hidden from outsiders.

    The original test only exercised the ORM, so it could not fail if the
    endpoint's visibility rules regressed. The model-level assertions are
    kept, and the ``role`` GET action is now called as each kind of user:
    owner and member receive 200, the outsider receives 404 because the
    organization is not in their queryset.
    """
    owner = self.manager
    member = User.objects.create_user(email_address='rmember@example.com', password='pass')
    outsider = User.objects.create_user(email_address='routsider@example.com', password='pass')
    org = Organization.objects.create(name='RoleOrg2', owner=owner)
    OrganizationMembership.objects.create(organization=org, user=member)
    role = org.roles.create(name='Tester')

    # Model-level expectations (unchanged from the original test).
    self.assertTrue(org.roles.filter(name='Tester').exists())
    self.assertIn(role, org.roles.all())
    self.assertNotIn(outsider, role.members.all())

    # Endpoint-level visibility.
    role_view = OrganizationViewSet.as_view({'get': 'role'})
    expectations = (
        (owner, HTTP_200_OK),
        (member, HTTP_200_OK),
        (outsider, HTTP_404_NOT_FOUND),
    )
    for viewer, expected_status in expectations:
        with self.subTest(viewer=viewer.email_address):
            req = self.factory.get('/')
            force_authenticate(req, user=viewer)
            resp = role_view(req, uuid=str(org.uuid))
            self.assertEqual(resp.status_code, expected_status)
def test_members_endpoint_only_accessible_to_manager(self):
    """``list_members`` answers 403 to a plain member and an outsider, 200 to the manager."""
    organization = Organization.objects.create(name='MemberOnlyOrg', owner=self.manager)
    plain_member = User.objects.create_user(email_address='monly@example.com', password='pass')
    OrganizationMembership.objects.create(organization=organization, user=plain_member)
    list_members = OrganizationViewSet.as_view({'get': 'list_members'})

    def status_for(requester):
        # Issue a fresh GET as `requester` and report the HTTP status.
        request = self.factory.get('/')
        force_authenticate(request, user=requester)
        return list_members(request, uuid=str(organization.uuid)).status_code

    self.assertEqual(status_for(plain_member), HTTP_403_FORBIDDEN)

    stranger = User.objects.create_user(email_address='notmem@example.com', password='pass')
    self.assertEqual(status_for(stranger), HTTP_403_FORBIDDEN)

    self.assertEqual(status_for(self.manager), HTTP_200_OK)
def test_invite_accept_invalid_or_expired(self):
    """Joining with an invitation whose expiry lies in the past is rejected.

    The expiry is moved to one day before the invitation's creation time, so
    the ``join`` action must answer 400 (or 404).

    ``timedelta`` is imported from ``datetime`` instead of being reached
    through ``django.utils.timezone``: that module only exposes it as an
    incidental, undocumented import, so relying on it is fragile.
    """
    from datetime import timedelta

    org = Organization.objects.create(name='InvalidInviteOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=org, user=self.manager)
    invite = OrganizationInvitation.objects.create(organization=org, created_by=self.user)
    invite.expires_at = invite.created_at - timedelta(days=1)
    invite.save()

    other = User.objects.create_user(email_address='inviter2@example.com', password='pass')
    invite_view = OrganizationViewSet.as_view({'post': 'join'})
    req = self.factory.post('/')
    force_authenticate(req, user=other)
    resp = invite_view(req, token=str(invite.token))

    self.assertIn(resp.status_code, (HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND))
def test_remove_member_by_non_manager_forbidden(self):
    """``remove_member`` answers 403 when the requester is ``self.user``.

    ``self.user`` owns the organization here but is presumably not a manager
    (fixture from ``setUp``, not visible in this chunk).
    """
    organization = Organization.objects.create(name='RemoveForbidOrg', owner=self.user)
    OrganizationMembership.objects.create(organization=organization, user=self.user)
    victim = User.objects.create_user(email_address='m2@example.com', password='pass')
    OrganizationMembership.objects.create(organization=organization, user=victim)

    request = self.factory.post('/')
    force_authenticate(request, user=self.user)
    remove_member = OrganizationViewSet.as_view({'post': 'remove_member'})
    response = remove_member(request, uuid=str(organization.uuid), user_id=str(victim.id))

    self.assertEqual(response.status_code, HTTP_403_FORBIDDEN)
def test_update_member_by_non_manager_forbidden(self):
    """``list_members`` answers 403 for ``self.user``.

    NOTE(review): despite the name, no member-update request is made; the
    test exercises the member *listing* action as a stand-in.
    """
    organization = Organization.objects.create(name='UpdateForbidOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=organization, user=self.user)
    other_member = User.objects.create_user(email_address='m3@example.com', password='pass')
    OrganizationMembership.objects.create(organization=organization, user=other_member)

    request = self.factory.get('/')
    force_authenticate(request, user=self.user)
    list_members = OrganizationViewSet.as_view({'get': 'list_members'})
    response = list_members(request, uuid=str(organization.uuid))

    self.assertEqual(response.status_code, HTTP_403_FORBIDDEN)
def test_invite_revoke_by_non_manager_forbidden(self):
    """A non-manager member cannot revoke an invitation and it stays active.

    The original test called ``list_invites`` (a GET) and never touched the
    revoke action, and it stored the invitation object in a variable named
    ``token``. This version posts to ``revoke_invite`` with the invitation's
    token, expects 403, and verifies the invitation was not deactivated.
    """
    org = Organization.objects.create(name='RevokeForbidOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=org, user=self.user)
    OrganizationMembership.objects.create(
        organization=org,
        user=User.objects.create_user(email_address='mgr@example.com', password='p'),
    )
    invite = OrganizationInvitation.objects.create(organization=org, created_by=self.user)

    revoke_view = OrganizationViewSet.as_view({'post': 'revoke_invite'})
    req = self.factory.post('/')
    force_authenticate(req, user=self.user)
    resp = revoke_view(req, uuid=str(org.uuid), token=str(invite.token))

    self.assertEqual(resp.status_code, HTTP_403_FORBIDDEN)
    # The refused request must leave the invitation untouched.
    invite.refresh_from_db()
    self.assertTrue(invite.is_active)
def test_role_create_and_visibility(self):
    """A role created through ``org.roles`` is returned and can be queried by name."""
    organization = Organization.objects.create(name='RoleCreateOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=organization, user=self.manager)

    created_role = organization.roles.create(name='Tester')

    self.assertIsNotNone(created_role)
    matching_roles = organization.roles.filter(name='Tester')
    self.assertTrue(matching_roles.exists())
def test_role_members_get_and_post(self):
    """A ``RoleMembership`` row makes the user appear in ``role.members``.

    Model-level check only; no HTTP request is made.
    """
    organization = Organization.objects.create(name='RoleMembersOrg', owner=self.manager)
    OrganizationMembership.objects.create(organization=organization, user=self.manager)
    developer = User.objects.create_user(email_address='memberrole@example.com', password='pass')
    OrganizationMembership.objects.create(organization=organization, user=developer)

    dev_role = organization.roles.create(name='Developer')
    RoleMembership.objects.create(role=dev_role, user=developer)

    self.assertIn(developer, dev_role.members.all())

View file

@ -1,75 +0,0 @@
from django.test import TestCase
from django.utils import timezone
from django.contrib.auth import get_user_model
from datetime import timedelta
from apps.orgs.models import Organization, OrganizationMembership, OrganizationInvitation, Role, RoleMembership
# Resolve the active user model via Django instead of importing it directly.
User = get_user_model()
class OrganizationModelTests(TestCase):
    """Model-level tests for organizations, memberships, invitations and roles."""

    def setUp(self):
        """Create the manager user that owns every organization in these tests."""
        self.user = User.objects.create_user(
            email_address='u@example.com',
            password='pass',
            is_manager=True,
        )

    def test_create_organization_and_membership(self):
        """Creating an organization adds no members; a membership row does."""
        acme = Organization.objects.create(name='Acme', owner=self.user)
        self.assertEqual(acme.owner, self.user)
        self.assertEqual(acme.name, 'Acme')
        self.assertEqual(acme.members.count(), 0)

        OrganizationMembership.objects.create(organization=acme, user=self.user)
        self.assertIn(self.user, acme.members.all())

    def test_invitation_defaults_and_validation(self):
        """A fresh invite is valid; one use or a past expiry invalidates it."""
        organization = Organization.objects.create(name='InvOrg', owner=self.user)
        invitation = OrganizationInvitation.objects.create(
            organization=organization,
            created_by=self.user,
        )
        self.assertIsNotNone(invitation.expires_at)
        self.assertTrue(invitation.is_valid())

        # One use is enough to invalidate the invite created with defaults.
        invitation.uses += 1
        invitation.save()
        self.assertFalse(invitation.is_valid())

        # Reset the counter and expire the invite instead.
        invitation.uses = 0
        invitation.expires_at = timezone.now() - timedelta(days=1)
        invitation.save()
        self.assertFalse(invitation.is_valid())

    def test_role_and_role_membership(self):
        """Roles are reachable from the organization, members from the role."""
        organization = Organization.objects.create(name='RoleOrg', owner=self.user)
        admin_role = Role.objects.create(name='Admin', organization=organization)
        RoleMembership.objects.create(role=admin_role, user=self.user)

        self.assertIn(admin_role, organization.roles.all())
        self.assertIn(self.user, admin_role.members.all())

    def test_unique_organization_name(self):
        """Creating a second organization with the same name raises."""
        Organization.objects.create(name='UniqueOrg', owner=self.user)
        with self.assertRaises(Exception):
            Organization.objects.create(name='UniqueOrg', owner=self.user)

    def test_membership_unique_together(self):
        """Creating the same (organization, user) membership twice raises."""
        organization = Organization.objects.create(name='UTOrg', owner=self.user)
        OrganizationMembership.objects.create(organization=organization, user=self.user)
        with self.assertRaises(Exception):
            OrganizationMembership.objects.create(organization=organization, user=self.user)

    def test_invite_default_expiry_is_seven_days(self):
        """The default expiry lies 6 to 8 whole days after creation."""
        organization = Organization.objects.create(name='ExpiryOrg', owner=self.user)
        invitation = OrganizationInvitation.objects.create(
            organization=organization,
            created_by=self.user,
        )
        lifetime = invitation.expires_at - invitation.created_at
        self.assertTrue(6 <= lifetime.days <= 8)

    def test_invite_str_contains_org_name(self):
        """``str(invite)`` mentions the organization name."""
        organization = Organization.objects.create(name='StrOrg', owner=self.user)
        invitation = OrganizationInvitation.objects.create(
            organization=organization,
            created_by=self.user,
        )
        self.assertIn('StrOrg', str(invitation))

    def test_role_uuid_and_unique(self):
        """Two roles in the same organization receive different UUIDs."""
        organization = Organization.objects.create(name='RoleUuidOrg', owner=self.user)
        first_role = Role.objects.create(name='R1', organization=organization)
        second_role = Role.objects.create(name='R2', organization=organization)
        self.assertNotEqual(first_role.uuid, second_role.uuid)

    def test_str_methods(self):
        """``str(membership)`` mentions the organization name."""
        organization = Organization.objects.create(name='StrTestOrg', owner=self.user)
        membership = OrganizationMembership.objects.create(
            organization=organization,
            user=self.user,
        )
        self.assertIn(organization.name, str(membership))

View file

@ -1,257 +0,0 @@
from apps.orgs.models import Organization, OrganizationMembership, OrganizationInvitation, Role, RoleMembership, TrainingFile
from apps.orgs.serializers import ModelSerializer, OrganizationSerializer, OrganizationMembershipSerializer, OrganizationInvitationSerializer, RoleSerializer, RoleMembershipSerializer, TrainingFileSerializer
from rest_framework.viewsets import ModelViewSet
from rest_framework.permissions import IsAuthenticated
from django.db.models import Q
from rest_framework.response import Response
from rest_framework.status import HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND, HTTP_400_BAD_REQUEST
from rest_framework.decorators import action
from django.utils import timezone
from apps.users.models import User
from apps.users.serializers import UserSerializer
from rest_framework.parsers import MultiPartParser, FormParser
class OrganizationViewSet(ModelViewSet):
    """CRUD plus invite, membership, role and training-file actions for organizations.

    Detail routes resolve their organization through ``get_object()``, which
    uses ``get_queryset()`` and therefore answers 404 for organizations the
    requester neither owns nor belongs to. "Manager" checks use the
    account-wide ``request.user.is_manager`` flag; it is not scoped to a
    particular organization.
    """

    queryset = Organization.objects.all()
    serializer_class = OrganizationSerializer
    permission_classes = [IsAuthenticated]
    lookup_field = 'uuid'

    def get_queryset(self):
        """Return the organizations the requester owns or is a member of."""
        user = self.request.user
        return Organization.objects.filter(
            Q(memberships__user=user) | Q(owner=user)
        ).distinct()

    def perform_create(self, serializer):
        """Save the organization with the requester as owner and enrol them as a member."""
        organization = serializer.save(owner=self.request.user)
        OrganizationMembership.objects.create(user=self.request.user, organization=organization)

    def update(self, request, *args, **kwargs):
        """Allow updates by managers only; everyone else receives 403."""
        if not request.user.is_manager:
            return Response({'error': 'Only managers can update organization details'}, status=HTTP_403_FORBIDDEN)
        return super().update(request, *args, **kwargs)

    def destroy(self, request, *args, **kwargs):
        """Allow deletion by a manager or the organization's owner only.

        Without this guard the inherited ``destroy`` let any plain member
        delete the organization, even though ``update`` is restricted to
        managers. The owner is allowed as well because the ``leave`` action
        tells owners to delete the organization themselves.
        """
        organization = self.get_object()
        if not (request.user.is_manager or organization.owner == request.user):
            return Response({'error': 'Only managers or the owner can delete an organization'}, status=HTTP_403_FORBIDDEN)
        return super().destroy(request, *args, **kwargs)

    @action(detail=True, methods=['post'], url_path='create-invite')
    def create_invite(self, request, uuid=None):
        """Create an invitation for the organization (managers only).

        The optional ``max_uses`` query parameter must be a positive integer
        string; anything else falls back to 1.
        """
        organization = self.get_object()
        if not request.user.is_manager:
            return Response({'error': 'Only managers can create invites'}, status=HTTP_403_FORBIDDEN)
        max_uses = request.query_params.get('max_uses')
        max_uses = int(max_uses) if max_uses and max_uses.isdigit() and int(max_uses) > 0 else 1
        invitation = OrganizationInvitation.objects.create(
            organization=organization,
            created_by=request.user,
            max_uses=max_uses,
        )
        return Response(OrganizationInvitationSerializer(invitation, context={'request': request}).data)

    @action(detail=False, methods=['post'], url_path='join/(?P<token>[0-9a-f-]{36})')
    def join(self, request, token=None):
        """Join the organization behind an invitation token.

        Returns 404 for an unknown token, 400 for an inactive, expired or
        exhausted invitation, and 403 when the requester is already a member.
        On success the serialized organization is returned with ``message``
        and ``success`` keys added.
        """
        try:
            invitation = OrganizationInvitation.objects.get(token=token)
        except OrganizationInvitation.DoesNotExist:
            return Response({'error': 'Invalid invitation token'}, status=HTTP_404_NOT_FOUND)
        if not invitation.is_active or invitation.expires_at < timezone.now():
            return Response({'error': 'Invitation token is no longer valid'}, status=HTTP_400_BAD_REQUEST)
        if invitation.uses >= invitation.max_uses:
            # Exhausted but still flagged active: deactivate it for next time.
            invitation.is_active = False
            invitation.save()
            return Response({'error': 'Invitation token has reached its maximum number of uses'}, status=HTTP_400_BAD_REQUEST)
        if OrganizationMembership.objects.filter(user=request.user, organization=invitation.organization).exists():
            return Response({'error': 'You are already a member of this organization'}, status=HTTP_403_FORBIDDEN)
        OrganizationMembership.objects.create(user=request.user, organization=invitation.organization)
        invitation.uses += 1
        if invitation.uses >= invitation.max_uses:
            invitation.is_active = False
        # Always persist, so the incremented counter is stored even when the
        # invitation stays active (multi-use invitations).
        invitation.save()
        organization_data = OrganizationSerializer(invitation.organization, context={'request': request}).data
        organization_data['message'] = 'Successfully joined the organization'
        organization_data['success'] = True
        return Response(organization_data)

    @action(detail=True, methods=['post'], url_path='leave')
    def leave(self, request, uuid=None):
        """Remove the requester's own membership; the owner is never allowed to leave."""
        organization = self.get_object()
        try:
            membership = OrganizationMembership.objects.get(user=request.user, organization=organization)
        except OrganizationMembership.DoesNotExist:
            return Response({'error': 'You are not a member of this organization'}, status=HTTP_403_FORBIDDEN)
        if organization.owner == request.user:
            return Response({'error': 'The owner cannot leave the organization. Please transfer ownership or delete the organization.'}, status=HTTP_403_FORBIDDEN)
        membership.delete()
        return Response({'message': 'Successfully left the organization'})

    @action(detail=True, methods=['get'], url_path='invite')
    def list_invites(self, request, uuid=None):
        """List the organization's active invitations (managers only)."""
        # The manager check runs before the lookup, so non-managers get 403
        # even for organizations they cannot see.
        if not request.user.is_manager:
            return Response({'error': 'Only managers can view invites'}, status=HTTP_403_FORBIDDEN)
        organization = self.get_object()
        invites = OrganizationInvitation.objects.filter(organization=organization, is_active=True)
        serializer = OrganizationInvitationSerializer(invites, many=True, context={'request': request})
        return Response(serializer.data)

    @action(detail=True, methods=['get'], url_path='invite/(?P<token>[0-9a-f-]{36})')
    def invite_detail(self, request, uuid=None, token=None):
        """Return one invitation of the organization (managers only).

        NOTE(review): an unknown token answers 403 here while ``join``
        answers 404 for the same condition; kept as-is for existing clients.
        """
        if not request.user.is_manager:
            return Response({'error': 'Only managers can view invite details'}, status=HTTP_403_FORBIDDEN)
        organization = self.get_object()
        try:
            invitation = OrganizationInvitation.objects.get(token=token, organization=organization)
        except OrganizationInvitation.DoesNotExist:
            return Response({'error': 'Invalid invitation token'}, status=HTTP_403_FORBIDDEN)
        serializer = OrganizationInvitationSerializer(invitation, context={'request': request})
        return Response(serializer.data)

    @action(detail=True, methods=['post', 'delete'], url_path='invite/(?P<token>[0-9a-f-]{36})/revoke')
    def revoke_invite(self, request, uuid=None, token=None):
        """Deactivate one invitation of the organization (managers only)."""
        if not request.user.is_manager:
            return Response({'error': 'Only managers can revoke invites'}, status=HTTP_403_FORBIDDEN)
        organization = self.get_object()
        try:
            invitation = OrganizationInvitation.objects.get(token=token, organization=organization)
        except OrganizationInvitation.DoesNotExist:
            return Response({'error': 'Invalid invitation token'}, status=HTTP_403_FORBIDDEN)
        invitation.is_active = False
        invitation.save()
        return Response({'message': 'Invitation successfully revoked'})

    @action(detail=True, methods=['get'], url_path='member')
    def list_members(self, request, uuid=None):
        """List the users holding a membership in the organization (managers only)."""
        if not request.user.is_manager:
            return Response({'error': 'Only managers can view members'}, status=HTTP_403_FORBIDDEN)
        organization = self.get_object()
        members = User.objects.filter(organization_memberships__organization=organization)
        serializer = UserSerializer(members, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['post'], url_path=r'member/(?P<user_id>\d+)/remove')
    def remove_member(self, request, uuid=None, user_id=None):
        """Delete a user's membership (managers only); the owner cannot be removed."""
        if not request.user.is_manager:
            return Response({'error': 'Only managers can remove members'}, status=HTTP_403_FORBIDDEN)
        organization = self.get_object()
        try:
            membership = OrganizationMembership.objects.get(user__id=user_id, organization=organization)
        except OrganizationMembership.DoesNotExist:
            return Response({'error': 'User is not a member of this organization'}, status=HTTP_403_FORBIDDEN)
        if membership.user == organization.owner:
            return Response({'error': 'Cannot remove the owner from the organization'}, status=HTTP_403_FORBIDDEN)
        membership.delete()
        return Response({'message': 'Member successfully removed from the organization'})

    @action(detail=True, methods=['get', 'post'], url_path='role')
    def role(self, request, uuid=None):
        """GET lists the organization's roles; POST creates one (managers only).

        A POST without a ``name`` is a malformed request and answers 400
        (it previously answered 403, which mislabelled a validation error as
        a permission problem).
        """
        organization = self.get_object()
        if request.method == 'GET':
            roles = Role.objects.filter(organization=organization)
            return Response(RoleSerializer(roles, many=True).data)
        if not request.user.is_manager:
            return Response({'error': 'Only managers can create roles'}, status=HTTP_403_FORBIDDEN)
        name = request.data.get('name')
        if not name:
            return Response({'error': 'Role name is required'}, status=HTTP_400_BAD_REQUEST)
        role = Role.objects.create(name=name, organization=organization)
        return Response(RoleSerializer(role).data)

    @action(detail=False, methods=['get'], url_path='role/mine')
    def my_roles(self, request):
        """List every role the requester holds, across all organizations."""
        roles = Role.objects.filter(memberships__user=request.user).distinct()
        serializer = RoleSerializer(roles, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['post'], url_path='role/(?P<role_uuid>[0-9a-f-]{36})/delete')
    def delete_role(self, request, uuid=None, role_uuid=None):
        """Delete a role of the organization (managers only); 404 if it is not found."""
        if not request.user.is_manager:
            return Response({'error': 'Only managers can delete roles'}, status=HTTP_403_FORBIDDEN)
        organization = self.get_object()
        try:
            role = Role.objects.get(uuid=role_uuid, organization=organization)
        except Role.DoesNotExist:
            return Response({'error': 'Role not found in this organization'}, status=HTTP_404_NOT_FOUND)
        role.delete()
        return Response({'message': 'Role successfully deleted'})

    @action(detail=True, methods=['get'], url_path='role/(?P<role_uuid>[0-9a-f-]{36})/member')
    def list_role_members(self, request, uuid=None, role_uuid=None):
        """List the memberships of one role; 404 if the role is not in this organization."""
        organization = self.get_object()
        try:
            role = Role.objects.get(uuid=role_uuid, organization=organization)
        except Role.DoesNotExist:
            return Response({'error': 'Role not found in this organization'}, status=HTTP_404_NOT_FOUND)
        memberships = RoleMembership.objects.filter(role=role)
        serializer = RoleMembershipSerializer(memberships, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['get', 'post'], url_path='training-file')
    def training_files(self, request, uuid=None):
        """GET lists the organization's training files; POST uploads a new one.

        GET accepts an optional ``role_uuid`` query parameter to narrow the
        listing. POST requires ``role_uuid`` in the body to name a role of
        this organization; the file is stored with the requester as uploader.
        """
        organization = self.get_object()
        if request.method == 'GET':
            role_uuid = request.query_params.get('role_uuid')
            training_files = TrainingFile.objects.filter(role__organization=organization)
            if role_uuid:
                training_files = training_files.filter(role__uuid=role_uuid, role__organization=organization)
            serializer = TrainingFileSerializer(training_files, many=True, context={'request': request})
            return Response(serializer.data)
        if not (organization.owner == request.user or
                organization.memberships.filter(user=request.user).exists()):
            return Response(
                {'error': 'You do not have permission to upload files to this organization'},
                status=HTTP_403_FORBIDDEN
            )
        role_uuid = request.data.get('role_uuid')
        if not role_uuid:
            return Response({'error': 'role_uuid is required'}, status=HTTP_400_BAD_REQUEST)
        try:
            # Existence check only: the serializer consumes request.data itself.
            Role.objects.get(uuid=role_uuid, organization=organization)
        except Role.DoesNotExist:
            return Response({'error': 'Role not found in this organization'}, status=HTTP_404_NOT_FOUND)
        serializer = TrainingFileSerializer(data=request.data, context={'request': request})
        if serializer.is_valid():
            serializer.save(uploaded_by=request.user)
            return Response(serializer.data, status=201)
        return Response(serializer.errors, status=HTTP_400_BAD_REQUEST)

    @action(detail=True, methods=['get', 'delete'], url_path='training-file/(?P<file_uuid>[0-9a-f-]{36})')
    def training_file_detail(self, request, uuid=None, file_uuid=None):
        """GET returns one training file; DELETE removes it.

        Deletion is allowed for the uploader, the organization owner, or any
        manager; everyone else receives 403.
        """
        organization = self.get_object()
        try:
            training_file = TrainingFile.objects.get(uuid=file_uuid, role__organization=organization)
        except TrainingFile.DoesNotExist:
            return Response({'error': 'Training file not found'}, status=HTTP_404_NOT_FOUND)
        if request.method == 'GET':
            serializer = TrainingFileSerializer(training_file, context={'request': request})
            return Response(serializer.data)
        if not (training_file.uploaded_by == request.user or
                training_file.role.organization.owner == request.user or
                request.user.is_manager):
            return Response(
                {'error': 'You do not have permission to delete this file'},
                status=HTTP_403_FORBIDDEN
            )
        # Capture the name before the row disappears so it can be echoed back.
        file_name = training_file.file_name
        training_file.delete()
        return Response({'message': f'File "{file_name}" successfully deleted'})

View file

View file

@ -1,26 +0,0 @@
from django.contrib import admin
from django.contrib.auth.admin import UserAdmin as DjangoUserAdmin
from django.contrib.auth.models import Group
from apps.users.models import User
# Remove the Group model from the admin site.
admin.site.unregister(Group)
@admin.register(User)
class UserAdmin(DjangoUserAdmin):
    """Admin configuration for the email-identified ``User`` model."""

    # Columns, search and ordering of the change list.
    list_display = ('email_address', 'first_name', 'last_name', 'is_staff')
    search_fields = ('email_address', 'first_name', 'last_name')
    ordering = ('email_address',)

    # Sections of the "change user" form.
    fieldsets = (
        (
            None,
            {'fields': ('email_address', 'password')},
        ),
        (
            'Personal info',
            {'fields': ('first_name', 'last_name')},
        ),
        (
            'Permissions',
            {'fields': ('is_active', 'is_staff', 'is_superuser', 'is_manager')},
        ),
        (
            'Dates',
            {'fields': ('last_login',)},
        ),
    )

    # Single section of the "add user" form.
    add_fieldsets = (
        (
            None,
            {
                'classes': ('wide',),
                'fields': ('email_address', 'first_name', 'last_name', 'password1', 'password2'),
            },
        ),
    )

View file

@ -1,5 +0,0 @@
from django.apps import AppConfig
class UsersConfig(AppConfig):
    """Application configuration for the ``apps.users`` package."""

    name = 'apps.users'
    # Models that do not declare a primary key get a BigAutoField.
    default_auto_field = 'django.db.models.BigAutoField'

View file

@ -1,27 +0,0 @@
from django.contrib.auth.hashers import make_password
from django.contrib.auth.models import BaseUserManager
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from apps.users.models import User
class UserManager(BaseUserManager["User"]):
    """Manager that creates users identified by ``email_address``."""

    def _create_user(self, email_address: str, password: str | None, **extra_fields):
        """Create and save a user with a normalized email and a hashed password.

        Raises:
            ValueError: if ``email_address`` is empty.
        """
        if not email_address:
            raise ValueError("The given email must be set")
        email_address = self.normalize_email(email_address)
        user: User = self.model(email_address=email_address, **extra_fields)
        user.password = make_password(password)
        user.save(using=self._db)
        return user

    def create_user(self, email_address: str, password: str | None = None, **extra_fields):
        """Create a regular user; staff and superuser flags default to False."""
        extra_fields.setdefault("is_staff", False)
        extra_fields.setdefault("is_superuser", False)
        return self._create_user(email_address, password, **extra_fields)

    def create_superuser(self, email_address: str, password: str | None = None, **extra_fields):
        """Create a superuser with both ``is_staff`` and ``is_superuser`` set.

        ``is_superuser`` was previously never set, so accounts created this
        way were stored with ``is_superuser=False``.

        Raises:
            ValueError: if the caller explicitly passes ``is_staff`` or
                ``is_superuser`` as anything other than ``True``.
        """
        extra_fields.setdefault("is_staff", True)
        extra_fields.setdefault("is_superuser", True)
        if extra_fields.get("is_staff") is not True:
            raise ValueError("Superuser must have is_staff=True.")
        if extra_fields.get("is_superuser") is not True:
            raise ValueError("Superuser must have is_superuser=True.")
        return self._create_user(email_address, password, **extra_fields)

View file

@ -1,41 +0,0 @@
import uuid
from django.db import migrations, models
# Initial migration of this app: creates the custom User model.
class Migration(migrations.Migration):
initial = True
# The groups / user_permissions many-to-many fields below point at
# auth.group and auth.permission, hence the dependency on the auth app.
dependencies = [
('auth', '0012_alter_user_first_name_max_length'),
]
operations = [
migrations.CreateModel(
name='User',
fields=[
('password', models.CharField(max_length=128, verbose_name='password')),
('last_login', models.DateTimeField(blank=True, null=True, verbose_name='last login')),
('is_superuser', models.BooleanField(default=False, help_text='Designates that this user has all permissions without explicitly assigning them.', verbose_name='superuser status')),
('created_at', models.DateTimeField(auto_now_add=True, verbose_name='Created At')),
('updated_at', models.DateTimeField(auto_now=True, verbose_name='Updated At')),
('id', models.AutoField(primary_key=True, serialize=False, verbose_name='User ID')),
# The uuid column has a uuid4 default but no unique constraint.
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, verbose_name='User UUID')),
('email_address', models.EmailField(max_length=255, unique=True, verbose_name='Email Address')),
('first_name', models.CharField(max_length=255, verbose_name='First Name')),
('last_name', models.CharField(max_length=255, verbose_name='Last Name')),
('date_of_birth', models.DateField(blank=True, null=True, verbose_name='Date of Birth')),
('bio', models.TextField(blank=True, default='')),
# NOTE(review): the default is frozen here as the literal 'UTC', and
# max_length=16 is shorter than some IANA zone names - confirm intent.
('timezone', models.CharField(blank=True, default='UTC', max_length=16)),
('avatar_url', models.URLField(blank=True)),
('is_active', models.BooleanField(default=True, verbose_name='Account Active')),
('is_staff', models.BooleanField(default=False, verbose_name='Account Admin')),
('is_manager', models.BooleanField(default=False, verbose_name='Organization Manager')),
('groups', models.ManyToManyField(blank=True, help_text='The groups this user belongs to. A user will get all permissions granted to each of their groups.', related_name='user_set', related_query_name='user', to='auth.group', verbose_name='groups')),
('user_permissions', models.ManyToManyField(blank=True, help_text='Specific permissions for this user.', related_name='user_set', related_query_name='user', to='auth.permission', verbose_name='user permissions')),
],
options={
'verbose_name': 'User',
'verbose_name_plural': 'Users',
},
),
]

View file

@ -1,10 +0,0 @@
from django.db.models import DateTimeField, Model
from django.utils.translation import gettext_lazy as _
class TimeStampMixin(Model):
    """Abstract model contributing ``created_at`` and ``updated_at`` columns."""

    class Meta:
        abstract = True

    # Set once, when the row is first saved.
    created_at = DateTimeField(auto_now_add=True, verbose_name="Created At")
    # Refreshed on every save.
    updated_at = DateTimeField(auto_now=True, verbose_name="Updated At")

View file

@ -1,50 +0,0 @@
from django.contrib.auth.models import AbstractBaseUser, PermissionsMixin
from django.db.models import AutoField, BooleanField, CharField, DateField, EmailField, UUIDField, TextField, URLField
from django.utils.translation import gettext_lazy as _
from typing import ClassVar
from uuid import uuid4
from apps.users.managers import UserManager
from apps.users.mixins import TimeStampMixin
from django.conf import settings
class User(AbstractBaseUser, TimeStampMixin, PermissionsMixin):
    """Custom user model that logs in with ``email_address`` instead of a username."""

    # Identity
    id = AutoField(primary_key=True, verbose_name=_("User ID"))
    uuid = UUIDField(default=uuid4, editable=False, verbose_name=_("User UUID"))
    email_address = EmailField(unique=True, max_length=255, verbose_name=_("Email Address"))

    # Profile
    first_name = CharField(max_length=255, verbose_name=_("First Name"))
    last_name = CharField(max_length=255, verbose_name=_("Last Name"))
    date_of_birth = DateField(blank=True, null=True, verbose_name=_("Date of Birth"))
    bio = TextField(blank=True, default="")
    # NOTE(review): max_length=16 is shorter than some IANA zone names
    # (e.g. "America/Argentina/Buenos_Aires") - confirm this is intended.
    timezone = CharField(blank=True, max_length=16, default=settings.TIME_ZONE)
    avatar_url = URLField(blank=True)

    # Flags
    is_active = BooleanField(default=True, verbose_name=_("Account Active"))
    is_staff = BooleanField(default=False, verbose_name=_("Account Admin"))
    is_manager = BooleanField(default=False, verbose_name=_("Organization Manager"))

    USERNAME_FIELD = 'email_address'
    EMAIL_FIELD = 'email_address'
    REQUIRED_FIELDS = ['first_name', 'last_name', 'date_of_birth']

    objects: ClassVar[UserManager] = UserManager()

    class Meta:
        verbose_name = _('User')
        verbose_name_plural = _('Users')

    # NOTE(review): both permission hooks return True unconditionally, so
    # they grant every permission to every user and override whatever
    # PermissionsMixin would decide. Confirm this is deliberate.
    def has_perm(self, perm, obj=None):
        """Report that the user holds ``perm`` - always True."""
        return True

    def has_module_perms(self, app_label):
        """Report that the user may access ``app_label`` - always True."""
        return True

    @property
    def full_name(self) -> str:
        """First and last name joined by a single space."""
        return f"{self.first_name} {self.last_name}"

    def __str__(self) -> str:
        """Use the full name as the display string."""
        return self.full_name

View file

@ -1,9 +0,0 @@
from rest_framework.serializers import ModelSerializer
from apps.users.models import User
class UserSerializer(ModelSerializer):
    """Serializes the public profile fields of a ``User``."""

    class Meta:
        model = User
        fields = [
            'id',
            'uuid',
            'email_address',
            'first_name',
            'last_name',
            'bio',
            'timezone',
            'avatar_url',
            'is_manager',
            'date_of_birth',
            'created_at',
            'updated_at',
            'is_staff',
        ]
        # NOTE(review): 'is_manager' is not read-only, so any write path using
        # this serializer lets the client set it - confirm that is intended.
        read_only_fields = [
            'id',
            'uuid',
            'created_at',
            'updated_at',
            'is_staff',
        ]

View file

@ -1,669 +0,0 @@
from django.test import TestCase
from django.contrib.auth import get_user_model
from rest_framework.status import HTTP_200_OK, HTTP_201_CREATED, HTTP_400_BAD_REQUEST, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN
from rest_framework.test import APIClient
# Resolve the active user model via Django instead of importing it directly.
User = get_user_model()
class UserLoginActionTests(TestCase):
    """Tests for ``POST /api/user/login/``."""

    def setUp(self):
        """Create an API client and one active user with known credentials."""
        self.client = APIClient()
        self.user_data = {
            'email_address': 'testuser@example.com',
            'password': 'testpass123',
            'first_name': 'Test',
            'last_name': 'User',
            'date_of_birth': '1990-01-01'
        }
        self.user = User.objects.create_user(**self.user_data)

    def _post_login(self, **credentials):
        """POST the given credential fields to the login endpoint."""
        return self.client.post('/api/user/login/', credentials)

    def test_login_successful(self):
        """Valid credentials return 200 with a success payload and the user."""
        response = self._post_login(
            email_address='testuser@example.com',
            password='testpass123',
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        payload = response.json()
        self.assertTrue(payload['success'])
        self.assertEqual(payload['message'], 'Login successful')
        self.assertIn('user', payload)
        self.assertEqual(payload['user']['email_address'], 'testuser@example.com')

    def test_login_missing_email(self):
        """Omitting the email returns 400 with an ``error`` key."""
        response = self._post_login(password='testpass123')
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
        payload = response.json()
        self.assertIn('error', payload)

    def test_login_missing_password(self):
        """Omitting the password returns 400 with an ``error`` key."""
        response = self._post_login(email_address='testuser@example.com')
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
        payload = response.json()
        self.assertIn('error', payload)

    def test_login_invalid_credentials(self):
        """A wrong password returns 401."""
        response = self._post_login(
            email_address='testuser@example.com',
            password='wrongpassword',
        )
        self.assertEqual(response.status_code, HTTP_401_UNAUTHORIZED)

    def test_login_nonexistent_user(self):
        """An unknown email returns 401."""
        response = self._post_login(
            email_address='nonexistent@example.com',
            password='testpass123',
        )
        self.assertEqual(response.status_code, HTTP_401_UNAUTHORIZED)

    def test_login_session_created(self):
        """A successful login sets the ``sessionid`` cookie on the client."""
        response = self._post_login(
            email_address='testuser@example.com',
            password='testpass123',
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
        self.assertIn('sessionid', self.client.cookies)

    def test_login_inactive_user(self):
        """A deactivated account returns 401 even with valid credentials."""
        self.user.is_active = False
        self.user.save()
        response = self._post_login(
            email_address='testuser@example.com',
            password='testpass123',
        )
        self.assertEqual(response.status_code, HTTP_401_UNAUTHORIZED)

    def test_login_case_insensitive_email(self):
        """An upper-cased email domain still logs in."""
        response = self._post_login(
            email_address='testuser@EXAMPLE.COM',
            password='testpass123',
        )
        self.assertEqual(response.status_code, HTTP_200_OK)
class UserLogoutActionTests(TestCase):
    """Covers the ``POST /api/user/logout/`` action."""

    def setUp(self):
        self.client = APIClient()
        self.user = User.objects.create_user(
            email_address='testuser@example.com',
            password='testpass123',
            first_name='Test',
            last_name='User'
        )

    def _sign_in(self):
        """Log the fixture user in through the API and return the response."""
        credentials = {
            'email_address': 'testuser@example.com',
            'password': 'testpass123'
        }
        return self.client.post('/api/user/login/', credentials)

    def test_logout_successful(self):
        self._sign_in()
        result = self.client.post('/api/user/logout/')
        self.assertEqual(result.status_code, HTTP_200_OK)
        self.assertTrue(result.json()['success'])

    def test_logout_without_login(self):
        result = self.client.post('/api/user/logout/')
        self.assertEqual(result.status_code, HTTP_403_FORBIDDEN)

    def test_session_destroyed_after_logout(self):
        self._sign_in()
        self.client.post('/api/user/logout/')
        # After logging out, the authenticated-only endpoint must be refused.
        result = self.client.get('/api/user/me/')
        self.assertEqual(result.status_code, HTTP_403_FORBIDDEN)
class UserMeActionTests(TestCase):
    """Covers the ``GET /api/user/me/`` action."""

    ME_URL = '/api/user/me/'

    def setUp(self):
        self.client = APIClient()
        self.user = User.objects.create_user(
            email_address='testuser@example.com',
            password='testpass123',
            first_name='Test',
            last_name='User'
        )

    def _sign_in(self):
        """Log the fixture user in through the API."""
        self.client.post('/api/user/login/', {
            'email_address': 'testuser@example.com',
            'password': 'testpass123'
        })

    def test_me_authenticated(self):
        self._sign_in()
        result = self.client.get(self.ME_URL)
        self.assertEqual(result.status_code, HTTP_200_OK)
        body = result.json()
        self.assertTrue(body['success'])
        self.assertEqual(body['email_address'], 'testuser@example.com')

    def test_me_unauthenticated(self):
        result = self.client.get(self.ME_URL)
        self.assertEqual(result.status_code, HTTP_403_FORBIDDEN)

    def test_me_returns_correct_user_data(self):
        self._sign_in()
        body = self.client.get(self.ME_URL).json()
        required = {'id', 'uuid', 'email_address', 'first_name', 'last_name'}
        # Every required key must be present; additional keys are allowed.
        self.assertTrue(required.issubset(set(body.keys())))
class UserSessionActionTests(TestCase):
    """Covers the ``GET /api/user/session/`` action.

    The endpoint is expected to answer 200 for both anonymous and logged-in
    clients and to report ``isAuthenticated`` / ``isStaff`` flags.
    """

    def setUp(self):
        self.client = APIClient()
        self.user = User.objects.create_user(
            email_address='testuser@example.com',
            password='testpass123',
            first_name='Test',
            last_name='User'
        )

    def test_session_authenticated(self):
        self.client.post('/api/user/login/', {
            'email_address': 'testuser@example.com',
            'password': 'testpass123'
        })
        response = self.client.get('/api/user/session/')
        self.assertEqual(response.status_code, HTTP_200_OK)
        data = response.json()
        self.assertTrue(data['isAuthenticated'])

    def test_session_unauthenticated(self):
        response = self.client.get('/api/user/session/')
        self.assertEqual(response.status_code, HTTP_200_OK)
        data = response.json()
        self.assertFalse(data['isAuthenticated'])

    def test_session_staff_status(self):
        self.client.post('/api/user/login/', {
            'email_address': 'testuser@example.com',
            'password': 'testpass123'
        })
        response = self.client.get('/api/user/session/')
        data = response.json()
        # The fixture user is a regular (non-staff) account.
        self.assertIn('isStaff', data)
        self.assertFalse(data['isStaff'])

    def test_session_unauthenticated_no_staff(self):
        response = self.client.get('/api/user/session/')
        data = response.json()
        self.assertFalse(data['isAuthenticated'])
        # An anonymous client must never be reported as staff; previously this
        # test only repeated the isAuthenticated check.
        self.assertIn('isStaff', data)
        self.assertFalse(data['isStaff'])
class UserSignupActionTests(TestCase):
    """Covers the ``POST /api/user/signup/`` action.

    Each test starts from a complete, valid payload (see ``_signup``) and
    overrides or removes the fields relevant to the case under test.
    """

    SIGNUP_URL = '/api/user/signup/'

    def setUp(self):
        self.client = APIClient()

    def _signup(self, omit=(), **overrides):
        """POST a signup payload and return the response.

        Args:
            omit: names of fields to drop from the payload entirely.
            **overrides: field values replacing (or extending) the defaults.
        """
        payload = {
            'email_address': 'newuser@example.com',
            'password': 'newpass123',
            'confirm_password': 'newpass123',
            'first_name': 'New',
            'last_name': 'User',
            'manager': True,
        }
        payload.update(overrides)
        for field in omit:
            payload.pop(field, None)
        return self.client.post(self.SIGNUP_URL, payload)

    def test_signup_successful(self):
        response = self._signup(date_of_birth='1995-05-05', manager=False)
        self.assertEqual(response.status_code, HTTP_201_CREATED)
        data = response.json()
        self.assertTrue(data['success'])
        self.assertIn('User account created successfully', data['detail'])
        self.assertTrue(User.objects.filter(email_address='newuser@example.com').exists())

    def test_signup_email_exists(self):
        User.objects.create_user(
            email_address='existing@example.com',
            password='pass',
            first_name='Existing',
            last_name='User'
        )
        response = self._signup(email_address='existing@example.com')
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
        data = response.json()
        self.assertFalse(data['success'])
        self.assertIn('Email address already exists', data['detail'])

    def test_signup_missing_first_name(self):
        response = self._signup(omit=('first_name',), email_address='newuser2@example.com')
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
        data = response.json()
        self.assertFalse(data['success'])

    def test_signup_missing_last_name(self):
        response = self._signup(omit=('last_name',), email_address='newuser3@example.com')
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
        data = response.json()
        self.assertFalse(data['success'])

    def test_signup_missing_manager(self):
        response = self._signup(
            omit=('manager',),
            email_address='missingmanager@example.com',
            first_name='Missing',
            last_name='Manager',
        )
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
        data = response.json()
        self.assertFalse(data['success'])

    def test_signup_passwords_mismatch(self):
        response = self._signup(
            email_address='newuser4@example.com',
            confirm_password='differentpass',
        )
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
        data = response.json()
        self.assertIn('Passwords do not match', data['detail'])

    def test_signup_missing_email(self):
        response = self._signup(omit=('email_address',))
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)

    def test_signup_missing_password(self):
        response = self._signup(omit=('password',))
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)

    def test_signup_empty_data(self):
        response = self.client.post(self.SIGNUP_URL, {})
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)

    def test_signup_case_insensitive_email(self):
        response = self._signup(email_address='NewUser@EXAMPLE.COM')
        self.assertEqual(response.status_code, HTTP_201_CREATED)
        # Only the domain part is expected to be lower-cased on storage.
        user = User.objects.get(email_address='NewUser@example.com')
        self.assertEqual(user.email_address, 'NewUser@example.com')

    def test_signup_duplicate_case_insensitive(self):
        User.objects.create_user(
            email_address='test@example.com',
            password='pass',
            first_name='Test',
            last_name='User'
        )
        # Same address with an upper-cased domain. The original payload left
        # email_address out altogether, so the 400 came from the "email
        # required" check and duplicate detection was never exercised.
        response = self._signup(email_address='test@EXAMPLE.COM')
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)
        data = response.json()
        self.assertFalse(data['success'])
        self.assertIn('Email address already exists', data['detail'])
class UserChangePasswordActionTests(TestCase):
    """Covers the ``POST /api/user/change_password/`` action."""

    LOGIN_URL = '/api/user/login/'
    CHANGE_URL = '/api/user/change_password/'

    def setUp(self):
        self.client = APIClient()
        self.user = User.objects.create_user(
            email_address='testuser@example.com',
            password='testpass123',
            first_name='Test',
            last_name='User'
        )

    def _sign_in(self, password='testpass123'):
        """Log the fixture user in with ``password`` and return the response."""
        return self.client.post(self.LOGIN_URL, {
            'email_address': 'testuser@example.com',
            'password': password
        })

    def _submit(self, **fields):
        """POST ``fields`` to the change-password endpoint."""
        return self.client.post(self.CHANGE_URL, fields)

    def test_change_password_successful(self):
        self._sign_in()
        result = self._submit(
            old_password='testpass123',
            password='newpass456',
            confirm_password='newpass456',
        )
        self.assertEqual(result.status_code, HTTP_200_OK)
        self.assertTrue(result.json()['success'])
        # Reload from the database to observe the stored hash.
        self.user.refresh_from_db()
        self.assertTrue(self.user.check_password('newpass456'))

    def test_change_password_wrong_old_password(self):
        self._sign_in()
        result = self._submit(
            old_password='wrongoldpass',
            password='newpass456',
            confirm_password='newpass456',
        )
        self.assertEqual(result.status_code, HTTP_401_UNAUTHORIZED)
        self.assertFalse(result.json()['success'])

    def test_change_password_mismatch(self):
        self._sign_in()
        result = self._submit(
            old_password='testpass123',
            password='newpass456',
            confirm_password='differentpass',
        )
        self.assertEqual(result.status_code, HTTP_400_BAD_REQUEST)
        self.assertIn('Passwords do not match', result.json()['detail'])

    def test_change_password_missing_old_password(self):
        self._sign_in()
        result = self._submit(password='newpass456', confirm_password='newpass456')
        self.assertEqual(result.status_code, HTTP_400_BAD_REQUEST)
        self.assertIn('old_password', result.json()['detail'])

    def test_change_password_missing_new_password(self):
        self._sign_in()
        result = self._submit(old_password='testpass123', confirm_password='newpass456')
        self.assertEqual(result.status_code, HTTP_400_BAD_REQUEST)

    def test_change_password_unauthenticated(self):
        result = self._submit(
            old_password='testpass123',
            password='newpass456',
            confirm_password='newpass456',
        )
        self.assertEqual(result.status_code, HTTP_403_FORBIDDEN)

    def test_change_password_empty_old_password(self):
        self._sign_in()
        result = self._submit(
            old_password='',
            password='newpass456',
            confirm_password='newpass456',
        )
        self.assertEqual(result.status_code, HTTP_400_BAD_REQUEST)

    def test_can_login_with_new_password_after_change(self):
        self._sign_in()
        self._submit(
            old_password='testpass123',
            password='brandnewpass789',
            confirm_password='brandnewpass789',
        )
        # Drop the client's session so the next request is a fresh login.
        self.client.logout()
        result = self._sign_in(password='brandnewpass789')
        self.assertEqual(result.status_code, HTTP_200_OK)
class UserEdgeCaseTests(TestCase):
    """Edge cases across the login, logout, signup and change-password actions.

    ``setUp`` creates one regular user (``edgecase@example.com``); tests that
    need staff or manager accounts create them locally.
    """

    LOGIN_URL = '/api/user/login/'
    SIGNUP_URL = '/api/user/signup/'
    CHANGE_URL = '/api/user/change_password/'

    def setUp(self):
        self.client = APIClient()
        self.user = User.objects.create_user(
            email_address='edgecase@example.com',
            password='testpass123',
            first_name='Edge',
            last_name='Case'
        )

    def _login(self, email_address='edgecase@example.com', password='testpass123'):
        """POST credentials to the login endpoint and return the response."""
        return self.client.post(self.LOGIN_URL, {
            'email_address': email_address,
            'password': password
        })

    def _signup(self, **fields):
        """POST ``fields`` verbatim to the signup endpoint."""
        return self.client.post(self.SIGNUP_URL, fields)

    def test_login_with_whitespace_email(self):
        # NOTE(review): this address does not belong to any user created in
        # setUp, so the 401 is what an unknown account would return anyway.
        # Confirm whether ' edgecase@example.com ' was intended and what
        # status the endpoint should give for a padded, otherwise valid email.
        response = self._login(email_address=' testuser@example.com ')
        self.assertEqual(response.status_code, HTTP_401_UNAUTHORIZED)

    def test_signup_with_very_long_name(self):
        long_name = 'A' * 255
        response = self._signup(
            email_address='longname@example.com',
            password='newpass123',
            confirm_password='newpass123',
            first_name=long_name,
            last_name=long_name,
            manager=True,
        )
        self.assertEqual(response.status_code, HTTP_201_CREATED)

    def test_signup_with_too_long_name(self):
        too_long_name = 'A' * 256
        response = self._signup(
            email_address='verylongname@example.com',
            password='newpass123',
            confirm_password='newpass123',
            first_name=too_long_name,
            last_name='User',
            manager=True,
        )
        # Either outcome is accepted; the test only guards against a crash.
        self.assertIn(response.status_code, [HTTP_400_BAD_REQUEST, HTTP_201_CREATED])

    def test_signup_with_special_characters_in_name(self):
        response = self._signup(
            email_address='special@example.com',
            password='newpass123',
            confirm_password='newpass123',
            first_name='José',
            last_name="O'Brien-Smith",
            manager=True,
        )
        self.assertEqual(response.status_code, HTTP_201_CREATED)

    def test_change_password_same_as_old(self):
        self._login()
        response = self.client.post(self.CHANGE_URL, {
            'old_password': 'testpass123',
            'password': 'testpass123',
            'confirm_password': 'testpass123'
        })
        self.assertEqual(response.status_code, HTTP_200_OK)

    def test_signup_missing_confirm_password_field(self):
        response = self._signup(
            email_address='missingconfirm@example.com',
            password='newpass123',
            first_name='Missing',
            last_name='Confirm',
            manager=True,
        )
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)

    def test_login_multiple_times_same_session(self):
        response1 = self._login()
        session_id_1 = self.client.cookies.get('sessionid')
        me1 = self.client.get('/api/user/me/')
        self.assertEqual(me1.status_code, HTTP_200_OK)
        response2 = self._login()
        session_id_2 = self.client.cookies.get('sessionid')
        self.assertEqual(response1.status_code, HTTP_200_OK)
        self.assertEqual(response2.status_code, HTTP_200_OK)
        # The cookies were previously captured but never checked: a session
        # cookie must be present after each login.
        self.assertIsNotNone(session_id_1)
        self.assertIsNotNone(session_id_2)

    def test_staff_user_login_shows_staff_status(self):
        User.objects.create_user(
            email_address='staff@example.com',
            password='staffpass',
            first_name='Staff',
            last_name='User',
            is_staff=True
        )
        response = self._login(email_address='staff@example.com', password='staffpass')
        self.assertEqual(response.status_code, HTTP_200_OK)
        data = response.json()
        self.assertIn('user', data)
        # The test name promises the staff flag is exposed; assert it.
        self.assertTrue(data['user']['is_staff'])

    def test_session_status_after_explicit_logout(self):
        self._login()
        self.client.post('/api/user/logout/')
        response = self.client.get('/api/user/session/')
        data = response.json()
        self.assertFalse(data['isAuthenticated'])

    def test_signup_with_null_optional_fields(self):
        # Optional fields (e.g. date_of_birth) are simply left out.
        response = self._signup(
            email_address='optional@example.com',
            password='newpass123',
            confirm_password='newpass123',
            first_name='Optional',
            last_name='Fields',
            manager=True,
        )
        self.assertEqual(response.status_code, HTTP_201_CREATED)

    def test_change_password_with_missing_confirm_password(self):
        self._login()
        response = self.client.post(self.CHANGE_URL, {
            'old_password': 'testpass123',
            'password': 'newpass456'
        })
        self.assertEqual(response.status_code, HTTP_400_BAD_REQUEST)

    def test_login_and_logout_sequence(self):
        resp1 = self._login()
        self.assertEqual(resp1.status_code, HTTP_200_OK)
        me1 = self.client.get('/api/user/me/')
        self.assertEqual(me1.status_code, HTTP_200_OK)
        logout_resp = self.client.post('/api/user/logout/')
        self.assertEqual(logout_resp.status_code, HTTP_200_OK)
        me2 = self.client.get('/api/user/me/')
        self.assertEqual(me2.status_code, HTTP_403_FORBIDDEN)
        resp2 = self._login()
        self.assertEqual(resp2.status_code, HTTP_200_OK)
        me3 = self.client.get('/api/user/me/')
        self.assertEqual(me3.status_code, HTTP_200_OK)

    def test_invalid_email_format(self):
        response = self._signup(
            email_address='not-an-email',
            password='newpass123',
            confirm_password='newpass123',
            first_name='Invalid',
            last_name='Email',
            manager=True,
        )
        self.assertIn(response.status_code, [HTTP_400_BAD_REQUEST, HTTP_201_CREATED])

    def test_empty_password_signup(self):
        response = self._signup(
            email_address='emptypass@example.com',
            password='',
            confirm_password='',
            first_name='Empty',
            last_name='Pass',
            manager=True,
        )
        self.assertIn(response.status_code, [HTTP_400_BAD_REQUEST, HTTP_201_CREATED])

    def test_role_preserved_after_login(self):
        User.objects.create_user(
            email_address='manager@example.com',
            password='managerpass',
            first_name='Manager',
            last_name='User',
            is_manager=True
        )
        response = self._login(email_address='manager@example.com', password='managerpass')
        self.assertEqual(response.status_code, HTTP_200_OK)
        data = response.json()
        self.assertIn('user', data)
        self.assertEqual(data['user']['email_address'], 'manager@example.com')
        self.assertTrue(data['user']['is_manager'])

View file

@ -1,55 +0,0 @@
from django.test import TestCase
from django.contrib.auth import get_user_model
from rest_framework.test import APIClient
from rest_framework.status import HTTP_200_OK, HTTP_404_NOT_FOUND
User = get_user_model()
class UserListAPITests(TestCase):
    """Covers the list and retrieve routes under ``/api/user/``.

    All requests are made without logging in.
    """

    COLLECTION_URL = '/api/user/'

    def setUp(self):
        self.client = APIClient()
        self.user = User.objects.create_user(
            password='pass1234',
            email_address='apiuser@example.com',
            first_name='API',
            last_name='User',
            date_of_birth='1995-05-05',
        )

    def test_list_users(self):
        listing = self.client.get(self.COLLECTION_URL)
        self.assertEqual(listing.status_code, HTTP_200_OK)
        self.assertIsInstance(listing.json(), (list, dict))

    def test_api_response_contains_expected_fields(self):
        listing = self.client.get(self.COLLECTION_URL)
        self.assertEqual(listing.status_code, HTTP_200_OK)
        payload = listing.json()
        # Accept both a bare list and a dict carrying the rows under "results".
        rows = payload['results'] if isinstance(payload, dict) and 'results' in payload else payload
        self.assertGreaterEqual(len(rows), 1)
        first_row = rows[0]
        required = {'id', 'uuid', 'email_address', 'first_name', 'last_name', 'bio', 'timezone', 'avatar_url'}
        self.assertTrue(required.issubset(set(first_row.keys())))

    def test_retrieve_user_by_uuid(self):
        detail = self.client.get(f'/api/user/{self.user.uuid}/')
        self.assertEqual(detail.status_code, HTTP_200_OK)
        self.assertEqual(detail.json()['email_address'], 'apiuser@example.com')

    def test_retrieve_user_not_found(self):
        import uuid
        # A freshly generated UUID is used as an identifier no user has.
        missing = uuid.uuid4()
        detail = self.client.get(f'/api/user/{missing}/')
        self.assertEqual(detail.status_code, HTTP_404_NOT_FOUND)

View file

@ -1,121 +0,0 @@
from django.test import TestCase
from django.contrib.auth import get_user_model
from django.db import IntegrityError
from django.conf import settings
import uuid
User = get_user_model()
class UserModelTests(TestCase):
    """Model-level tests for the project's user model and its manager."""

    def setUp(self):
        self.user_data = {
            'email_address': 'Test@Example.com',
            'first_name': 'Test',
            'last_name': 'User',
            'date_of_birth': '1990-01-01',
        }

    def test_create_user_and_properties(self):
        user = User.objects.create_user(password='pass1234', **self.user_data)
        self.assertIsNotNone(user.pk)
        # The domain part is lower-cased; the local part keeps its case.
        self.assertEqual(user.email_address, 'Test@example.com')
        self.assertEqual(user.full_name, 'Test User')

    def test_create_superuser(self):
        su = User.objects.create_superuser(password='adminpass', **self.user_data)
        self.assertTrue(su.is_staff)
        self.assertIsNotNone(su.pk)
        self.assertTrue(su.is_active)

    def test_password_hashed_and_check(self):
        user = User.objects.create_user(email_address='hashme@example.com', password='secret123')
        self.assertNotEqual(user.password, 'secret123')
        self.assertTrue(user.check_password('secret123'))

    def test_uuid_and_id_auto_populated(self):
        u1 = User.objects.create_user(email_address='one@example.com', password='p')
        u2 = User.objects.create_user(email_address='two@example.com', password='p')
        self.assertIsNotNone(u1.uuid)
        self.assertIsInstance(u1.uuid, uuid.UUID)
        self.assertNotEqual(u1.uuid, u2.uuid)
        self.assertIsNotNone(u1.id)
        self.assertIsNotNone(u2.id)

    def test_default_fields(self):
        u = User.objects.create_user(email_address='defaults@example.com', password='p')
        self.assertEqual(u.bio, "")
        self.assertEqual(u.timezone, settings.TIME_ZONE)
        self.assertEqual(u.avatar_url, "")
        self.assertTrue(u.is_active)
        self.assertFalse(u.is_staff)

    def test_unique_email_constraint(self):
        User.objects.create_user(email_address='dup@example.com', password='p')
        with self.assertRaises(IntegrityError):
            User.objects.create_user(email_address='dup@example.com', password='p')

    def test_create_user_without_email_raises(self):
        with self.assertRaises(ValueError):
            User.objects.create_user(email_address='', password='p')

    def test_date_of_birth_optional(self):
        u = User.objects.create_user(email_address='nodob@example.com', password='p')
        self.assertIsNone(u.date_of_birth)

    def test_str_and_full_name(self):
        u = User.objects.create_user(
            email_address='name@example.com',
            password='p',
            first_name='A',
            last_name='B'
        )
        self.assertEqual(u.full_name, 'A B')
        self.assertEqual(str(u), 'A B')

    def test_email_normalization_domain_lowercase(self):
        user1 = User.objects.create_user(email_address='Test@EXAMPLE.COM', password='p')
        self.assertEqual(user1.email_address, 'Test@example.com')
        user2 = User.objects.create_user(email_address='test@EXAMPLE.COM', password='p2')
        self.assertEqual(user2.email_address, 'test@example.com')
        # Local parts differing only in case are stored as distinct addresses.
        self.assertNotEqual(user1.email_address, user2.email_address)

    def test_superuser_must_have_is_staff(self):
        with self.assertRaises(ValueError):
            User.objects.create_superuser(
                email_address='fail@example.com',
                password='p',
                is_staff=False
            )

    def test_role_default_is_employee(self):
        u = User.objects.create_user(email_address='role@example.com', password='p')
        # Read the attribute directly: getattr(..., False) would let this test
        # pass even if the model had no is_manager field at all.
        self.assertFalse(u.is_manager)

    def test_role_choices(self):
        u = User.objects.create_user(
            email_address='manager@example.com',
            password='p',
            is_manager=True
        )
        self.assertTrue(u.is_manager)

    def test_timestamps_auto_set(self):
        u = User.objects.create_user(email_address='timestamps@example.com', password='p')
        self.assertIsNotNone(u.created_at)
        self.assertIsNotNone(u.updated_at)
        # On a freshly created row both timestamps should be within a second.
        time_diff = abs((u.updated_at - u.created_at).total_seconds())
        self.assertLess(time_diff, 1.0)

    def test_has_perm_returns_true(self):
        u = User.objects.create_user(email_address='perm@example.com', password='p')
        self.assertTrue(u.has_perm('any.permission'))
        self.assertTrue(u.has_perm('another.permission', obj=None))

    def test_has_module_perms_returns_true(self):
        u = User.objects.create_user(email_address='modperm@example.com', password='p')
        self.assertTrue(u.has_module_perms('auth'))
        self.assertTrue(u.has_module_perms('users'))

View file

@ -1,96 +0,0 @@
from rest_framework.status import HTTP_200_OK, HTTP_201_CREATED, HTTP_400_BAD_REQUEST, HTTP_401_UNAUTHORIZED
from rest_framework.viewsets import ReadOnlyModelViewSet
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.permissions import IsAuthenticatedOrReadOnly, AllowAny, IsAuthenticated
from django.contrib.auth import authenticate, login, logout
from apps.users.models import User
from apps.users.serializers import UserSerializer
class UserViewSet(ReadOnlyModelViewSet):
    """Read-only user endpoints plus session-based account actions.

    Users are looked up by ``uuid``. The default permission is
    ``IsAuthenticatedOrReadOnly``; every extra action declares its own
    permission classes.
    """

    queryset = User.objects.all()
    serializer_class = UserSerializer
    permission_classes = [IsAuthenticatedOrReadOnly]
    lookup_field = 'uuid'

    @action(detail=False, methods=['post'], permission_classes=[AllowAny])
    def login(self, request):
        """Authenticate with ``email_address``/``password`` and open a session.

        Returns 400 when either field is missing or empty, 401 when
        ``authenticate`` rejects the credentials, otherwise 200 with the
        serialised user.
        """
        email_address = request.data.get('email_address')
        password = request.data.get('password')
        if not email_address or not password:
            return Response({'error': 'Email and password are required'}, status=HTTP_400_BAD_REQUEST)
        email_address = User.objects.normalize_email(email_address)
        user = authenticate(request, username=email_address, password=password)
        if user is None:
            return Response({'error': 'Invalid credentials'}, status=HTTP_401_UNAUTHORIZED)
        login(request, user)
        return Response({'user': UserSerializer(user).data, 'message': 'Login successful', 'success': True}, status=HTTP_200_OK)

    @action(detail=False, methods=['post'], permission_classes=[IsAuthenticated])
    def logout(self, request):
        """End the current session; only available to authenticated users."""
        logout(request)
        return Response({'message': 'Logout successful', 'success': True}, status=HTTP_200_OK)

    @action(detail=False, methods=['get'], permission_classes=[IsAuthenticated])
    def me(self, request):
        """Return the serialised requesting user with ``success: True`` added."""
        user_data = UserSerializer(request.user).data
        user_data['success'] = True
        return Response(user_data)

    @action(detail=False, methods=['get'], permission_classes=[AllowAny])
    def session(self, request):
        """Report ``isAuthenticated`` and ``isStaff`` for the current request.

        ``isStaff`` is always ``False`` for unauthenticated requests.
        """
        return Response({'isAuthenticated': request.user.is_authenticated, 'isStaff': request.user.is_staff if request.user.is_authenticated else False})

    @action(detail=False, methods=['post'], permission_classes=[AllowAny])
    def signup(self, request):
        """Create a user account from the posted fields.

        Required: ``email_address``, ``first_name``, ``last_name``,
        ``manager`` (a boolean or one of 'true'/'True'/'false'/'False'),
        ``password`` and a matching ``confirm_password``. ``date_of_birth``
        is passed through if present. Responds 201 on success and 400 with a
        ``detail`` message on any validation or creation failure.
        """
        try:
            # Accessing request.data can raise if the body cannot be parsed.
            data = request.data
        except Exception:
            return Response({'detail': 'Invalid data provided.', 'success': False}, status=HTTP_400_BAD_REQUEST)
        email_address = data.get('email_address')
        if not email_address:
            return Response({'detail': 'Email address is required.', 'success': False}, status=HTTP_400_BAD_REQUEST)
        email_address = User.objects.normalize_email(email_address)
        if User.objects.filter(email_address=email_address).exists():
            return Response({'detail': 'Email address already exists.', 'success': False}, status=HTTP_400_BAD_REQUEST)
        if not data.get('first_name') or not data.get('last_name'):
            return Response({'detail': 'First and last name(s) must be provided.', 'success': False}, status=HTTP_400_BAD_REQUEST)
        manager = data.get('manager')
        if not isinstance(manager, bool):
            # Form-encoded payloads deliver booleans as strings.
            if manager in ['true', 'True']:
                manager = True
            elif manager in ['false', 'False']:
                manager = False
            else:
                return Response({'detail': '"manager" field must be a boolean value.', 'success': False}, status=HTTP_400_BAD_REQUEST)
        password = data.get('password')
        if not password:
            # Without this check a request that omits both password fields
            # passes the equality test below (None == None).
            return Response({'detail': 'Password is required.', 'success': False}, status=HTTP_400_BAD_REQUEST)
        if password != data.get('confirm_password'):
            return Response({'detail': 'Passwords do not match.', 'success': False}, status=HTTP_400_BAD_REQUEST)
        try:
            User.objects.create_user(
                email_address=email_address,
                password=password,
                first_name=data.get('first_name'),
                last_name=data.get('last_name'),
                date_of_birth=data.get('date_of_birth'),
                is_manager=manager,
            )
            return Response({'detail': 'User account created successfully.', 'success': True}, status=HTTP_201_CREATED)
        except Exception as e:
            # NOTE(review): str(e) is returned to the client and may expose
            # internal details; consider a generic message plus server-side logging.
            return Response({'detail': str(e), 'success': False}, status=HTTP_400_BAD_REQUEST)

    @action(detail=False, methods=['post'], permission_classes=[IsAuthenticated])
    def change_password(self, request):
        """Change the requesting user's password.

        Requires non-empty ``old_password``, ``password`` and
        ``confirm_password``. Responds 400 for a missing field or a mismatch,
        401 when ``old_password`` is wrong, and 200 on success. The current
        session is kept valid after the change.
        """
        # Local import so this block does not depend on the module's import list.
        from django.contrib.auth import update_session_auth_hash

        data = request.data
        required_fields = ['old_password', 'password', 'confirm_password']
        for field in required_fields:
            if not data.get(field):
                return Response({'detail': f'"{field}" not provided', 'success': False}, status=HTTP_400_BAD_REQUEST)
        if data.get('password') != data.get('confirm_password'):
            return Response({'detail': 'Passwords do not match', 'success': False}, status=HTTP_400_BAD_REQUEST)
        user = request.user
        if not user.check_password(data.get('old_password')):
            return Response({'detail': 'Old password is incorrect', 'success': False}, status=HTTP_401_UNAUTHORIZED)
        user.set_password(data.get('password'))
        user.save()
        # Changing the password changes the session auth hash; refresh it so
        # the caller is not logged out by their own password change.
        update_session_auth_hash(request, user)
        return Response({'detail': 'Password changed successfully', 'success': True}, status=HTTP_200_OK)

View file

@ -1,19 +0,0 @@
# Development image for the Celery worker.
# Only Python dependencies are installed here; no project source is copied in.
# NOTE(review): the worker presumably relies on the source tree being mounted
# at /app at run time — confirm against the compose file that uses this image.
FROM python:3.12-bookworm
# Install the C toolchain and libpq headers, then clean up apt state in the
# same layer so the package lists do not end up in the image.
RUN apt-get update && apt-get install --no-install-recommends -y \
build-essential \
libpq-dev \
&& apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
&& rm -rf /var/lib/apt/lists/*
# Put /venv/bin first on PATH so later python/pip/celery calls use the venv.
ENV VIRTUAL_ENV=/venv \
PATH=/venv/bin:$PATH
# Create the virtual environment referenced by the ENV above.
RUN python -m venv /venv
WORKDIR /app
# Install the Python requirements into the venv.
COPY requirements/django.txt .
RUN pip install --no-cache-dir --requirement django.txt
# Start a Celery worker for the "config" app at info log level.
CMD ["celery", "-A", "config", "worker", "-l", "info"]

View file

@ -1,23 +0,0 @@
# Development image for the Django application server.
# Installs Python dependencies and the /start entrypoint script; no project
# source is copied in.
# NOTE(review): the app presumably relies on the source tree being mounted at
# /app at run time — confirm against the compose file that uses this image.
FROM python:3.12-bookworm
# Install the C toolchain, libpq headers and wait-for-it (used by /start to
# wait for the database), then clean up apt state in the same layer.
RUN apt-get update && apt-get install --no-install-recommends -y \
build-essential \
libpq-dev \
wait-for-it \
&& apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
&& rm -rf /var/lib/apt/lists/*
# Put /venv/bin first on PATH so later python/pip calls use the venv.
ENV VIRTUAL_ENV=/venv \
PATH=/venv/bin:$PATH
# Create the virtual environment referenced by the ENV above.
RUN python -m venv /venv
WORKDIR /app
# Install the Python requirements into the venv.
COPY requirements/django.txt .
RUN pip install --no-cache-dir --requirement django.txt
# Install the entrypoint script.
COPY ./compose/dev/django/start /start
# Strip Windows carriage returns (CRLF checkouts) and make it executable.
RUN sed -i 's/\r$//g' /start && chmod +x /start
CMD ["/start"]

View file

@ -1,27 +0,0 @@
#!/bin/bash
# Development entrypoint for the Django container.
#
# Waits for the database, applies migrations, loads every JSON fixture found
# in /app/data, collects static files and then replaces this shell with the
# Django development server.
#
# Required environment: POSTGRES_HOST, POSTGRES_PORT (nounset aborts the
# script if either is missing).

set -o errexit
set -o pipefail
set -o nounset

DB_HOST="${POSTGRES_HOST}"
DB_PORT="${POSTGRES_PORT}"

echo "Waiting for database at ${DB_HOST}:${DB_PORT}..."
# Quote the target so an unexpected value cannot be word-split.
wait-for-it "${DB_HOST}:${DB_PORT}" --timeout=30 --strict || {
    echo "Timed out waiting for database" >&2
    exit 1
}
echo "Database is available, continuing startup..."

python manage.py makemigrations
python manage.py migrate --noinput

# With nullglob an empty /app/data yields zero iterations instead of passing
# the literal pattern to loaddata (which would fail and, under errexit, abort
# the whole startup).
shopt -s nullglob
for fixture in /app/data/*.json; do
    echo "Loading fixture: $fixture"
    python manage.py loaddata "$fixture"
done
shopt -u nullglob

python manage.py collectstatic --noinput

# Bind to all interfaces so the published container port is reachable.
# The previous "0.0.0:8000" was missing an octet.
exec python manage.py runserver 0.0.0.0:8000

View file

@ -1,116 +0,0 @@
# Development stack: Django, Vite/Node, PostgreSQL (pgvector), Redis, Celery
# and the GPU-backed MCP server. Build contexts and env files are resolved
# relative to this file, two directories below the repository root.
services:
  # Django dev server; the source tree is bind-mounted at /app.
  fyp-django-dev:
    container_name: fyp-django-dev
    build:
      context: ../../
      dockerfile: compose/dev/django/Dockerfile
    env_file:
      - ../../.env
    volumes:
      - ../../:/app
    ports:
      - '0.0.0.0:8000:8000'
    depends_on:
      fyp-postgres-dev:
        condition: service_healthy
      fyp-node-dev:
        condition: service_started

  # Front-end dev server; individual files are mounted so that the image's
  # own node_modules (anonymous volume below) is not hidden by the host tree.
  fyp-node-dev:
    container_name: fyp-node-dev
    build:
      context: ../../
      dockerfile: compose/dev/node/Dockerfile
    environment:
      NODE_ENV: development
      CHOKIDAR_USEPOLLING: 'true'
    stdin_open: true
    volumes:
      - ../../src:/app/src:delegated
      - ../../index.html:/app/index.html:delegated
      - ../../vite.config.ts:/app/vite.config.ts:delegated
      - ../../tsconfig.json:/app/tsconfig.json:delegated
      - ../../build:/app/build:delegated
      - ../../package.json:/app/package.json:delegated
      - ../../package-lock.json:/app/package-lock.json:delegated
      - /app/node_modules
    ports:
      - '0.0.0.0:5173:5173'

  # PostgreSQL 15 with the pgvector extension.
  fyp-postgres-dev:
    container_name: fyp-postgres-dev
    image: pgvector/pgvector:pg15
    env_file:
      - ../../.env
    environment:
      # NOTE(review): "trust" disables password checks, and the port below is
      # published on all interfaces. Acceptable for local development only.
      POSTGRES_HOST_AUTH_METHOD: trust
    volumes:
      - fyp_postgres_data:/var/lib/postgresql/data
    ports:
      - '0.0.0.0:5432:5432'
    healthcheck:
      # "$$" defers expansion to the shell inside the container, where
      # env_file has set POSTGRES_USER/POSTGRES_DB. A single "$" is
      # interpolated by Compose at parse time, which does not read env_file.
      test: ['CMD-SHELL', 'pg_isready -h 127.0.0.1 -p 5432 -U $${POSTGRES_USER} -d $${POSTGRES_DB}']
      interval: 5s
      timeout: 3s
      retries: 5

  # Redis, used as the Celery broker.
  fyp-redis-dev:
    container_name: fyp-redis-dev
    image: redis:7-alpine
    ports:
      - '0.0.0.0:6379:6379'
    volumes:
      - fyp_redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5

  # Celery worker; started only once Redis and PostgreSQL report healthy.
  fyp-celery-dev:
    container_name: fyp-celery-dev
    build:
      context: ../../
      dockerfile: compose/dev/celery/Dockerfile
    env_file:
      - ../../.env
    volumes:
      - ../../:/app
    depends_on:
      fyp-redis-dev:
        condition: service_healthy
      fyp-postgres-dev:
        condition: service_healthy

  # MCP server; reserves all NVIDIA GPUs on the host.
  fyp-mcp-dev:
    container_name: fyp-mcp-dev
    build:
      context: ../../
      dockerfile: compose/dev/mcp/Dockerfile
    env_file:
      - ../../.env
    volumes:
      - ../../:/app
      - ../../notebooks/build:/app/notebooks/build
    deploy:
      mode: replicated
      replicas: 1
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
    ports:
      - "0.0.0.0:8001:8001"
    depends_on:
      fyp-redis-dev:
        condition: service_healthy
      fyp-postgres-dev:
        condition: service_healthy

# Named volumes keep database and Redis data across container re-creation.
volumes:
  fyp_postgres_data:
  fyp_redis_data:

View file

@ -1,39 +0,0 @@
# Development image for the MCP agent server (CUDA 11.8 runtime on Ubuntu 22.04).
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
WORKDIR /app

# All system packages in ONE layer: a single `apt-get update`, a single list
# cleanup. Also expose `python` / `pip` as aliases for the python3 binaries.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        python3 \
        python3-pip \
        python3-dev \
        build-essential \
        cmake \
        pkg-config \
        git \
        ca-certificates \
        libffi-dev \
        libssl-dev \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python \
    && ln -sf /usr/bin/pip3 /usr/bin/pip

# Packaging tools are upgraded in their own layer, separate from apt.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# If libcudart.so.11.0 is missing from the system library directory, link the
# first libcudart found under /usr/local/cuda/lib64 to that name. Best effort:
# every step tolerates failure.
RUN if [ ! -e /usr/lib/x86_64-linux-gnu/libcudart.so.11.0 ]; then \
        found=$(ls /usr/local/cuda/lib64/libcudart.so* 2>/dev/null | head -n1 || true); \
        if [ -n "$found" ]; then \
            mkdir -p /usr/lib/x86_64-linux-gnu || true; \
            ln -sf "$found" /usr/lib/x86_64-linux-gnu/libcudart.so.11.0 || true; \
        fi; \
    fi

# Only the requirements file is copied; application code is expected to be
# bind-mounted at /app by compose.
COPY requirements/mcp.txt .
RUN pip install --no-cache-dir --requirement mcp.txt

ENV PYTHONUNBUFFERED=1
ENV DJANGO_SETTINGS_MODULE=config.settings
EXPOSE 8001
CMD ["python", "-m", "mcp_agent.mcp_server"]

View file

@ -1,15 +0,0 @@
# Development image for the frontend dev server.
FROM node:22-bullseye
WORKDIR /app
# Copy the manifests first so the dependency layer is rebuilt only when they change.
COPY package*.json ./
RUN npm ci && npm cache clean --force
COPY src ./src
COPY index.html .
COPY vite.config.* .
COPY tsconfig.* .
EXPOSE 5173
# "devwatch" is an npm script defined in package.json (not visible here).
CMD ["npm", "run", "devwatch"]

View file

@ -1,27 +0,0 @@
# Production image for the Celery worker.
FROM python:3.12.0-slim
LABEL org.opencontainers.image.title="Dynavera Celery Worker"
LABEL org.opencontainers.image.source="https://git.cs.bham.ac.uk/projects-2025-26/vxn217"
# No .pyc files, and unbuffered stdout/stderr so logs appear immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Compiler toolchain and PostgreSQL client headers for building Python packages.
RUN apt-get update && apt-get install --no-install-recommends -y \
build-essential \
libpq-dev \
&& rm -rf /var/lib/apt/lists/*
# Requirements are installed before the code is copied, so code changes reuse this layer.
COPY requirements/django.txt .
RUN pip install --no-cache-dir -r django.txt
COPY manage.py manage.py
COPY config config
COPY apps apps
COPY data data
COPY mcp_agent mcp_agent
RUN mkdir -p /app/static
# NOTE(review): no USER instruction, so the worker runs as root — confirm this is intended.
CMD ["celery", "-A", "config.celery", "worker", "--loglevel=info"]

View file

@ -1,51 +0,0 @@
# ---- Stage 1: build the frontend bundle -------------------------------------
FROM node:22-alpine AS node
WORKDIR /app
# Manifests first: the `npm ci` layer is reused until they change.
COPY package*.json ./
RUN npm ci
# Build inputs. The manifests are already present, so they are not copied again.
COPY vite.config.ts tsconfig.json index.html ./
COPY src ./src
RUN npm run build

# ---- Stage 2: Django application image ----------------------------------------
FROM python:3.12.0-slim AS python
LABEL org.opencontainers.image.title="Dynavera - An Agentic Approach to Role-Specific Trainers"
LABEL org.opencontainers.image.source="https://git.cs.bham.ac.uk/projects-2025-26/vxn217"
LABEL org.opencontainers.image.description="Dynavera (Final Year Project)"
# No .pyc files, and unbuffered stdout/stderr so logs appear immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Toolchain, PostgreSQL client headers, and wait-for-it (used by /start).
RUN apt-get update && apt-get install --no-install-recommends -y \
        build-essential \
        libpq-dev \
        wait-for-it \
    && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
    && rm -rf /var/lib/apt/lists/*
# Requirements before code so code-only changes reuse the dependency layer.
COPY requirements/django.txt .
RUN pip install --no-cache-dir -r django.txt
COPY manage.py manage.py
COPY config config
COPY apps apps
COPY data data
COPY mcp_agent mcp_agent
# Frontend bundle produced by stage 1.
COPY --from=node /app/build ./build
RUN mkdir -p /app/static
COPY ./compose/prod/django/start /start
# Strip Windows line endings so the shebang works, then make it executable.
RUN sed -i 's/\r$//g' /start && chmod +x /start
ENTRYPOINT ["/start"]

View file

@ -1,27 +0,0 @@
#!/bin/bash
# Entrypoint of the production Django image: wait for the database, apply
# migrations, load fixtures, collect static files, then hand over to Daphne.
set -o errexit
set -o pipefail
set -o nounset

# Fail with an explicit message when the database location was not provided.
DB_HOST="${POSTGRES_HOST:?POSTGRES_HOST must be set}"
DB_PORT="${POSTGRES_PORT:?POSTGRES_PORT must be set}"

echo "Waiting for database at ${DB_HOST}:${DB_PORT}..."
# Quoted so an unexpected value cannot be word-split into extra arguments.
wait-for-it "${DB_HOST}:${DB_PORT}" --timeout=30 --strict || {
    echo "Timed out waiting for database" >&2
    exit 1
}
echo "Database is available, continuing startup..."

# NOTE(review): makemigrations at container start writes migration files into
# the running container only; migrations are normally generated in development
# and committed. Kept to preserve current behaviour — confirm it is intended.
python manage.py makemigrations
python manage.py migrate --noinput

# NOTE(review): fixtures are re-loaded on every start, which rewrites rows
# that share a primary key with a fixture entry — confirm this is intended.
# nullglob: with no *.json files the loop is skipped. Without it the literal
# pattern is passed to loaddata, which fails and aborts startup under errexit.
shopt -s nullglob
for fixture in /app/data/*.json; do
    echo "Loading fixture: $fixture"
    python manage.py loaddata "$fixture"
done
shopt -u nullglob

python manage.py collectstatic --noinput
# exec: Daphne replaces this shell and receives container signals directly.
exec daphne -b 0.0.0.0 -p 8000 config.asgi:application

View file

@ -1,26 +0,0 @@
# Stand-alone compose file for the production MCP agent server.
services:
fyp-mcp-prod:
container_name: fyp-mcp-prod
build:
context: ../../
# NOTE(review): production reuses the *dev* MCP Dockerfile — confirm this is intended.
dockerfile: compose/dev/mcp/Dockerfile
restart: unless-stopped
deploy:
mode: replicated
replicas: 1
resources:
reservations:
# Reserve every NVIDIA GPU on the host for this container.
devices:
- driver: nvidia
count: all
capabilities: [gpu]
env_file:
- ../../.env
environment:
# NOTE(review): the .env template defines MCP_SERVER_HOST/MCP_SERVER_PORT, not MCP_HTTP_*;
# which names the server actually reads is not visible here — confirm.
- MCP_HTTP_HOST=0.0.0.0
- MCP_HTTP_PORT=8001
- NVIDIA_VISIBLE_DEVICES=all
ports:
# Host port comes from MCP_SERVER_PORT (interpolated by Compose); container port is fixed at 8001.
- '0.0.0.0:${MCP_SERVER_PORT}:8001'
volumes:
# The repository checkout is bind-mounted over /app.
- ../../:/app

View file

@ -1,122 +0,0 @@
services:
# Django/Daphne application container, routed through Traefik and
# auto-updated by Watchtower (scope "fyp").
fyp-django-prod:
  container_name: fyp-django-prod
  image: "${FYP_DJANGO_IMAGE}"
  # Same policy as the data services, so the app also returns after a host
  # reboot or a crash.
  restart: unless-stopped
  env_file:
    - ../../.env
  labels:
    # DJANGO_DOMAIN_NAME, DJANGO_ENTRYPOINT, CERTRESOLVER and DJANGO_PORT are
    # interpolated by Compose when the file is parsed.
    - "traefik.enable=true"
    - "traefik.http.routers.fyp-web.rule=Host(`${DJANGO_DOMAIN_NAME}`)"
    - "traefik.http.routers.fyp-web.entrypoints=${DJANGO_ENTRYPOINT}"
    - "traefik.http.routers.fyp-web.tls.certresolver=${CERTRESOLVER}"
    - "traefik.http.routers.fyp-web.tls=true"
    # The /start entrypoint binds Daphne to port 8000; DJANGO_PORT must match.
    - "traefik.http.services.fyp-web.loadbalancer.server.port=${DJANGO_PORT}"
    - "com.centurylinklabs.watchtower.enable=true"
    - "com.centurylinklabs.watchtower.scope=fyp"
  volumes:
    - ../../static:/app/static
    - ../../media:/app/media
  depends_on:
    fyp-postgres-prod:
      condition: service_healthy
    # The settings module points the Channels layer and the Celery broker at
    # Redis, so the app needs Redis as well as Postgres.
    fyp-redis-prod:
      condition: service_healthy
  networks:
    - fyp-network
    - proxy
# Production PostgreSQL 15 with pgvector; reachable only on fyp-network
# (no ports are published).
fyp-postgres-prod:
  container_name: fyp-postgres-prod
  image: pgvector/pgvector:pg15
  hostname: fyp-postgres-prod
  restart: unless-stopped
  env_file:
    - ../../.env
  environment:
    # NOTE(review): "trust" disables password checks for every client on
    # fyp-network. Consider removing it so POSTGRES_PASSWORD is enforced.
    # It only takes effect when the data directory is first initialised.
    POSTGRES_HOST_AUTH_METHOD: trust
  volumes:
    - fyp_postgres_data:/var/lib/postgresql/data
  healthcheck:
    # "$$" defers expansion to the shell inside the container, where the
    # env_file values exist. A single "$" is interpolated by Compose from the
    # host environment and may expand to an empty string.
    test: ['CMD-SHELL', 'pg_isready -h 127.0.0.1 -p 5432 -U "$${POSTGRES_USER}" -d "$${POSTGRES_DB}"']
    interval: 5s
    timeout: 3s
    retries: 5
  networks:
    - fyp-network
# Production Redis; no ports are published, so it is reachable only on fyp-network.
fyp-redis-prod:
container_name: fyp-redis-prod
image: redis:7-alpine
restart: unless-stopped
volumes:
- fyp_redis_data:/data
healthcheck:
# Healthy once redis-cli can ping the server.
test: ["CMD", "redis-cli", "ping"]
interval: 5s
timeout: 3s
retries: 5
networks:
- fyp-network
# Production Celery worker.
fyp-celery-prod:
  container_name: fyp-celery-prod
  image: "${FYP_CELERY_IMAGE}"
  # Same policy as the data services, so the worker also returns after a host
  # reboot or a crash.
  restart: unless-stopped
  env_file:
    - ../../.env
  volumes:
    # NOTE(review): mounting the checkout over /app hides the code baked into
    # the image, so the worker runs whatever is on the host — confirm.
    - ../../:/app
    - ../../static:/app/static
    - ../../media:/app/media
  # NOTE(review): unlike fyp-django-prod this service carries no Watchtower
  # labels, so "--label-enable" will not update it — confirm.
  depends_on:
    fyp-redis-prod:
      condition: service_healthy
    fyp-postgres-prod:
      condition: service_healthy
  networks:
    - fyp-network
# Watchtower: pulls new images and restarts labelled containers.
fyp-watchtower-prod:
container_name: fyp-watchtower-prod
image: containrrr/watchtower
command:
# Only containers labelled with scope "fyp" and the enable label are managed.
- "--scope=fyp"
- "--label-enable"
# Poll interval, in seconds.
- "--interval"
- "30"
- "--rolling-restart"
environment:
- WATCHTOWER_CLEANUP=true
# Registry credentials, interpolated by Compose from GITLAB_USER / GITLAB_PASS.
- REPO_USER=${GITLAB_USER}
- REPO_PASS=${GITLAB_PASS}
volumes:
# Docker socket access is how Watchtower controls other containers.
- "/var/run/docker.sock:/var/run/docker.sock"
# GitLab CI runner using the Docker executor against the host's Docker daemon.
fyp-runner-prod:
container_name: fyp-runner-prod
image: gitlab/gitlab-runner:${GITLAB_RUNNER_IMAGE_TAG}
restart: unless-stopped
environment:
# All ${GITLAB_*} values are interpolated by Compose when the file is parsed.
- CI_SERVER_URL=${GITLAB_SERVER_URL}
- REGISTRATION_TOKEN=${GITLAB_RUNNER_REGISTRATION_TOKEN}
- RUNNER_EXECUTOR=docker
- RUNNER_RUN_UNTAGGED=true
- RUNNER_TAG_LIST=
- DOCKER_TLS_CERTDIR=
- DOCKER_IMAGE=${GITLAB_RUNNER_DOCKER_IMAGE}
volumes:
- gitlab-runner-config:/etc/gitlab-runner
- gitlab-machine-config:/root/.docker/machine
# Host Docker socket: jobs run as containers on the host daemon.
- /var/run/docker.sock:/var/run/docker.sock
command:
- run
- "--working-directory=/home/gitlab-runner"
# Named volumes used by the services above.
volumes:
fyp_postgres_data:
fyp_redis_data:
gitlab-runner-config:
gitlab-machine-config:
networks:
# Private bridge network for app <-> database/redis traffic.
fyp-network:
driver: bridge
# Pre-existing external network (must be created outside this file); the Django service joins it.
proxy:
external: true

View file

@ -1,3 +0,0 @@
# Import the Celery app when the package is imported and re-export it as `celery_app`.
from .celery import app as celery_app
__all__ = ('celery_app',)

View file

@ -1,18 +0,0 @@
from rest_framework.routers import DefaultRouter
from apps.orgs.viewsets import OrganizationViewSet
from apps.users.viewsets import UserViewSet
from apps.mlstore.viewsets import AgentViewSet, AgentRunViewSet
from apps.onboarding.viewsets import OnboardingFlowViewSet, OnboardingPageViewSet, OnboardingFieldViewSet, OnboardingSessionViewSet
# REST API route table: (URL prefix, viewset, basename), registered in this order.
_API_ROUTES = (
    (r'user', UserViewSet, 'user'),
    (r'organization', OrganizationViewSet, 'organization'),
    (r'agent', AgentViewSet, 'agent'),
    (r'agent-run', AgentRunViewSet, 'agent-run'),
    (r'onboarding/flow', OnboardingFlowViewSet, 'onboarding-flow'),
    (r'onboarding/page', OnboardingPageViewSet, 'onboarding-page'),
    (r'onboarding/field', OnboardingFieldViewSet, 'onboarding-field'),
    (r'onboarding/session', OnboardingSessionViewSet, 'onboarding-session'),
)

router = DefaultRouter()
for _prefix, _viewset, _basename in _API_ROUTES:
    router.register(_prefix, _viewset, basename=_basename)

# Exposed for `include('config.api')` in the project URLconf.
urlpatterns = router.urls

View file

@ -1,21 +0,0 @@
import os
from django.core.asgi import get_asgi_application
from channels.auth import AuthMiddlewareStack
from channels.routing import ProtocolTypeRouter, URLRouter
from channels.security.websocket import AllowedHostsOriginValidator
# Default settings module; an already-set DJANGO_SETTINGS_MODULE wins.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
# Build the HTTP ASGI app first so Django is initialised before the import below.
django_asgi_app = get_asgi_application()
# Deliberately imported after get_asgi_application(); presumably the routing
# module imports app code that needs Django to be set up — confirm.
from apps.mlstore.routing import websocket_urlpatterns
# Dispatch by protocol: plain HTTP goes to Django; WebSockets go through the
# origin check and auth middleware to the mlstore URL routes.
application = ProtocolTypeRouter({
"http": django_asgi_app,
"websocket": AllowedHostsOriginValidator(
AuthMiddlewareStack(
URLRouter(websocket_urlpatterns)
)
)
})

View file

@ -1,8 +0,0 @@
from celery import Celery
import os
# Default settings module; an already-set DJANGO_SETTINGS_MODULE wins.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
app = Celery('config')
# Read Celery options from Django settings: only names prefixed with CELERY_.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Discover task modules automatically.
app.autodiscover_tasks()

View file

@ -1,210 +0,0 @@
"""
Django settings will use prefix of DJANGO_ for environment variables.
"""
import os
from pathlib import Path
import sys
from dotenv import load_dotenv
# Project root: two levels above this settings file.
BASE_DIR = Path(__file__).resolve().parent.parent
# Load variables from <project root>/.env into the process environment.
load_dotenv(dotenv_path = BASE_DIR / '.env')
# NOTE: values read from the environment are str, while the fallbacks are Path objects.
FRONT_DIR = os.getenv('DJANGO_FRONT_DIR', BASE_DIR / 'front')
MODEL_DIR = os.getenv('DJANGO_MODEL_DIR', BASE_DIR / 'model')
# None when DJANGO_SECRET_KEY is not set.
SECRET_KEY = os.getenv('DJANGO_SECRET_KEY')
# True only for 1/true/yes/on (case-insensitive); unset yields "none", i.e. False.
DEBUG = str(os.getenv('DJANGO_DEBUG')).lower() in ('1', 'true', 'yes', 'on')
DOMAIN_NAME = os.getenv('DJANGO_DOMAIN_NAME', 'localhost')
# Comma-separated list; surrounding whitespace is stripped and empty entries are dropped.
ALLOWED_HOSTS = [stripped_host for host in os.getenv('DJANGO_ALLOWED_HOSTS', 'localhost').split(',') if (stripped_host:=host.strip())]
# Name of the directory containing this file; used to build the dotted module paths below.
PARENT_NAME = Path(__file__).resolve().parent.name
# Used both as the Celery broker URL and as the Channels Redis host.
DJANGO_CELERY_BROKER_URL = os.getenv('DJANGO_CELERY_BROKER_URL', 'redis://localhost:6379/0')
MCP_SERVER_HOST = os.getenv('MCP_SERVER_HOST', 'localhost')
MCP_SERVER_PORT = os.getenv('MCP_SERVER_PORT', '8001')
MCP_AGENT_URL = f"http://{MCP_SERVER_HOST}:{MCP_SERVER_PORT}"
STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')
MEDIA_URL = os.getenv('DJANGO_MEDIA_URL', '/media/')
STATIC_ROOT = os.getenv('DJANGO_STATIC_ROOT', BASE_DIR / 'static')
MEDIA_ROOT = os.getenv('DJANGO_MEDIA_ROOT', BASE_DIR / 'media')
# Defaults to SQLite when no engine is configured.
DB_ENGINE = os.getenv('DJANGO_DB_ENGINE', 'django.db.backends.sqlite3')
# NOTE(review): DJANGO_POSTGRES_DB is also used as the database NAME when the engine
# is SQLite, so a set value replaces the db.sqlite3 default — confirm this is intended.
DB_NAME = os.getenv('DJANGO_POSTGRES_DB', BASE_DIR / 'db.sqlite3')
DB_USER = os.getenv('DJANGO_POSTGRES_USER')
DB_PASSWORD = os.getenv('DJANGO_POSTGRES_PASSWORD')
DB_HOST = os.getenv('DJANGO_POSTGRES_HOST')
# str when read from the environment, int 5432 otherwise.
DB_PORT = os.getenv('DJANGO_POSTGRES_PORT', 5432)
# When any command-line argument starts with "test", ignore the configured
# database and use an in-memory SQLite database instead.
if any(argument.startswith('test') for argument in sys.argv):
    DB_ENGINE, DB_NAME = 'django.db.backends.sqlite3', ':memory:'
    DB_USER = DB_PASSWORD = DB_HOST = DB_PORT = None
# Apps listed ahead of django.contrib in INSTALLED_APPS (see the concatenation below).
OVERRIDE_APPS = [
'jazzmin',
'daphne',
]
DJANGO_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
]
THIRD_PARTY_APPS = [
'rest_framework',
'channels',
'django_celery_results',
'django_celery_beat',
'corsheaders',
]
# Project apps under the apps/ package.
LOCAL_APPS = [
'apps.users',
'apps.orgs',
'apps.mlstore',
'apps.onboarding',
]
# Order matters: overrides first, then Django, third-party, and local apps.
INSTALLED_APPS = OVERRIDE_APPS + DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS
# Custom user model from apps.users.
AUTH_USER_MODEL = 'users.User'
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
# Middleware runs top-down on requests; WhiteNoise and CORS sit directly after SecurityMiddleware.
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'whitenoise.middleware.WhiteNoiseMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
# Dotted paths derived from the name of the directory containing this file.
ROOT_URLCONF = f'{PARENT_NAME}.urls'
WSGI_APPLICATION = f'{PARENT_NAME}.wsgi.application'
ASGI_APPLICATION = f'{PARENT_NAME}.asgi.application'
# Channels layer backed by Redis; reuses the Celery broker URL as its host.
CHANNEL_LAYERS = {
'default': {
'BACKEND': 'channels_redis.core.RedisChannelLayer',
'CONFIG': {
'hosts': [DJANGO_CELERY_BROKER_URL],
},
},
}
# Sessions are stored in the database.
SESSION_ENGINE = 'django.contrib.sessions.backends.db'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
# SQLite needs only ENGINE and NAME; any other engine also gets credentials,
# host/port and a 600-second persistent connection lifetime.
# NOTE(review): Django's documentation advises disabling persistent connections
# under ASGI; this project is served by Daphne — confirm CONN_MAX_AGE is wanted.
DATABASES = {
'default': {
'ENGINE': DB_ENGINE,
'NAME': DB_NAME,
} if DB_ENGINE == 'django.db.backends.sqlite3' else {
'ENGINE': DB_ENGINE,
'NAME': DB_NAME,
'USER': DB_USER,
'PASSWORD': DB_PASSWORD,
'HOST': DB_HOST,
'PORT': DB_PORT,
'CONN_MAX_AGE': 600,
}
}
# Uploaded files on the local filesystem; static files via WhiteNoise's compressed manifest storage.
STORAGES = {
"default": {
"BACKEND": "django.core.files.storage.FileSystemStorage",
},
"staticfiles": {
"BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage",
},
}
# Django's four stock password validators.
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
# Internationalisation. British English is 'en-gb'; 'en-uk' is not a valid
# language code (the region subtag for the United Kingdom is GB).
LANGUAGE_CODE = 'en-gb'
TIME_ZONE = 'UTC'
USE_I18N = True
# Store and handle datetimes as timezone-aware values.
USE_TZ = True
# DRF defaults: session or HTTP Basic authentication.
# NOTE(review): AllowAny is the default permission, so any viewset that does not set its
# own permission_classes is open to unauthenticated callers — confirm this is intended.
REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': [
'rest_framework.authentication.SessionAuthentication',
'rest_framework.authentication.BasicAuthentication',
],
'DEFAULT_PERMISSION_CLASSES': [
'rest_framework.permissions.AllowAny',
],
}
# Celery options (picked up through the CELERY_ namespace in config/celery.py).
CELERY_BROKER_URL = DJANGO_CELERY_BROKER_URL
CELERY_RESULT_BACKEND = 'django-db'
CELERY_CACHE_BACKEND = 'django-cache'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TIMEZONE = 'UTC'
CELERY_TASK_TRACK_STARTED = True
# 30 minutes, expressed in seconds.
CELERY_TASK_TIME_LIMIT = 30 * 60
X_FRAME_OPTIONS = 'SAMEORIGIN'
CORS_ALLOW_CREDENTIALS = True
# The configured domain over http and https; dev ports are appended further down when DEBUG.
CORS_ALLOWED_ORIGINS = [
f'http://{DOMAIN_NAME}',
f'https://{DOMAIN_NAME}',
]
CSRF_TRUSTED_ORIGINS = [
f'http://{DOMAIN_NAME}',
f'https://{DOMAIN_NAME}',
]
# The CSRF cookie stays readable by JavaScript; the session cookie does not.
CSRF_COOKIE_HTTPONLY = False
# Secure (HTTPS-only) cookies whenever DEBUG is off.
CSRF_COOKIE_SECURE = not DEBUG
CSRF_COOKIE_SAMESITE = 'Lax'
SESSION_COOKIE_SAMESITE = 'Lax'
SESSION_COOKIE_HTTPONLY = True
SESSION_COOKIE_SECURE = not DEBUG
# 1209600 seconds = 14 days.
SESSION_COOKIE_AGE = 1209600
SESSION_SAVE_EVERY_REQUEST = True
# In debug mode also trust the local dev servers on ports 5173 and 8000.
if DEBUG:
    _dev_origins = [f'http://{DOMAIN_NAME}:5173', f'http://{DOMAIN_NAME}:8000']
    CORS_ALLOWED_ORIGINS.extend(_dev_origins)
    CSRF_TRUSTED_ORIGINS.extend(_dev_origins)

View file

@ -1,14 +0,0 @@
from django.contrib import admin
from django.urls import path, include, re_path
from django.conf import settings
from django.conf.urls.static import static
from .views import serve_frontend
# Order matters: admin and API are matched first; everything else that does not start
# with "static/" or "media/" is handed to the frontend view.
urlpatterns = [
path('admin/', admin.site.urls),
path('api/', include('config.api')),
# NOTE(review): the excluded prefixes are hard-coded and do not follow STATIC_URL/MEDIA_URL.
re_path(r'^(?!static/|media/)(?P<path>.*)$', serve_frontend, {'document_root': settings.FRONT_DIR}),
# Static and media file routes built by django.conf.urls.static.static().
*static(settings.STATIC_URL, document_root=settings.STATIC_ROOT),
*static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT),
]

View file

@ -1,14 +0,0 @@
from django.utils._os import safe_join
from django.views.static import serve as static_serve
from django.views.decorators.csrf import ensure_csrf_cookie
import posixpath
from pathlib import Path
@ensure_csrf_cookie
def serve_frontend(request, path, document_root = None):
    """Serve a file from the frontend build, falling back to ``index.html``.

    The requested path is normalised and resolved under ``document_root`` with
    ``safe_join``. When it names an existing file that file is served; any
    other path gets ``index.html``. The decorator ensures the response sets
    the CSRF cookie.
    """
    relative = posixpath.normpath(path).lstrip("/")
    candidate = Path(safe_join(document_root, relative))
    target = relative if candidate.is_file() else "index.html"
    return static_serve(request, target, document_root)

View file

@ -1,5 +0,0 @@
import os
from django.core.wsgi import get_wsgi_application
# Default settings module; an already-set DJANGO_SETTINGS_MODULE wins.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
# WSGI callable referenced by WSGI_APPLICATION in the settings.
application = get_wsgi_application()

View file

@ -1,50 +0,0 @@
[
{
"model": "users.user",
"pk": 1,
"fields": {
"password": "pbkdf2_sha256$1000000$oXMS97UxfjEaxIWWiIVGAA$MnBRR7yP5/jvB9Nszj+is73xTM+V34EXBtGkjZCEMfc=",
"last_login": "2026-01-18T11:18:56.133Z",
"is_superuser": false,
"created_at": "2026-01-17T15:36:29.304Z",
"updated_at": "2026-01-17T15:36:29.304Z",
"uuid": "cfdb469e-d4ec-42f7-bc06-0fa35e91fdcc",
"email_address": "a@gmail.com",
"first_name": "Random",
"last_name": "User",
"date_of_birth": "2001-01-01",
"bio": "",
"timezone": "UTC",
"avatar_url": "",
"is_active": true,
"is_staff": true,
"is_manager": true,
"groups": [],
"user_permissions": []
}
},
{
"model": "users.user",
"pk": 2,
"fields": {
"password": "pbkdf2_sha256$1000000$Xec1oHXVNS14f1vcEAa7OQ$RKs38j8zsCvbfIqWcSnAOaJ1ZzSf/fm5LvKYuAN2vq0=",
"last_login": "2026-01-18T18:37:52.075Z",
"is_superuser": false,
"created_at": "2026-01-18T18:37:51.896Z",
"updated_at": "2026-01-18T18:37:51.896Z",
"uuid": "1ddc7540-bbef-43d5-963b-ffd27e02c7d3",
"email_address": "b@gmail.com",
"first_name": "Example",
"last_name": "Employee",
"date_of_birth": null,
"bio": "",
"timezone": "UTC",
"avatar_url": "",
"is_active": true,
"is_staff": false,
"is_manager": false,
"groups": [],
"user_permissions": []
}
}
]

View file

@ -1,14 +0,0 @@
[
{
"model": "orgs.organization",
"pk": 1,
"fields": {
"created_at": "2026-01-18T15:29:10.238Z",
"updated_at": "2026-01-18T15:29:10.238Z",
"uuid": "c64ed6a5-f444-464c-a408-e73b71890acc",
"name": "Example Organization",
"description": "An example organization for testing purposes.",
"owner": 1
}
}
]

View file

@ -1,13 +0,0 @@
[
{
"model": "orgs.role",
"pk": 1,
"fields": {
"created_at": "2026-01-18T18:35:26.413Z",
"updated_at": "2026-01-18T18:35:26.413Z",
"name": "FNIRS Specialist",
"uuid": "39d04047-93bc-446e-8ec9-6fdf0839e48c",
"organization": 1
}
}
]

View file

@ -1,22 +0,0 @@
[
{
"model": "orgs.rolemembership",
"pk": 1,
"fields": {
"created_at": "2026-01-18T18:40:00.277Z",
"updated_at": "2026-01-18T18:40:00.277Z",
"user": 2,
"role": 1
}
},
{
"model": "orgs.organizationmembership",
"pk": 1,
"fields": {
"created_at": "2026-01-18T18:40:16.327Z",
"updated_at": "2026-01-18T18:40:16.327Z",
"user": 2,
"organization": 1
}
}
]

12
env.d.ts vendored
View file

@ -1,12 +0,0 @@
// Pull in Vite's client-side ambient types.
/// <reference types="vite/client" />
// Ambient module declaration: lets TypeScript type-check imports of `*.vue`
// files by treating each default export as a Vue component.
declare module '*.vue' {
import { DefineComponent } from 'vue'
// All five type arguments are left as open string-keyed records.
const component: DefineComponent<
Record<string, unknown>,
Record<string, unknown>,
Record<string, unknown>,
Record<string, unknown>,
Record<string, unknown>
>
export default component
}

View file

@ -1,31 +0,0 @@
import { globalIgnores } from 'eslint/config'
import { defineConfigWithVueTs, vueTsConfigs } from '@vue/eslint-config-typescript'
import pluginVue from 'eslint-plugin-vue'
import skipFormatting from '@vue/eslint-config-prettier/skip-formatting'
// To allow more languages other than `ts` in `.vue` files, uncomment the following lines:
// import { configureVueProject } from '@vue/eslint-config-typescript'
// configureVueProject({ scriptLangs: ['ts', 'tsx'] })
// More info at https://github.com/vuejs/eslint-config-typescript/#advanced-setup
// Flat ESLint configuration for the Vue + TypeScript frontend.
export default defineConfigWithVueTs(
{
// Lint Vue single-file components and TypeScript sources.
name: 'app/files-to-lint',
files: ['**/*.{vue,ts,mts,tsx}'],
},
// Never lint build or coverage output.
globalIgnores(['**/dist/**', '**/dist-ssr/**', '**/coverage/**']),
...pluginVue.configs['flat/essential'],
vueTsConfigs.recommended,
{
// Project style: 4-space indentation in scripts and templates.
rules: {
indent: ['error', 4, { SwitchCase: 1 }],
'vue/html-indent': ['error', 4],
'vue/script-indent': ['error', 4, { baseIndent: 1, switchCase: 1 }],
'vue/block-lang': 'off',
},
},
// Listed last, after the project rules above.
skipFormatting,
)

View file

@ -1,16 +0,0 @@
<!doctype html>
<!-- lang must not be empty: assistive technology and browsers use it to pick
     pronunciation and hyphenation rules for the page. -->
<html lang="en">
    <head>
        <meta charset="UTF-8" />
        <link rel="icon" href="/favicon.ico" />
        <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png" />
        <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png" />
        <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png" />
        <meta name="viewport" content="width=device-width, initial-scale=1.0" />
        <title>Dynavera</title>
    </head>
    <body>
        <!-- Mount point for the Vue application. -->
        <div id="app"></div>
        <script type="module" src="/src/main.ts"></script>
    </body>
</html>

View file

@ -1,22 +0,0 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
    """Entry point for Django's management commands.

    Defaults DJANGO_SETTINGS_MODULE to ``config.settings`` and passes the
    process arguments to Django. Raises ImportError with a setup hint when
    Django cannot be imported.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
    try:
        from django.core.management import execute_from_command_line as run_command
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    run_command(sys.argv)


if __name__ == '__main__':
    main()

View file

View file

@ -1,86 +0,0 @@
import httpx
import asyncio
import logging
logger = logging.getLogger(__name__)


class MCPClient:
    """Async HTTP client for an MCP server's ``/execute`` and ``/health`` endpoints.

    Can be used as an async context manager so the underlying connection pool
    is always closed::

        async with MCPClient("http://localhost:8001") as client:
            await client.send("echo", {"message": "hi"})
    """

    def __init__(self, server_url: str, timeout: int = 3600):
        """Create a client for ``server_url`` with a per-request ``timeout`` in seconds."""
        self.server_url = server_url
        # Kept so timeout errors can report the limit that was actually configured.
        self.timeout = timeout
        self.client = httpx.AsyncClient(timeout=timeout)
        logger.info(f"MCPClient initialized for {server_url} with timeout={timeout}s ({timeout//60} minutes)")

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.close()

    async def send(self, tool: str, arguments: dict):
        """POST a tool invocation to ``/execute`` and return the decoded JSON result.

        Raises:
            Exception: on timeout, on transport failure, on an HTTP status
                >= 400, on a body that is not valid JSON, or when the server
                answers with ``{"status": "failed"}``. The original error is
                chained as ``__cause__`` where one exists.
        """
        logger.info(f"MCPClient: Sending request to {self.server_url}/execute for tool '{tool}'")
        try:
            response = await self.client.post(
                f"{self.server_url}/execute",
                json={
                    "tool": tool,
                    "arguments": arguments,
                },
            )
        # httpx signals timeouts with httpx.TimeoutException; asyncio.TimeoutError
        # is kept for callers that wrap this call in asyncio.wait_for().
        except (httpx.TimeoutException, asyncio.TimeoutError) as e:
            logger.error(f"MCPClient: Request timeout for tool '{tool}': {str(e)}")
            raise Exception(
                f"MCP tool '{tool}' request timed out (>{self.timeout}s). "
                "Model loading or fine-tuning may be too slow."
            ) from e
        except Exception as e:
            logger.error(f"MCPClient: Request failed for tool '{tool}': {str(e)}", exc_info=True)
            raise Exception(f"MCP tool '{tool}' request failed: {str(e)}") from e

        logger.info(f"MCPClient: Received response with status={response.status_code}")
        logger.debug(f"MCPClient: Response headers: {response.headers}")

        if response.status_code >= 400:
            error_data = {}
            try:
                error_data = response.json()
                logger.error(f"MCPClient: HTTP error {response.status_code}: {error_data}")
            except ValueError:
                # JSON decoding errors are ValueError subclasses; nothing else is swallowed.
                logger.error(f"MCPClient: HTTP error {response.status_code} (could not parse JSON)")
            error_msg = None
            # An error body may be valid JSON without being an object (list, string, ...).
            if isinstance(error_data, dict):
                error_msg = error_data.get("error") or error_data.get("details")
            error_msg = error_msg or f"HTTP {response.status_code}"
            raise Exception(f"MCP tool '{tool}' failed: {error_msg}. Full response: {error_data}")

        try:
            result = response.json()
        except Exception as e:
            logger.error(f"MCPClient: Failed to parse response JSON: {str(e)}")
            logger.error(f"MCPClient: Raw response text: {response.text[:500]}")
            raise Exception(f"MCP tool '{tool}' returned invalid JSON: {str(e)}") from e

        if isinstance(result, dict):
            logger.debug(f"MCPClient: Parsed JSON response: status={result.get('status')}")
            if result.get("status") == "failed":
                error_msg = result.get("error") or result.get("details") or "Unknown error"
                traceback_info = result.get("traceback", "")
                full_error = f"MCP tool '{tool}' returned failure: {error_msg}"
                if traceback_info:
                    full_error += f"\n\nServer traceback:\n{traceback_info}"
                logger.error(f"MCPClient: {full_error}")
                raise Exception(full_error)

        logger.info(f"MCPClient: Tool '{tool}' completed successfully")
        return result

    async def health(self):
        """GET ``/health`` and return its JSON body; raises on a non-2xx status."""
        response = await self.client.get(f"{self.server_url}/health")
        response.raise_for_status()
        return response.json()

    async def close(self):
        """Close the underlying HTTP client."""
        await self.client.aclose()
async def main():
    """Manual smoke test: call the ``echo`` tool on a local MCP server and print the reply."""
    client = MCPClient("http://localhost:8001")
    try:
        result = await client.send(
            tool="echo",
            arguments={"message": "hello from client"},
        )
        print(result)
    finally:
        # Close the HTTP client even when the request raises.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())

File diff suppressed because it is too large Load diff

View file

@ -1,146 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "0910db83",
"metadata": {},
"source": [
"# Model Testing with GPT4ALL running locally"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "47cacfc9",
"metadata": {},
"outputs": [],
"source": [
"# Imports\n",
"import json\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "484cfebc",
"metadata": {},
"outputs": [],
"source": [
"# Variables for model response\n",
"API_URL = \"http://localhost:4891/v1/chat/completions\"\n",
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
"MODEL = \"DeepSeek-R1-Distill-Qwen-7B\"\n",
"MAX_TOKENS = 2000\n",
"TEMPERATURE = 0.28"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "90b9b1f1",
"metadata": {},
"outputs": [],
"source": [
"content = \"Teach me computer vision\"\n",
"data = {\"model\": MODEL,\"messages\":[{\"role\":\"user\",\"content\": content}],\"max_tokens\": MAX_TOKENS,\"temperature\": TEMPERATURE}\n",
"response = requests.post(API_URL, json = data, headers=HEADERS)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "88a77498",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'{\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"<think>\\\\n\\\\n</think>\\\\n\\\\nComputer vision is a field of artificial intelligence that focuses on enabling computers to interpret and understand visual information from the world. It involves training algorithms, typically using deep learning techniques, to perform tasks such as object recognition, image segmentation, feature extraction, and more.\\\\n\\\\nHeres an introduction to get you started:\\\\n\\\\n---\\\\n\\\\n### **1. What is Computer Vision?**\\\\nComputer vision mimics human visual perception by analyzing images or video data to extract meaningful information. It relies heavily on machine learning and deep learning techniques like convolutional neural networks (CNNs) to perform tasks such as:\\\\n- Object detection\\\\n- Image classification\\\\n- Face recognition\\\\n- Medical image analysis\\\\n- Autonomous vehicle navigation\\\\n\\\\n---\\\\n\\\\n### **2. Key Concepts in Computer Vision**\\\\n#### **Image Representation**\\\\n- **Pixels**: The basic unit of an image, represented by numerical values indicating color and brightness.\\\\n- **Channels**: Color images have multiple channels (e.g., RGB has red, green, blue channels).\\\\n\\\\n#### **Common Tasks**\\\\n1. **Object Detection**:\\\\n - Identify the presence and location of objects in an image.\\\\n - Example: Bounding box regression.\\\\n\\\\n2. **Classification**:\\\\n - Categorize images into predefined classes (e.g., cat vs. dog).\\\\n\\\\n3. **Segmentation**:\\\\n - Partition an image into segments, each representing a different object or region.\\\\n\\\\n4. **Feature Extraction**:\\\\n - Identify and extract relevant patterns from images for further analysis.\\\\n\\\\n---\\\\n\\\\n### **3. 
Tools and Libraries**\\\\nTo get started with computer vision, youll need tools like OpenCV (Open Source Computer Vision) or TensorFlow/Keras for building models.\\\\n\\\\n#### **OpenCV**\\\\n- A popular open-source library for image processing.\\\\n- Features:\\\\n - Image filtering\\\\n - Edge detection\\\\n - Object tracking\\\\n - Face recognition\\\\n\\\\n#### **TensorFlow/Keras**\\\\n- Frameworks built on top of TensorFlow, ideal for deep learning tasks.\\\\n- Easy to use and widely adopted.\\\\n\\\\n---\\\\n\\\\n### **4. Getting Started with Computer Vision**\\\\n\\\\n#### **Step 1: Learn the Basics**\\\\nStart by understanding fundamental concepts like pixels, image processing techniques, and basic computer vision algorithms (e.g., SIFT, HOG).\\\\n\\\\n#### **Step 2: Explore Datasets**\\\\nWork with common datasets:\\\\n- CIFAR-10/100\\\\n- MNIST (handwritten digits)\\\\n- COCO (common objects in context)\\\\n\\\\n#### **Step 3: Build Simple Models**\\\\nUse pre-trained models like ResNet or VGG to classify images. For example, you can train a model to recognize cats vs. dogs.\\\\n\\\\n#### **Step 4: Experiment with Deep Learning**\\\\nTune hyperparameters (learning rate, batch size) and explore techniques like data augmentation to improve model performance.\\\\n\\\\n---\\\\n\\\\n### **5. Resources for Learning**\\\\n- **Books**:\\\\n - *Deep Learning for Computer Vision* by Adrian Rosebrock\\\\n - *Computer Vision: Algorithms and Applications* by Richard Szeliski\\\\n\\\\n- **Tutorials/Documentation**:\\\\n - OpenCV官网文档 [https://docs.opencv.org](https://docs.opencv.org)\\\\n - TensorFlow/Keras官网文档 [https://www.tensorflow.org](https://www.tensorflow.org)\\\\n\\\\n- **Online Courses**:\\\\n - Coursera: \\\\\"Introduction to Computer Vision\\\\\" by Georgia Tech\\\\n - Udacity: \\\\\"Deep Learning for Computer Vision\\\\\"\\\\n - Fast.ai: Free, practical courses on computer vision.\\\\n\\\\n---\\\\n\\\\n### **6. Practice Projects**\\\\n1. 
**Object Detection**: Use YOLO or Mask R-CNN to detect objects in images.\\\\n2. **Image Classification**: Build a model that classifies images into predefined categories (e.g., flowers vs. vegetables).\\\\n3. **Face Recognition**: Implement face recognition using deep learning frameworks.\\\\n\\\\n---\\\\n\\\\n### **7. Keep Learning**\\\\n- Follow research papers on arXiv ([https://arxiv.org](https://arxiv.org)).\\\\n- Join communities like Reddits r/computervision or Stack Overflow.\\\\n- Experiment with cutting-edge models and techniques in computer vision.\\\\n\\\\n---\\\\n\\\\nWith practice and persistence, youll become proficient in computer vision. Start small, experiment, and most importantly, have fun!\",\"role\":\"assistant\"},\"references\":null}],\"created\":1768678056,\"id\":\"placeholder\",\"model\":\"DeepSeek-R1-Distill-Qwen-7B\",\"object\":\"chat.completion\",\"usage\":{\"completion_tokens\":861,\"prompt_tokens\":8,\"total_tokens\":869}}'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.text"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c416905c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'choices': [{'finish_reason': 'stop',\n",
" 'index': 0,\n",
" 'logprobs': None,\n",
" 'message': {'content': '<think>\\n\\n</think>\\n\\nComputer vision is a field of artificial intelligence that focuses on enabling computers to interpret and understand visual information from the world. It involves training algorithms, typically using deep learning techniques, to perform tasks such as object recognition, image segmentation, feature extraction, and more.\\n\\nHeres an introduction to get you started:\\n\\n---\\n\\n### **1. What is Computer Vision?**\\nComputer vision mimics human visual perception by analyzing images or video data to extract meaningful information. It relies heavily on machine learning and deep learning techniques like convolutional neural networks (CNNs) to perform tasks such as:\\n- Object detection\\n- Image classification\\n- Face recognition\\n- Medical image analysis\\n- Autonomous vehicle navigation\\n\\n---\\n\\n### **2. Key Concepts in Computer Vision**\\n#### **Image Representation**\\n- **Pixels**: The basic unit of an image, represented by numerical values indicating color and brightness.\\n- **Channels**: Color images have multiple channels (e.g., RGB has red, green, blue channels).\\n\\n#### **Common Tasks**\\n1. **Object Detection**:\\n - Identify the presence and location of objects in an image.\\n - Example: Bounding box regression.\\n\\n2. **Classification**:\\n - Categorize images into predefined classes (e.g., cat vs. dog).\\n\\n3. **Segmentation**:\\n - Partition an image into segments, each representing a different object or region.\\n\\n4. **Feature Extraction**:\\n - Identify and extract relevant patterns from images for further analysis.\\n\\n---\\n\\n### **3. 
Tools and Libraries**\\nTo get started with computer vision, youll need tools like OpenCV (Open Source Computer Vision) or TensorFlow/Keras for building models.\\n\\n#### **OpenCV**\\n- A popular open-source library for image processing.\\n- Features:\\n - Image filtering\\n - Edge detection\\n - Object tracking\\n - Face recognition\\n\\n#### **TensorFlow/Keras**\\n- Frameworks built on top of TensorFlow, ideal for deep learning tasks.\\n- Easy to use and widely adopted.\\n\\n---\\n\\n### **4. Getting Started with Computer Vision**\\n\\n#### **Step 1: Learn the Basics**\\nStart by understanding fundamental concepts like pixels, image processing techniques, and basic computer vision algorithms (e.g., SIFT, HOG).\\n\\n#### **Step 2: Explore Datasets**\\nWork with common datasets:\\n- CIFAR-10/100\\n- MNIST (handwritten digits)\\n- COCO (common objects in context)\\n\\n#### **Step 3: Build Simple Models**\\nUse pre-trained models like ResNet or VGG to classify images. For example, you can train a model to recognize cats vs. dogs.\\n\\n#### **Step 4: Experiment with Deep Learning**\\nTune hyperparameters (learning rate, batch size) and explore techniques like data augmentation to improve model performance.\\n\\n---\\n\\n### **5. Resources for Learning**\\n- **Books**:\\n - *Deep Learning for Computer Vision* by Adrian Rosebrock\\n - *Computer Vision: Algorithms and Applications* by Richard Szeliski\\n\\n- **Tutorials/Documentation**:\\n - OpenCV官网文档 [https://docs.opencv.org](https://docs.opencv.org)\\n - TensorFlow/Keras官网文档 [https://www.tensorflow.org](https://www.tensorflow.org)\\n\\n- **Online Courses**:\\n - Coursera: \"Introduction to Computer Vision\" by Georgia Tech\\n - Udacity: \"Deep Learning for Computer Vision\"\\n - Fast.ai: Free, practical courses on computer vision.\\n\\n---\\n\\n### **6. Practice Projects**\\n1. **Object Detection**: Use YOLO or Mask R-CNN to detect objects in images.\\n2. 
**Image Classification**: Build a model that classifies images into predefined categories (e.g., flowers vs. vegetables).\\n3. **Face Recognition**: Implement face recognition using deep learning frameworks.\\n\\n---\\n\\n### **7. Keep Learning**\\n- Follow research papers on arXiv ([https://arxiv.org](https://arxiv.org)).\\n- Join communities like Reddits r/computervision or Stack Overflow.\\n- Experiment with cutting-edge models and techniques in computer vision.\\n\\n---\\n\\nWith practice and persistence, youll become proficient in computer vision. Start small, experiment, and most importantly, have fun!',\n",
" 'role': 'assistant'},\n",
" 'references': None}],\n",
" 'created': 1768678056,\n",
" 'id': 'placeholder',\n",
" 'model': 'DeepSeek-R1-Distill-Qwen-7B',\n",
" 'object': 'chat.completion',\n",
" 'usage': {'completion_tokens': 861, 'prompt_tokens': 8, 'total_tokens': 869}}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response_data = json.loads(response.text)\n",
"response_data"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2553d924",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'<think>\\n\\n</think>\\n\\nComputer vision is a field of artificial intelligence that focuses on enabling computers to interpret and understand visual information from the world. It involves training algorithms, typically using deep learning techniques, to perform tasks such as object recognition, image segmentation, feature extraction, and more.\\n\\nHeres an introduction to get you started:\\n\\n---\\n\\n### **1. What is Computer Vision?**\\nComputer vision mimics human visual perception by analyzing images or video data to extract meaningful information. It relies heavily on machine learning and deep learning techniques like convolutional neural networks (CNNs) to perform tasks such as:\\n- Object detection\\n- Image classification\\n- Face recognition\\n- Medical image analysis\\n- Autonomous vehicle navigation\\n\\n---\\n\\n### **2. Key Concepts in Computer Vision**\\n#### **Image Representation**\\n- **Pixels**: The basic unit of an image, represented by numerical values indicating color and brightness.\\n- **Channels**: Color images have multiple channels (e.g., RGB has red, green, blue channels).\\n\\n#### **Common Tasks**\\n1. **Object Detection**:\\n - Identify the presence and location of objects in an image.\\n - Example: Bounding box regression.\\n\\n2. **Classification**:\\n - Categorize images into predefined classes (e.g., cat vs. dog).\\n\\n3. **Segmentation**:\\n - Partition an image into segments, each representing a different object or region.\\n\\n4. **Feature Extraction**:\\n - Identify and extract relevant patterns from images for further analysis.\\n\\n---\\n\\n### **3. 
Tools and Libraries**\\nTo get started with computer vision, youll need tools like OpenCV (Open Source Computer Vision) or TensorFlow/Keras for building models.\\n\\n#### **OpenCV**\\n- A popular open-source library for image processing.\\n- Features:\\n - Image filtering\\n - Edge detection\\n - Object tracking\\n - Face recognition\\n\\n#### **TensorFlow/Keras**\\n- Frameworks built on top of TensorFlow, ideal for deep learning tasks.\\n- Easy to use and widely adopted.\\n\\n---\\n\\n### **4. Getting Started with Computer Vision**\\n\\n#### **Step 1: Learn the Basics**\\nStart by understanding fundamental concepts like pixels, image processing techniques, and basic computer vision algorithms (e.g., SIFT, HOG).\\n\\n#### **Step 2: Explore Datasets**\\nWork with common datasets:\\n- CIFAR-10/100\\n- MNIST (handwritten digits)\\n- COCO (common objects in context)\\n\\n#### **Step 3: Build Simple Models**\\nUse pre-trained models like ResNet or VGG to classify images. For example, you can train a model to recognize cats vs. dogs.\\n\\n#### **Step 4: Experiment with Deep Learning**\\nTune hyperparameters (learning rate, batch size) and explore techniques like data augmentation to improve model performance.\\n\\n---\\n\\n### **5. Resources for Learning**\\n- **Books**:\\n - *Deep Learning for Computer Vision* by Adrian Rosebrock\\n - *Computer Vision: Algorithms and Applications* by Richard Szeliski\\n\\n- **Tutorials/Documentation**:\\n - OpenCV官网文档 [https://docs.opencv.org](https://docs.opencv.org)\\n - TensorFlow/Keras官网文档 [https://www.tensorflow.org](https://www.tensorflow.org)\\n\\n- **Online Courses**:\\n - Coursera: \"Introduction to Computer Vision\" by Georgia Tech\\n - Udacity: \"Deep Learning for Computer Vision\"\\n - Fast.ai: Free, practical courses on computer vision.\\n\\n---\\n\\n### **6. Practice Projects**\\n1. **Object Detection**: Use YOLO or Mask R-CNN to detect objects in images.\\n2. 
**Image Classification**: Build a model that classifies images into predefined categories (e.g., flowers vs. vegetables).\\n3. **Face Recognition**: Implement face recognition using deep learning frameworks.\\n\\n---\\n\\n### **7. Keep Learning**\\n- Follow research papers on arXiv ([https://arxiv.org](https://arxiv.org)).\\n- Join communities like Reddits r/computervision or Stack Overflow.\\n- Experiment with cutting-edge models and techniques in computer vision.\\n\\n---\\n\\nWith practice and persistence, youll become proficient in computer vision. Start small, experiment, and most importantly, have fun!'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response_data['choices'][0]['message']['content']"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load diff

View file

@ -1,353 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "45d62106",
"metadata": {},
"source": [
"# Basic RAG Implementation with a local LLM"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "4c312410",
"metadata": {},
"outputs": [],
"source": [
"from gpt4all import GPT4All\n",
"from sentence_transformers import SentenceTransformer\n",
"from chromadb import PersistentClient\n",
"from docx import Document\n",
"\n",
"MODEL = \"Meta-Llama-3-8B-Instruct.Q4_0.gguf\"\n",
"CONTEXT_SIZE = 8192\n",
"EMBEDDER = \"all-MiniLM-L6-v2\"\n",
"RAG_PATH = \"./build/rag_db\"\n",
"DOCS_PATH = \"./build/documents/fNIRS_Glossary_Hardware.docx\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "90bae527",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "104f2001edc34aa5aff82734b3388041",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"modules.json: 0%| | 0.00/349 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\nalab\\.cache\\huggingface\\hub\\models--sentence-transformers--all-MiniLM-L6-v2. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
"To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
" warnings.warn(message)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7bf16ea40d964be19217eadc81f5674e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config_sentence_transformers.json: 0%| | 0.00/116 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "32962e77048440908808689c5dc386e0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"README.md: 0.00B [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bf08ffecdfa94eaca2841e2b6b88eea5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"sentence_bert_config.json: 0%| | 0.00/53.0 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6079ecdd0e464623a1d7e20999213213",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config.json: 0%| | 0.00/612 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "60b2de9bec5c4237827d910660389db1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model.safetensors: 0%| | 0.00/90.9M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "05f352a112fb4ccd8968a7ffe335c80f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/350 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b5f7aa6547c0455eb55863ad8ec6c84f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"vocab.txt: 0.00B [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "43605d598a604c10a85effee5869939e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0.00B [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bd1a21fcccee4a92a50dcca08c858565",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/112 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6d409c5032674774bfe157e1ec21eb3a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config.json: 0%| | 0.00/190 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"model = GPT4All(model_name = MODEL, n_ctx = CONTEXT_SIZE, allow_download = True, device = \"cuda\")\n",
"embedder = SentenceTransformer(EMBEDDER)\n",
"client = PersistentClient(path = RAG_PATH)\n",
"\n",
"\n",
"class EmbeddingFunctionWrapper:\n",
" def __init__(self, model):\n",
" self.model = model\n",
"\n",
" def name(self):\n",
" return \"sentence-transformers\"\n",
"\n",
" def __call__(self, input):\n",
" if isinstance(input, str):\n",
" texts = [input]\n",
" embs = self.model.encode(texts).tolist()\n",
" return embs[0]\n",
" else:\n",
" texts = list(input)\n",
" return self.model.encode(texts).tolist()\n",
"\n",
"embedding_fn = EmbeddingFunctionWrapper(embedder)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "34efbc7c",
"metadata": {},
"outputs": [],
"source": [
"# Read the DOCX, keep non-empty paragraphs, and join them into one text blob.\n",
"doc = Document(DOCS_PATH)\n",
"docx_content = \"\\n\".join(paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip())\n",
"\n",
"# Fixed-width character chunks; whitespace-only chunks are dropped.\n",
"chunk_size = 1000\n",
"documents = [\n",
"    chunk\n",
"    for chunk in (docx_content[i:i + chunk_size] for i in range(0, len(docx_content), chunk_size))\n",
"    if chunk.strip()\n",
"]\n",
"embeddings = embedder.encode(documents).tolist()\n",
"collection = client.get_or_create_collection(\n",
"    name = \"knowledge_base\",\n",
"    embedding_function = embedding_fn,\n",
")\n",
"# upsert (not add): the store is persistent, so re-running this cell must\n",
"# refresh chunks that already exist under the same ids.\n",
"collection.upsert(\n",
"    documents=documents,\n",
"    embeddings=embeddings,\n",
"    ids=[f\"doc{i}\" for i in range(len(documents))]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ed2cc1ff",
"metadata": {},
"outputs": [],
"source": [
"def retrieve(query, top_k = 1):\n",
"    \"\"\"Return the texts of the ``top_k`` stored chunks closest to ``query``.\n",
"\n",
"    The query is embedded once with ``embedder`` and looked up by vector.\n",
"    Errors raised by the vector store propagate to the caller instead of\n",
"    being swallowed by a blanket ``except``.\n",
"    \"\"\"\n",
"    query_embedding = embedder.encode([query]).tolist()[0]\n",
"    results = collection.query(query_embeddings=[query_embedding], n_results=top_k)\n",
"    return results[\"documents\"][0]\n",
"\n",
"\n",
"def rag_answer(query, top_k = 1, max_context_length = 500, max_tokens = 200):\n",
"    \"\"\"Answer ``query`` with the local model using retrieved chunks as context.\n",
"\n",
"    Args:\n",
"        query: Question to answer.\n",
"        top_k: Number of chunks to retrieve.\n",
"        max_context_length: Maximum number of context characters placed in the\n",
"            prompt; longer context is cut and suffixed with \"...\". Pass None\n",
"            to disable the cut.\n",
"        max_tokens: Generation budget forwarded to ``model.generate``.\n",
"\n",
"    Returns:\n",
"        The value returned by ``model.generate`` for the assembled prompt.\n",
"    \"\"\"\n",
"    retrieved_docs = retrieve(query, top_k)\n",
"    context = \"\\n\\n\".join(retrieved_docs)\n",
"    if max_context_length is not None and len(context) > max_context_length:\n",
"        context = context[:max_context_length] + \"...\"\n",
"\n",
"    prompt = f\"\"\"\n",
"Use the context to answer the question.\n",
"Context:\n",
"{context}\n",
"Question:\n",
"{query}\n",
"Answer:\n",
"\"\"\"\n",
"    print(f\"Prompt length: {len(prompt)}\")\n",
"    return model.generate(prompt, max_tokens=max_tokens)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6fa9fd10",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of documents: 68\n",
"Document lengths: [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 63]\n",
"Retrieved docs length: 1\n",
"Prompt length: 627\n"
]
}
],
"source": [
"query = \"What can Frequency domain multidistance NIRS estimate?\"\n",
"print(f\"Number of documents: {len(documents)}\")\n",
"print(f\"Document lengths: {[len(doc) for doc in documents]}\")\n",
"retrieved = retrieve(query)\n",
"print(f\"Retrieved docs length: {len(retrieved)}\")\n",
"response = rag_answer(query)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5a82353e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Frequency-domain (FD) multidistance NIRS technique can estimate absolute values of absorption and scattering of the medium, and subsequently chromophore concentrations.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -1,583 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c9cd197e",
"metadata": {},
"source": [
"# Prepare Training File: Load Model & Generate Training Pairs\n",
"\n",
"This notebook loads a language model and uses it to generate structured instruction/response training pairs from a DOCX, JSON, or JSONL input file. The generated pairs are saved as JSONL and can be used directly for fine-tuning."
]
},
{
"cell_type": "markdown",
"id": "556d3fe5",
"metadata": {},
"source": [
"## Setup: Environment Variables\n",
"\n",
"Configure CUDA and PyTorch environment variables to disable BF16 and FP16 precision reductions for stable training."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a25b6a3b",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"CUDA_DISABLE_BF16\"] = \"1\"\n",
"os.environ[\"TORCH_CUDA_ALLOW_BF16_REDUCED_PRECISION_REDUCTION\"] = \"0\"\n",
"os.environ[\"ACCELERATE_DISABLE_FP16\"] = \"1\""
]
},
{
"cell_type": "markdown",
"id": "97b9e212",
"metadata": {},
"source": [
"## Setup: Import Required Libraries\n",
"\n",
"Import necessary libraries including transformers, torch, datasets, python-docx, json, os, and other utilities for document processing and model loading."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d63d552",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import logging\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"from docx import Document\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
"import torch\n",
"\n",
"logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n",
"logger = logging.getLogger(__name__)"
]
},
{
"cell_type": "markdown",
"id": "84e04da2",
"metadata": {},
"source": [
"## Setup: Configure Directory Structure\n",
"\n",
"Create and organize directory paths for storing training data, models, and intermediate outputs."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "993ed003",
"metadata": {},
"outputs": [],
"source": [
"OUTPUT_DIR = Path(\"./build/training_prep\")\n",
"OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n",
"DATA_DIR = OUTPUT_DIR / \"data\"\n",
"DATA_DIR.mkdir(exist_ok=True)\n",
"MODELS_DIR = OUTPUT_DIR / \"models\"\n",
"MODELS_DIR.mkdir(exist_ok=True)\n",
"\n",
"MODEL_CACHE_DIR = Path(\"./model/base-model\")\n",
"MODEL_CACHE_DIR.mkdir(parents=True, exist_ok=True)\n",
"os.environ[\"HF_HOME\"] = str(MODEL_CACHE_DIR)\n",
"\n",
"logger.info(f\"Output directory: {OUTPUT_DIR}\")\n",
"logger.info(f\"Model cache directory: {MODEL_CACHE_DIR}\")"
]
},
{
"cell_type": "markdown",
"id": "0439c534",
"metadata": {},
"source": [
"## Setup: Helper Functions\n",
"\n",
"Define utility functions for loading various file formats (DOCX, JSON, JSONL)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e34ff2b7",
"metadata": {},
"outputs": [],
"source": [
"def load_docx_file(file_path: str) -> list:\n",
" \"\"\"Load and parse a DOCX file into paragraphs.\"\"\"\n",
" logger.info(f\"Loading DOCX file: {file_path}\")\n",
" doc = Document(file_path)\n",
" paragraphs = [p.text.strip() for p in doc.paragraphs if p.text.strip()]\n",
" logger.info(f\"Extracted {len(paragraphs)} paragraphs from {file_path}\")\n",
" return paragraphs\n",
"\n",
"\n",
"def load_json_file(file_path: str) -> list:\n",
" \"\"\"Load a JSON file (array or object).\"\"\"\n",
" logger.info(f\"Loading JSON file: {file_path}\")\n",
" with open(file_path, 'r', encoding='utf-8') as f:\n",
" data = json.load(f)\n",
" if isinstance(data, list):\n",
" logger.info(f\"Loaded {len(data)} items from JSON file\")\n",
" return data\n",
" elif isinstance(data, dict):\n",
" logger.info(f\"JSON file is dict, converting to list\")\n",
" return [data]\n",
" return []\n",
"\n",
"\n",
"def load_jsonl_file(file_path: str) -> list:\n",
" \"\"\"Load a JSONL file (one JSON object per line).\"\"\"\n",
" logger.info(f\"Loading JSONL file: {file_path}\")\n",
" items = []\n",
" with open(file_path, 'r', encoding='utf-8') as f:\n",
" for line in f:\n",
" if line.strip():\n",
" items.append(json.loads(line))\n",
" logger.info(f\"Loaded {len(items)} items from JSONL file\")\n",
" return items\n",
"\n",
"\n",
"def load_training_file(file_path: str) -> list:\n",
" \"\"\"Load training file based on extension.\"\"\"\n",
" ext = Path(file_path).suffix.lower()\n",
" if ext == '.docx':\n",
" return load_docx_file(file_path)\n",
" elif ext == '.json':\n",
" return load_json_file(file_path)\n",
" elif ext == '.jsonl':\n",
" return load_jsonl_file(file_path)\n",
" else:\n",
" raise ValueError(f\"Unsupported file format: {ext}\")\n",
"\n",
"\n",
"logger.info(\"Helper functions defined\")"
]
},
{
"cell_type": "markdown",
"id": "3bea7ee7",
"metadata": {},
"source": [
"## Step 1: Load and Configure the Base Model\n",
"\n",
"Load Meta-Llama-3-8B-Instruct with 4-bit quantization for efficient pair generation. The model will read your input file and generate formatted instruction/response pairs."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0348d7d6",
"metadata": {},
"outputs": [],
"source": [
"if not torch.cuda.is_available():\n",
" raise RuntimeError(\"CUDA not available. Please run in a GPU environment.\")\n",
"\n",
"logger.info(f\"Using GPU: {torch.cuda.get_device_name(0)}\")\n",
"\n",
"BASE_MODEL = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n",
"\n",
"logger.info(f\"Loading base model: {BASE_MODEL}\")\n",
"tokenizer = AutoTokenizer.from_pretrained(\n",
" BASE_MODEL,\n",
" cache_dir=str(MODEL_CACHE_DIR),\n",
" local_files_only=False,\n",
")\n",
"if tokenizer.pad_token is None:\n",
" tokenizer.pad_token = tokenizer.eos_token\n",
"\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
" BASE_MODEL,\n",
" cache_dir=str(MODEL_CACHE_DIR),\n",
" quantization_config=BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_compute_dtype=torch.float16\n",
" ),\n",
" device_map=\"auto\",\n",
" dtype=torch.float16,\n",
")\n",
"\n",
"logger.info(\"Model loaded successfully\")"
]
},
{
"cell_type": "markdown",
"id": "bbb7155b",
"metadata": {},
"source": [
"## Step 2: Load Your Training File\n",
"\n",
"Specify the path to your training file (DOCX, JSON, or JSONL). The notebook will parse it and prepare it for pair generation."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe29c8b2",
"metadata": {},
"outputs": [],
"source": [
"TRAINING_FILE = \"./model/data/data.docx\"\n",
"training_data = load_training_file(TRAINING_FILE)\n",
"logger.info(f\"Loaded {len(training_data)} items from training file\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70aa4949",
"metadata": {},
"outputs": [],
"source": [
"# Preview the loaded data; guard against an empty file so the cell does not\n",
"# fail with IndexError on training_data[0].\n",
"print(f\"Loaded {len(training_data)} items\")\n",
"if training_data:\n",
"    first_item = training_data[0]\n",
"    print(f\"First item type: {type(first_item)}\")\n",
"    print(f\"First item (first 200 chars): {str(first_item)[:200]}\")\n",
"    if isinstance(first_item, dict):\n",
"        print(f\"First item keys: {list(first_item.keys())}\")\n",
"else:\n",
"    print(\"Training file contained no items; nothing to preview.\")"
]
},
{
"cell_type": "markdown",
"id": "cdfdaa4d",
"metadata": {},
"source": [
"## Step 3: Generate Training Pairs Using the Model\n",
"\n",
"The model will read your data and generate structured instruction/response pairs using a prompt-based approach. This ensures consistent formatting for fine-tuning."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f36ab365",
"metadata": {},
"outputs": [],
"source": [
"def format_training_sample(sample) -> str:\n",
"    \"\"\"Convert a training item into a concise text description.\"\"\"\n",
"    try:\n",
"        if isinstance(sample, dict):\n",
"            parts = []\n",
"            for k, v in sample.items():\n",
"                if isinstance(v, str) and v.strip():\n",
"                    parts.append(f\"{k}: {v.strip()}\")\n",
"            return \" | \".join(parts) if parts else json.dumps(sample)\n",
"        if isinstance(sample, str):\n",
"            return sample.strip()\n",
"        return str(sample)\n",
"    except Exception:\n",
"        return str(sample)\n",
"\n",
"\n",
"def get_optimal_batch_size() -> int:\n",
"    \"\"\"Calculate optimal batch size based on available GPU memory.\"\"\"\n",
"    if not torch.cuda.is_available():\n",
"        return 5\n",
"\n",
"    try:\n",
"        gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024**3)\n",
"\n",
"        logger.info(f\"GPU total memory: {gpu_mem:.2f} GB\")\n",
"\n",
"        if gpu_mem >= 24:\n",
"            return 20\n",
"        elif gpu_mem >= 16:\n",
"            return 15\n",
"        elif gpu_mem >= 12:\n",
"            return 12\n",
"        elif gpu_mem >= 8:\n",
"            return 8\n",
"        else:\n",
"            return 5\n",
"    except Exception as e:\n",
"        logger.warning(f\"Could not determine GPU memory: {e}. Using conservative batch size.\")\n",
"        return 5\n",
"\n",
"\n",
"def _extract_json_array(text: str):\n",
"    \"\"\"Return the first balanced top-level JSON array found in ``text``, or None.\n",
"\n",
"    Brackets inside double-quoted strings are ignored, as are escaped quotes.\n",
"    \"\"\"\n",
"    start = text.find(\"[\")\n",
"    if start == -1:\n",
"        return None\n",
"    depth = 0\n",
"    in_string = False\n",
"    escaped = False\n",
"    for idx in range(start, len(text)):\n",
"        char = text[idx]\n",
"        if in_string:\n",
"            if escaped:\n",
"                escaped = False\n",
"            elif char == '\\\\':\n",
"                escaped = True\n",
"            elif char == '\"':\n",
"                in_string = False\n",
"            continue\n",
"        if char == '\"':\n",
"            in_string = True\n",
"        elif char == '[':\n",
"            depth += 1\n",
"        elif char == ']':\n",
"            depth -= 1\n",
"            if depth == 0:\n",
"                return text[start:idx + 1]\n",
"    return None\n",
"\n",
"\n",
"def _collect_pairs(parsed) -> list:\n",
"    \"\"\"Return (instruction, response) tuples for each well-formed dict in ``parsed``.\"\"\"\n",
"    pairs = []\n",
"    if not isinstance(parsed, list):\n",
"        return pairs\n",
"    for item in parsed:\n",
"        # The model occasionally emits bare strings or nested arrays; skip them\n",
"        # instead of discarding the whole batch.\n",
"        if not isinstance(item, dict):\n",
"            continue\n",
"        instr = str(item.get(\"instruction\", \"\")).strip()\n",
"        resp = str(item.get(\"response\", \"\")).strip()\n",
"        if instr and resp:\n",
"            pairs.append((instr, resp))\n",
"    return pairs\n",
"\n",
"\n",
"def _parse_pairs(json_text: str) -> list:\n",
"    \"\"\"Parse ``json_text`` into pairs, retrying once with trailing commas removed.\n",
"\n",
"    Raises:\n",
"        json.JSONDecodeError: if the text is still invalid after the repair.\n",
"    \"\"\"\n",
"    import re\n",
"\n",
"    try:\n",
"        return _collect_pairs(json.loads(json_text))\n",
"    except json.JSONDecodeError:\n",
"        # Drop a comma that directly precedes a closing bracket/brace,\n",
"        # allowing for whitespace or newlines in between.\n",
"        repaired = re.sub(r\",\\s*([\\]}])\", r\"\\1\", json_text)\n",
"        return _collect_pairs(json.loads(repaired))\n",
"\n",
"\n",
"def generate_pairs_with_model(training_data: list, batch_size: int = None, max_tokens: int = 2048) -> list:\n",
"    \"\"\"\n",
"    Use the model to generate instruction/response pairs from training data.\n",
"    Processes data in batches to fit within GPU memory constraints.\n",
"\n",
"    Args:\n",
"        training_data: List of training items to process\n",
"        batch_size: Number of items per batch (None = auto-detect based on GPU memory)\n",
"        max_tokens: Maximum tokens to generate per batch (default: 2048)\n",
"\n",
"    Returns:\n",
"        List of (instruction, response) string tuples. Batches whose output\n",
"        cannot be parsed, or that run out of GPU memory, are logged and skipped.\n",
"\n",
"    Raises:\n",
"        ValueError: if batch_size is smaller than 1.\n",
"    \"\"\"\n",
"    if batch_size is None:\n",
"        batch_size = get_optimal_batch_size()\n",
"    if batch_size < 1:\n",
"        raise ValueError(\"batch_size must be a positive integer\")\n",
"\n",
"    logger.info(f\"Generating training pairs from {len(training_data)} items\")\n",
"    logger.info(f\"Batch size: {batch_size}, Max tokens per batch: {max_tokens}\")\n",
"\n",
"    system_prompt = (\n",
"        \"You are a JSON generator. Your task is to read content and output ONLY a valid JSON array.\\n\"\n",
"        \"Each object must have exactly two fields: 'instruction' and 'response'.\\n\"\n",
"        \"Do not include any text before or after the JSON array.\\n\"\n",
"        \"The instruction field should be a question or task from the content.\\n\"\n",
"        \"The response field should be the answer extracted from the content.\\n\"\n",
"        \"Output MUST be valid JSON - nothing else.\"\n",
"    )\n",
"\n",
"    # Llama 3 Instruct closes a turn with <|eot_id|>, which may differ from\n",
"    # tokenizer.eos_token; stop on either so generation halts after the array.\n",
"    terminators = [tokenizer.eos_token_id]\n",
"    eot_id = tokenizer.convert_tokens_to_ids(\"<|eot_id|>\")\n",
"    if isinstance(eot_id, int) and eot_id != tokenizer.unk_token_id and eot_id not in terminators:\n",
"        terminators.append(eot_id)\n",
"\n",
"    total_batches = (len(training_data) + batch_size - 1) // batch_size\n",
"    all_pairs = []\n",
"\n",
"    for batch_num, start in enumerate(range(0, len(training_data), batch_size), 1):\n",
"        batch = training_data[start:start + batch_size]\n",
"        logger.info(f\"Processing batch {batch_num}/{total_batches} ({len(batch)} items)\")\n",
"\n",
"        data_block = \"\\n\".join(f\"{j}. {format_training_sample(item)}\" for j, item in enumerate(batch, 1))\n",
"        user_prompt = (\n",
"            f\"Content to extract training pairs from:\\n{data_block}\\n\\n\"\n",
"            \"Output a JSON array with instruction-response pairs. Output ONLY the JSON array, no other text:\"\n",
"        )\n",
"        messages = [\n",
"            {\"role\": \"system\", \"content\": system_prompt},\n",
"            {\"role\": \"user\", \"content\": user_prompt},\n",
"        ]\n",
"        # Use the tokenizer's own chat template (hand-written ChatML markers are\n",
"        # not this model's format) and pre-fill \"[\" so the reply starts as an array.\n",
"        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + \"[\"\n",
"\n",
"        try:\n",
"            # The rendered template already carries the BOS token.\n",
"            inputs = tokenizer(prompt, return_tensors=\"pt\", add_special_tokens=False).to(model.device)\n",
"\n",
"            if torch.cuda.is_available():\n",
"                torch.cuda.empty_cache()\n",
"\n",
"            with torch.no_grad():\n",
"                output = model.generate(\n",
"                    **inputs,\n",
"                    max_new_tokens=max_tokens,\n",
"                    do_sample=True,\n",
"                    temperature=0.7,\n",
"                    top_p=0.95,\n",
"                    top_k=50,\n",
"                    eos_token_id=terminators,\n",
"                    pad_token_id=tokenizer.pad_token_id,\n",
"                )\n",
"        except RuntimeError as e:\n",
"            if \"out of memory\" in str(e).lower():\n",
"                logger.error(f\"OOM in batch {batch_num}. Try reducing batch_size or max_tokens.\")\n",
"                if torch.cuda.is_available():\n",
"                    torch.cuda.empty_cache()\n",
"                continue\n",
"            raise\n",
"\n",
"        # Keep only the newly generated tokens, not the echoed prompt.\n",
"        input_length = inputs.input_ids.shape[1]\n",
"        decoded = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\n",
"        logger.debug(f\"Model output (first 300 chars): {decoded[:300]}\")\n",
"\n",
"        # Re-attach the pre-filled \"[\" before locating the array boundary.\n",
"        json_text = _extract_json_array(\"[\" + decoded)\n",
"        if json_text is None:\n",
"            logger.warning(f\"Failed to find JSON array boundary in batch {batch_num}\")\n",
"            continue\n",
"\n",
"        try:\n",
"            batch_pairs = _parse_pairs(json_text)\n",
"        except json.JSONDecodeError as e:\n",
"            logger.error(f\"Failed to parse JSON in batch {batch_num}: {str(e)}\")\n",
"            continue\n",
"\n",
"        all_pairs.extend(batch_pairs)\n",
"        logger.info(f\"Extracted {len(batch_pairs)} pairs from batch {batch_num}\")\n",
"\n",
"    logger.info(f\"Total pairs generated: {len(all_pairs)}\")\n",
"    return all_pairs\n",
"\n",
"\n",
"training_pairs = generate_pairs_with_model(training_data, batch_size=None, max_tokens=2048)\n",
"logger.info(f\"Generated {len(training_pairs)} training pairs\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85673dcd",
"metadata": {},
"outputs": [],
"source": [
"print(f\"\\n{'='*80}\")\n",
"print(f\"Total training pairs generated: {len(training_pairs)}\")\n",
"print(f\"{'='*80}\\n\")\n",
"\n",
"if training_pairs:\n",
" print(\"Sample training pairs:\")\n",
" for i, (instr, resp) in enumerate(training_pairs[:3], 1):\n",
" print(f\"\\nPair {i}:\")\n",
" print(f\" Instruction: {instr[:100]}{'...' if len(instr) > 100 else ''}\")\n",
" print(f\" Response: {resp[:100]}{'...' if len(resp) > 100 else ''}\")"
]
},
{
"cell_type": "markdown",
"id": "4dec03c6",
"metadata": {},
"source": [
"## Step 4: Save Training Data to JSONL Format\n",
"\n",
"Export the generated pairs to a JSONL file for use with fine-tuning pipelines."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0f727ee",
"metadata": {},
"outputs": [],
"source": [
"output_file = DATA_DIR / \"generated_training_pairs.jsonl\"\n",
"\n",
"logger.info(f\"Saving {len(training_pairs)} pairs to {output_file}\")\n",
"\n",
"with open(output_file, 'w', encoding='utf-8') as f:\n",
" for instruction, response in training_pairs:\n",
" training_pair = {\n",
" \"instruction\": instruction,\n",
" \"output\": response,\n",
" }\n",
" f.write(json.dumps(training_pair, ensure_ascii=False) + \"\\n\")\n",
"\n",
"logger.info(f\"Training data saved to {output_file}\")\n",
"print(f\"\\n✓ Training pairs saved to: {output_file}\")"
]
},
{
"cell_type": "markdown",
"id": "761f92c1",
"metadata": {},
"source": [
"## Cleanup\n",
"\n",
"Free GPU memory after pair generation is complete."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db644782",
"metadata": {},
"outputs": [],
"source": [
"del model\n",
"del tokenizer\n",
"import gc\n",
"gc.collect()\n",
"\n",
"if torch.cuda.is_available():\n",
" torch.cuda.empty_cache()\n",
" torch.cuda.synchronize()\n",
"\n",
"logger.info(\"GPU memory freed\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -1,393 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "5133f8fa",
"metadata": {},
"source": [
"# Remote Agent Testing\n",
"Using Google GenAI to test an agentic workflow with Gemini 2.5."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "62ec2147",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Imports\n",
"import os \n",
"from dotenv import load_dotenv\n",
"from langchain.agents import create_agent\n",
"from langchain.agents.middleware import dynamic_prompt, ModelRequest\n",
"from langchain.chat_models import init_chat_model\n",
"from langchain.tools import tool\n",
"from langchain_chroma import Chroma\n",
"from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"load_dotenv(os.path.join('', '..', '.env'))"
]
},
{
"cell_type": "markdown",
"id": "6dc525a1",
"metadata": {},
"source": [
"Initialise the chat model: Gemini 2.5 Flash-Lite via LangChain's Google Generative AI integration, used to test the agentic workflow."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a401cf8a",
"metadata": {},
"outputs": [],
"source": [
"\n",
"model = init_chat_model(\"google_genai:gemini-2.5-flash-lite\")"
]
},
{
"cell_type": "markdown",
"id": "aaa68979",
"metadata": {},
"source": [
"Set up the embedding model (`gemini-embedding-001`) that the vector store uses to embed documents and queries."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "45805907",
"metadata": {},
"outputs": [],
"source": [
"\n",
"embeddings = GoogleGenerativeAIEmbeddings(model=\"models/gemini-embedding-001\")"
]
},
{
"cell_type": "markdown",
"id": "b3f90586",
"metadata": {},
"source": [
"Set up a Chroma vector store, persisted under `./build/langchain_db`, for storing and retrieving document embeddings."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "500f90f4",
"metadata": {},
"outputs": [],
"source": [
"vector_store = Chroma(\n",
" collection_name=\"example_collection\",\n",
" embedding_function=embeddings,\n",
" persist_directory=\"./build/langchain_db\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d4ff7ec0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6,900 pages later… *“This story is just for that one reader.”* \n",
"*Omniscient Readers Viewpoint* is probably one of the most ambitious epics Ive ever read in this genre. Regression-themed novels are already a flooded trope, but this one blows the rest out of the water purely from how many layers it stacks on top of itself and still manages to come out narratively clean. When I first got into this series (via the webtoon, like most people), the wait between weekly releases drove me up the wall,\n",
"Total characters: 8578\n"
]
}
],
"source": [
"import requests\n",
"from langchain_core.documents import Document\n",
"\n",
"response = requests.get(\"https://viswamedha.com/api/post/a-story-for-one-reader/\")\n",
"data = response.json()\n",
"content = data['content']\n",
"\n",
"docs = [Document(page_content=content, metadata={\"source\": response.url})]\n",
"\n",
"assert len(docs) == 1\n",
"print(docs[0].page_content[:500])\n",
"print(f\"Total characters: {len(docs[0].page_content)}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "82bcfabc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Split blog post into 13 sub-documents.\n"
]
}
],
"source": [
"\n",
"text_splitter = RecursiveCharacterTextSplitter(\n",
" chunk_size=1000, \n",
" chunk_overlap=200, \n",
" add_start_index=True,\n",
")\n",
"all_splits = text_splitter.split_documents(docs)\n",
"\n",
"print(f\"Split blog post into {len(all_splits)} sub-documents.\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2ee1a9ca",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['44706f38-6bd0-4e9a-8a41-d27790bdddc8', '9d2a2300-a311-4389-86b8-71eef221186c', '3970098b-f681-47bb-8c1d-6929cb67b537']\n"
]
}
],
"source": [
"document_ids = vector_store.add_documents(documents=all_splits)\n",
"\n",
"print(document_ids[:3])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a9096893",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"@tool(response_format=\"content_and_artifact\")\n",
"def retrieve_context(query: str):\n",
" \"\"\"Retrieve information to help answer a query.\"\"\"\n",
" retrieved_docs = vector_store.similarity_search(query, k=2)\n",
" serialized = \"\\n\\n\".join(\n",
" (f\"Source: {doc.metadata}\\nContent: {doc.page_content}\")\n",
" for doc in retrieved_docs\n",
" )\n",
" return serialized, retrieved_docs"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "dff2345d",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"tools = [retrieve_context]\n",
"prompt = (\n",
" \"You have access to a tool that retrieves context from a blog post. \"\n",
" \"Use the tool to help answer user queries.\"\n",
")\n",
"agent = create_agent(model, tools, system_prompt=prompt)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "aaa2fad9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What is the significance of the second loop?\n",
"\n",
"Use the retrieved context to provide a detailed answer.\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"Tool Calls:\n",
" retrieve_context (b6c5ce4e-a030-47cf-8fed-f1279f022766)\n",
" Call ID: b6c5ce4e-a030-47cf-8fed-f1279f022766\n",
" Args:\n",
" query: Significance of the second loop\n",
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
"Name: retrieve_context\n",
"\n",
"Source: {'start_index': 3377, 'source': 'https://viswamedha.com/api/post/a-story-for-one-reader/'}\n",
"Content: And this is where the paradox really hits. The Great Plotter, while observing regressions and chasing a better ending, ends up **creating the very timeline** hes been watching. In trying to fix his own story, he triggers a new one. He unknowingly causes the very events that lead to KDJs worldline existing in the first place. It's absolutely wild. He becomes the most influential figure in this timeline, yet completely powerless to interact with it directly (due to the constraints of Probability). All he can do is watch as KDJ lives through the story he thought he already knew.\n",
"\n",
"---\n",
"\n",
"## What is the second paradox, and where does the loop begin?\n",
"\n",
"Source: {'start_index': 3377, 'source': 'https://viswamedha.com/api/post/a-story-for-one-reader/'}\n",
"Content: And this is where the paradox really hits. The Great Plotter, while observing regressions and chasing a better ending, ends up **creating the very timeline** hes been watching. In trying to fix his own story, he triggers a new one. He unknowingly causes the very events that lead to KDJs worldline existing in the first place. It's absolutely wild. He becomes the most influential figure in this timeline, yet completely powerless to interact with it directly (due to the constraints of Probability). All he can do is watch as KDJ lives through the story he thought he already knew.\n",
"\n",
"---\n",
"\n",
"## What is the second paradox, and where does the loop begin?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"The second loop is significant because it represents a paradox where the Great Plotter, in his attempt to alter his own story and create a better ending, inadvertently becomes the catalyst for the very timeline he is observing. He ends up creating the timeline he has been watching, triggering new events, and causing the existence of KDJ's worldline. Despite being the most influential figure in this new timeline, the Great Plotter is powerless to intervene directly and can only watch as KDJ experiences the story.\n"
]
}
],
"source": [
"query = (\n",
" \"What is the significance of the second loop?\\n\\n\"\n",
" \"Use the retrieved context to provide a detailed answer.\"\n",
")\n",
"\n",
"for event in agent.stream(\n",
" {\"messages\": [{\"role\": \"user\", \"content\": query}]},\n",
" stream_mode=\"values\",\n",
"):\n",
" event[\"messages\"][-1].pretty_print()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "bda6d7d0",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"@dynamic_prompt\n",
"def prompt_with_context(request: ModelRequest) -> str:\n",
" \"\"\"Inject context into state messages.\"\"\"\n",
" last_query = request.state[\"messages\"][-1].text\n",
" retrieved_docs = vector_store.similarity_search(last_query)\n",
"\n",
" docs_content = \"\\n\\n\".join(doc.page_content for doc in retrieved_docs)\n",
"\n",
" system_message = (\n",
" \"You are a helpful assistant. Use the following context in your response:\"\n",
" f\"\\n\\n{docs_content}\"\n",
" )\n",
"\n",
" return system_message\n",
"\n",
"\n",
"agent = create_agent(model, tools=[], middleware=[prompt_with_context])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1540855c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'messages': [HumanMessage(content='What is the significance of the second loop?\\n\\n', additional_kwargs={}, response_metadata={}, id='eaca10e5-a350-4ad8-80ad-c62645b69e5a')]}\n",
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What is the significance of the second loop?\n",
"\n",
"\n"
]
},
{
"ename": "GoogleGenerativeAIError",
"evalue": "Error embedding content: 500 INTERNAL. {'error': {'code': 500, 'message': 'Internal error encountered.', 'status': 'INTERNAL'}}",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mServerError\u001b[39m Traceback (most recent call last)",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain_google_genai\\embeddings.py:480\u001b[39m, in \u001b[36mGoogleGenerativeAIEmbeddings.embed_query\u001b[39m\u001b[34m(self, text, task_type, title, output_dimensionality)\u001b[39m\n\u001b[32m 479\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m480\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmodels\u001b[49m\u001b[43m.\u001b[49m\u001b[43membed_content\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 481\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 482\u001b[39m \u001b[43m \u001b[49m\u001b[43mcontents\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 483\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 484\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 485\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ClientError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\models.py:4179\u001b[39m, in \u001b[36mModels.embed_content\u001b[39m\u001b[34m(self, model, contents, config)\u001b[39m\n\u001b[32m 4177\u001b[39m request_dict = _common.encode_unserializable_types(request_dict)\n\u001b[32m-> \u001b[39m\u001b[32m4179\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_api_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 4180\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mpost\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequest_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhttp_options\u001b[49m\n\u001b[32m 4181\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4183\u001b[39m response_dict = {} \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m response.body \u001b[38;5;28;01melse\u001b[39;00m json.loads(response.body)\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\_api_client.py:1386\u001b[39m, in \u001b[36mBaseApiClient.request\u001b[39m\u001b[34m(self, http_method, path, request_dict, http_options)\u001b[39m\n\u001b[32m 1383\u001b[39m http_request = \u001b[38;5;28mself\u001b[39m._build_request(\n\u001b[32m 1384\u001b[39m http_method, path, request_dict, http_options\n\u001b[32m 1385\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1386\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhttp_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhttp_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m 1387\u001b[39m response_body = (\n\u001b[32m 1388\u001b[39m response.response_stream[\u001b[32m0\u001b[39m] \u001b[38;5;28;01mif\u001b[39;00m response.response_stream \u001b[38;5;28;01melse\u001b[39;00m \u001b[33m'\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m 1389\u001b[39m )\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\_api_client.py:1222\u001b[39m, in \u001b[36mBaseApiClient._request\u001b[39m\u001b[34m(self, http_request, http_options, stream)\u001b[39m\n\u001b[32m 1220\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m retry(\u001b[38;5;28mself\u001b[39m._request_once, http_request, stream) \u001b[38;5;66;03m# type: ignore[no-any-return]\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1222\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_retry\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_request_once\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhttp_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:477\u001b[39m, in \u001b[36mRetrying.__call__\u001b[39m\u001b[34m(self, fn, *args, **kwargs)\u001b[39m\n\u001b[32m 476\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m477\u001b[39m do = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43miter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry_state\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 478\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(do, DoAttempt):\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:378\u001b[39m, in \u001b[36mBaseRetrying.iter\u001b[39m\u001b[34m(self, retry_state)\u001b[39m\n\u001b[32m 377\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m action \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.iter_state.actions:\n\u001b[32m--> \u001b[39m\u001b[32m378\u001b[39m result = \u001b[43maction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 379\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:420\u001b[39m, in \u001b[36mBaseRetrying._post_stop_check_actions.<locals>.exc_check\u001b[39m\u001b[34m(rs)\u001b[39m\n\u001b[32m 419\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.reraise:\n\u001b[32m--> \u001b[39m\u001b[32m420\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mretry_exc\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreraise\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 421\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m retry_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mfut\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mexception\u001b[39;00m()\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:187\u001b[39m, in \u001b[36mRetryError.reraise\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 186\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.last_attempt.failed:\n\u001b[32m--> \u001b[39m\u001b[32m187\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlast_attempt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n",
"\u001b[36mFile \u001b[39m\u001b[32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.13_3.13.2544.0_x64__qbz5n2kfra8p0\\Lib\\concurrent\\futures\\_base.py:449\u001b[39m, in \u001b[36mFuture.result\u001b[39m\u001b[34m(self, timeout)\u001b[39m\n\u001b[32m 448\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._state == FINISHED:\n\u001b[32m--> \u001b[39m\u001b[32m449\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 451\u001b[39m \u001b[38;5;28mself\u001b[39m._condition.wait(timeout)\n",
"\u001b[36mFile \u001b[39m\u001b[32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.13_3.13.2544.0_x64__qbz5n2kfra8p0\\Lib\\concurrent\\futures\\_base.py:401\u001b[39m, in \u001b[36mFuture.__get_result\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 400\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m401\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m._exception\n\u001b[32m 402\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 403\u001b[39m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:480\u001b[39m, in \u001b[36mRetrying.__call__\u001b[39m\u001b[34m(self, fn, *args, **kwargs)\u001b[39m\n\u001b[32m 479\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m480\u001b[39m result = \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 481\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m: \u001b[38;5;66;03m# noqa: B902\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\_api_client.py:1199\u001b[39m, in \u001b[36mBaseApiClient._request_once\u001b[39m\u001b[34m(self, http_request, stream)\u001b[39m\n\u001b[32m 1192\u001b[39m response = \u001b[38;5;28mself\u001b[39m._httpx_client.request(\n\u001b[32m 1193\u001b[39m method=http_request.method,\n\u001b[32m 1194\u001b[39m url=http_request.url,\n\u001b[32m (...)\u001b[39m\u001b[32m 1197\u001b[39m timeout=http_request.timeout,\n\u001b[32m 1198\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1199\u001b[39m \u001b[43merrors\u001b[49m\u001b[43m.\u001b[49m\u001b[43mAPIError\u001b[49m\u001b[43m.\u001b[49m\u001b[43mraise_for_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1200\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m HttpResponse(\n\u001b[32m 1201\u001b[39m response.headers, response \u001b[38;5;28;01mif\u001b[39;00m stream \u001b[38;5;28;01melse\u001b[39;00m [response.text]\n\u001b[32m 1202\u001b[39m )\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\errors.py:121\u001b[39m, in \u001b[36mAPIError.raise_for_response\u001b[39m\u001b[34m(cls, response)\u001b[39m\n\u001b[32m 119\u001b[39m response_json = response.body_segments[\u001b[32m0\u001b[39m].get(\u001b[33m'\u001b[39m\u001b[33merror\u001b[39m\u001b[33m'\u001b[39m, {})\n\u001b[32m--> \u001b[39m\u001b[32m121\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mraise_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstatus_code\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_json\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\errors.py:148\u001b[39m, in \u001b[36mAPIError.raise_error\u001b[39m\u001b[34m(cls, status_code, response_json, response)\u001b[39m\n\u001b[32m 147\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[32m500\u001b[39m <= status_code < \u001b[32m600\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m148\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ServerError(status_code, response_json, response)\n\u001b[32m 149\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"\u001b[31mServerError\u001b[39m: 500 INTERNAL. {'error': {'code': 500, 'message': 'Internal error encountered.', 'status': 'INTERNAL'}}",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[31mGoogleGenerativeAIError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[13]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstep\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43magent\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrole\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m}\u001b[49m\u001b[43m]\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_mode\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mvalues\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mimport\u001b[39;49;00m\u001b[38;5;250;43m \u001b[39;49m\u001b[34;43;01mpprint\u001b[39;49;00m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mpprint\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# inspect the event structure\u001b[39;49;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\pregel\\main.py:2646\u001b[39m, in \u001b[36mPregel.stream\u001b[39m\u001b[34m(self, input, config, context, stream_mode, print_mode, output_keys, interrupt_before, interrupt_after, durability, subgraphs, debug, **kwargs)\u001b[39m\n\u001b[32m 2644\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m loop.match_cached_writes():\n\u001b[32m 2645\u001b[39m loop.output_writes(task.id, task.writes, cached=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m-> \u001b[39m\u001b[32m2646\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtick\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2647\u001b[39m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mloop\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtasks\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwrites\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2648\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstep_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2649\u001b[39m \u001b[43m \u001b[49m\u001b[43mget_waiter\u001b[49m\u001b[43m=\u001b[49m\u001b[43mget_waiter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2650\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m=\u001b[49m\u001b[43mloop\u001b[49m\u001b[43m.\u001b[49m\u001b[43maccept_push\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2651\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 2652\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# emit output\u001b[39;49;00m\n\u001b[32m 2653\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01myield from\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_output\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2654\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_mode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprint_mode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubgraphs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mqueue\u001b[49m\u001b[43m.\u001b[49m\u001b[43mEmpty\u001b[49m\n\u001b[32m 2655\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2656\u001b[39m loop.after_tick()\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\pregel\\_runner.py:167\u001b[39m, in \u001b[36mPregelRunner.tick\u001b[39m\u001b[34m(self, tasks, reraise, timeout, retry_policy, get_waiter, schedule_task)\u001b[39m\n\u001b[32m 165\u001b[39m t = tasks[\u001b[32m0\u001b[39m]\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m167\u001b[39m \u001b[43mrun_with_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 168\u001b[39m \u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 169\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 170\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfigurable\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 171\u001b[39m \u001b[43m \u001b[49m\u001b[43mCONFIG_KEY_CALL\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mpartial\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[43m \u001b[49m\u001b[43m_call\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 173\u001b[39m \u001b[43m \u001b[49m\u001b[43mweakref\u001b[49m\u001b[43m.\u001b[49m\u001b[43mref\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 174\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 175\u001b[39m \u001b[43m \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m=\u001b[49m\u001b[43mweakref\u001b[49m\u001b[43m.\u001b[49m\u001b[43mref\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 176\u001b[39m \u001b[43m \u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m=\u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 177\u001b[39m \u001b[43m 
\u001b[49m\u001b[43msubmit\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msubmit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 178\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 180\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 181\u001b[39m \u001b[38;5;28mself\u001b[39m.commit(t, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\pregel\\_retry.py:42\u001b[39m, in \u001b[36mrun_with_retry\u001b[39m\u001b[34m(task, retry_policy, configurable)\u001b[39m\n\u001b[32m 40\u001b[39m task.writes.clear()\n\u001b[32m 41\u001b[39m \u001b[38;5;66;03m# run the task\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m42\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtask\u001b[49m\u001b[43m.\u001b[49m\u001b[43mproc\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m.\u001b[49m\u001b[43minput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ParentCommand \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 44\u001b[39m ns: \u001b[38;5;28mstr\u001b[39m = config[CONF][CONFIG_KEY_CHECKPOINT_NS]\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\_internal\\_runnable.py:656\u001b[39m, in \u001b[36mRunnableSeq.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m 654\u001b[39m \u001b[38;5;66;03m# run in context\u001b[39;00m\n\u001b[32m 655\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m set_config_context(config, run) \u001b[38;5;28;01mas\u001b[39;00m context:\n\u001b[32m--> \u001b[39m\u001b[32m656\u001b[39m \u001b[38;5;28minput\u001b[39m = \u001b[43mcontext\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 657\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 658\u001b[39m \u001b[38;5;28minput\u001b[39m = step.invoke(\u001b[38;5;28minput\u001b[39m, config)\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\_internal\\_runnable.py:400\u001b[39m, in \u001b[36mRunnableCallable.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m 398\u001b[39m run_manager.on_chain_end(ret)\n\u001b[32m 399\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m400\u001b[39m ret = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 401\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.recurse \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(ret, Runnable):\n\u001b[32m 402\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m ret.invoke(\u001b[38;5;28minput\u001b[39m, config)\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain\\agents\\factory.py:1144\u001b[39m, in \u001b[36mcreate_agent.<locals>.model_node\u001b[39m\u001b[34m(state, runtime)\u001b[39m\n\u001b[32m 1141\u001b[39m response = _execute_model_sync(request)\n\u001b[32m 1142\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1143\u001b[39m \u001b[38;5;66;03m# Call composed handler with base handler\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1144\u001b[39m response = \u001b[43mwrap_model_call_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_execute_model_sync\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1146\u001b[39m \u001b[38;5;66;03m# Extract state updates from ModelResponse\u001b[39;00m\n\u001b[32m 1147\u001b[39m state_updates = {\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m: response.result}\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain\\agents\\factory.py:146\u001b[39m, in \u001b[36m_chain_model_call_handlers.<locals>.normalized_single\u001b[39m\u001b[34m(request, handler)\u001b[39m\n\u001b[32m 142\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mnormalized_single\u001b[39m(\n\u001b[32m 143\u001b[39m request: ModelRequest,\n\u001b[32m 144\u001b[39m handler: Callable[[ModelRequest], ModelResponse],\n\u001b[32m 145\u001b[39m ) -> ModelResponse:\n\u001b[32m--> \u001b[39m\u001b[32m146\u001b[39m result = \u001b[43msingle_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhandler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 147\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _normalize_to_model_response(result)\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain\\agents\\middleware\\types.py:1656\u001b[39m, in \u001b[36mdynamic_prompt.<locals>.decorator.<locals>.wrapped\u001b[39m\u001b[34m(_self, request, handler)\u001b[39m\n\u001b[32m 1651\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mwrapped\u001b[39m(\n\u001b[32m 1652\u001b[39m _self: AgentMiddleware[StateT, ContextT],\n\u001b[32m 1653\u001b[39m request: ModelRequest,\n\u001b[32m 1654\u001b[39m handler: Callable[[ModelRequest], ModelResponse],\n\u001b[32m 1655\u001b[39m ) -> ModelCallResult:\n\u001b[32m-> \u001b[39m\u001b[32m1656\u001b[39m prompt = \u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mCallable[[ModelRequest], SystemMessage | str]\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1657\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(prompt, SystemMessage):\n\u001b[32m 1658\u001b[39m request = request.override(system_message=prompt)\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[11]\u001b[39m\u001b[32m, line 5\u001b[39m, in \u001b[36mprompt_with_context\u001b[39m\u001b[34m(request)\u001b[39m\n\u001b[32m 3\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Inject context into state messages.\"\"\"\u001b[39;00m\n\u001b[32m 4\u001b[39m last_query = request.state[\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m][-\u001b[32m1\u001b[39m].text\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m retrieved_docs = \u001b[43mvector_store\u001b[49m\u001b[43m.\u001b[49m\u001b[43msimilarity_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlast_query\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 7\u001b[39m docs_content = \u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m.join(doc.page_content \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m retrieved_docs)\n\u001b[32m 9\u001b[39m system_message = (\n\u001b[32m 10\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mYou are a helpful assistant. Use the following context in your response:\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 11\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mdocs_content\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 12\u001b[39m )\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain_chroma\\vectorstores.py:748\u001b[39m, in \u001b[36mChroma.similarity_search\u001b[39m\u001b[34m(self, query, k, filter, **kwargs)\u001b[39m\n\u001b[32m 730\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34msimilarity_search\u001b[39m(\n\u001b[32m 731\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 732\u001b[39m query: \u001b[38;5;28mstr\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 735\u001b[39m **kwargs: Any,\n\u001b[32m 736\u001b[39m ) -> \u001b[38;5;28mlist\u001b[39m[Document]:\n\u001b[32m 737\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Run similarity search with Chroma.\u001b[39;00m\n\u001b[32m 738\u001b[39m \n\u001b[32m 739\u001b[39m \u001b[33;03m Args:\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 746\u001b[39m \u001b[33;03m List of documents most similar to the query text.\u001b[39;00m\n\u001b[32m 747\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m748\u001b[39m docs_and_scores = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msimilarity_search_with_score\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 749\u001b[39m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 750\u001b[39m \u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 751\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mfilter\u001b[39;49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mfilter\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 752\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 753\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 754\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m [doc \u001b[38;5;28;01mfor\u001b[39;00m doc, _ \u001b[38;5;129;01min\u001b[39;00m docs_and_scores]\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain_chroma\\vectorstores.py:848\u001b[39m, in \u001b[36mChroma.similarity_search_with_score\u001b[39m\u001b[34m(self, query, k, filter, where_document, **kwargs)\u001b[39m\n\u001b[32m 840\u001b[39m results = \u001b[38;5;28mself\u001b[39m.__query_collection(\n\u001b[32m 841\u001b[39m query_texts=[query],\n\u001b[32m 842\u001b[39m n_results=k,\n\u001b[32m (...)\u001b[39m\u001b[32m 845\u001b[39m **kwargs,\n\u001b[32m 846\u001b[39m )\n\u001b[32m 847\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m848\u001b[39m query_embedding = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_embedding_function\u001b[49m\u001b[43m.\u001b[49m\u001b[43membed_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 849\u001b[39m results = \u001b[38;5;28mself\u001b[39m.__query_collection(\n\u001b[32m 850\u001b[39m query_embeddings=[query_embedding],\n\u001b[32m 851\u001b[39m n_results=k,\n\u001b[32m (...)\u001b[39m\u001b[32m 854\u001b[39m **kwargs,\n\u001b[32m 855\u001b[39m )\n\u001b[32m 857\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _results_to_docs_and_scores(results)\n",
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain_google_genai\\embeddings.py:490\u001b[39m, in \u001b[36mGoogleGenerativeAIEmbeddings.embed_query\u001b[39m\u001b[34m(self, text, task_type, title, output_dimensionality)\u001b[39m\n\u001b[32m 488\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 489\u001b[39m msg = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mError embedding content: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m--> \u001b[39m\u001b[32m490\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GoogleGenerativeAIError(msg) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n\u001b[32m 492\u001b[39m \u001b[38;5;66;03m# Single text returns single embedding\u001b[39;00m\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(result.embeddings[\u001b[32m0\u001b[39m].values)\n",
"\u001b[31mGoogleGenerativeAIError\u001b[39m: Error embedding content: 500 INTERNAL. {'error': {'code': 500, 'message': 'Internal error encountered.', 'status': 'INTERNAL'}}",
"During task with name 'model' and id '2df4c75f-65ba-cd3e-b448-0ed95a7614f8'"
]
}
],
"source": [
"query = \"What is the significance of the second loop?\\n\\n\"\n",
"for step in agent.stream(\n",
" {\"messages\": [{\"role\": \"user\", \"content\": query}]},\n",
" stream_mode=\"values\",\n",
"):\n",
" step[\"messages\"][-1].pretty_print()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

5881
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -1,46 +0,0 @@
{
"name": "dynavera",
"version": "0.0.0",
"private": true,
"type": "module",
"engines": {
"node": "^20.19.0 || >=22.12.0"
},
"scripts": {
"dev": "vite -- --host",
"build": "run-p type-check \"build-only {@}\" --",
"preview": "vite preview",
"build-only": "vite build",
"watch": "vite build --watch",
"devwatch": "concurrently \"npm run dev\" \"npm run watch\"",
"type-check": "vue-tsc --build",
"lint": "eslint . --fix --cache",
"format": "prettier --write --experimental-cli src/"
},
"dependencies": {
"ant-design-vue": "^4.2.6",
"axios": "^1.13.2",
"pinia": "^3.0.4",
"vue": "^3.5.26",
"vue-router": "^4.6.4"
},
"devDependencies": {
"@tsconfig/node24": "^24.0.3",
"@types/node": "^24.10.4",
"@vitejs/plugin-vue": "^6.0.3",
"@vitejs/plugin-vue-jsx": "^5.1.3",
"@vue/eslint-config-prettier": "^10.2.0",
"@vue/eslint-config-typescript": "^14.6.0",
"@vue/tsconfig": "^0.8.1",
"eslint": "^9.39.2",
"eslint-plugin-vue": "~10.6.2",
"jiti": "^2.6.1",
"npm-run-all2": "^8.0.4",
"concurrently": "^8.2.0",
"prettier": "3.7.4",
"typescript": "~5.9.3",
"vite": "^7.3.0",
"vite-plugin-vue-devtools": "^8.0.5",
"vue-tsc": "^3.2.2"
}
}

View file

@ -1,64 +0,0 @@
amqp==5.3.1
asgiref==3.11.0
attrs==25.4.0
autobahn==25.12.2
Automat==25.4.16
billiard==4.2.4
cbor2==5.8.0
celery==5.6.2
cffi==2.0.0
channels==4.3.2
channels_redis==4.3.0
click==8.3.1
click-didyoumean==0.3.1
click-plugins==1.1.1.2
click-repl==0.3.0
colorama==0.4.6
constantly==23.10.4
cron_descriptor==2.0.6
cryptography==46.0.3
daphne==4.2.1
Django==5.2.10
django-celery-beat==2.8.1
django-cors-headers==4.9.0
django-jazzmin==3.0.1
django-timezone-field==7.2.1
django_celery_results==2.6.0
djangorestframework==3.16.1
httpx==0.28.1
hyperlink==21.0.0
idna==3.11
Incremental==24.11.0
kombu==5.6.2
msgpack==1.1.2
packaging==25.0
prompt_toolkit==3.0.52
psycopg2-binary==2.9.10
pgvector==0.4.2
PyPDF2==3.0.1
python-docx==1.1.2
py-ubjson==0.16.1
pyasn1==0.6.2
pyasn1_modules==0.4.2
pycparser==2.23
pyOpenSSL==25.3.0
python-crontab==3.3.0
python-dateutil==2.9.0.post0
python-dotenv==1.2.1
redis==7.1.0
service-identity==24.2.0
six==1.17.0
sqlparse==0.5.5
Twisted==25.5.0
txaio==25.12.2
typing_extensions==4.15.0
tzdata==2025.3
tzlocal==5.3.1
ujson==5.11.0
vine==5.1.0
wcwidth==0.2.14
whitenoise==6.11.0
zope.interface==8.2
openai==1.48.0
langchain-google-genai==2.0.4
sentence-transformers==3.0.1

Some files were not shown because too many files have changed in this diff Show more