Compare commits

..

No commits in common. "c2da78b8d56d22d89779823c5930931233e58c41" and "f74337f60fffc78e5a6f08ae81b37e8c0f4823f9" have entirely different histories.

38 changed files with 296 additions and 3840 deletions

1
.gitignore vendored
View file

@ -275,4 +275,3 @@ models/
# Cache
hf_cache/
launchreel-output/

View file

@ -1,484 +0,0 @@
import datetime
import json
import statistics
import time
from pathlib import Path
import httpx
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import Q
from pgvector.django import CosineDistance
from apps.accounts.models import Organization, Role, User
from apps.knowledge.models import KnowledgeChunk, TrainingFile
from apps.onboarding.models import OnboardingSession
class Command(BaseCommand):
help = "Benchmark Dynavera system components: GPU inference server, pgvector retrieval, and database."
def add_arguments(self, parser):
    """Register the command-line options of the benchmark command.

    Options:
        --runs      Repetitions per latency benchmark (default 5, must be >= 1).
        --out       Output directory for the results file (default "benchmarks").
        --skip-llm  Flag that disables the LLM inference benchmarks.
    """
    import argparse  # local import: only needed to report an invalid --runs value

    def _positive_int(raw):
        # The latency statistics need at least one sample; zero or negative
        # repetitions would otherwise fail later while computing them.
        try:
            value = int(raw)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
        if value < 1:
            raise argparse.ArgumentTypeError(f"--runs must be at least 1 (got {value})")
        return value

    parser.add_argument("--runs", type=_positive_int, default=5, help="Repetitions per latency benchmark (default: 5)")
    parser.add_argument("--out", type=str, default="benchmarks", help="Output directory for the results file (default: benchmarks/)")
    parser.add_argument("--skip-llm", action="store_true", help="Skip LLM inference benchmarks (each prompt takes ~30 s)")
def handle(self, *args, **options):
    """Run every benchmark stage in order, then print and save the results.

    Reads the ``runs``, ``skip_llm`` and ``out`` options, creates the output
    directory, and collects each stage's measurements in ``self.results``.
    The LLM stage is skipped when ``--skip-llm`` was given.
    """
    self.runs = options["runs"]
    self.skip_llm = options["skip_llm"]
    self.out_dir = Path(options["out"])
    # parents=True so a nested --out path (e.g. "reports/bench") works even
    # when the intermediate directories do not exist yet.
    self.out_dir.mkdir(parents=True, exist_ok=True)
    self.results = {}

    self.stdout.write(self.style.SUCCESS("\n=== Dynavera System Benchmark ==="))
    self.stdout.write(f" Inference endpoint : {settings.INFERENCE_URL}")
    self.stdout.write(f" Repetitions : {self.runs}")
    self.stdout.write(f" LLM benchmarks : {'SKIPPED (--skip-llm)' if self.skip_llm else 'ENABLED'}\n")

    self._bench_health()
    self._bench_embeddings()
    self._bench_chunking()
    if not self.skip_llm:
        self._bench_llm()
    self._bench_database()
    self._bench_retrieval()

    self._print_summary()
    self._save_report()
def _req(self, method, path, **kwargs):
    """Send an authenticated request to the inference server and return its JSON body.

    ``path`` is appended to ``settings.INFERENCE_URL``; extra keyword
    arguments are forwarded to ``httpx.request``. ``raise_for_status`` is
    called on the response before its JSON body is decoded and returned.
    """
    response = httpx.request(
        method,
        f"{settings.INFERENCE_URL}{path}",
        auth=settings.INFERENCE_AUTH,
        timeout=180,
        **kwargs,
    )
    response.raise_for_status()
    return response.json()
def _time_fn(self, fn):
t0 = time.perf_counter()
result = fn()
return result, (time.perf_counter() - t0) * 1000
def _stats(self, times_ms):
s = sorted(times_ms)
n = len(s)
p95_idx = min(n - 1, int(-(-(0.95 * n) // 1)) - 1)
return {
"mean_ms": round(statistics.mean(s), 1),
"median_ms": round(statistics.median(s), 1),
"p95_ms": round(s[p95_idx], 1),
"min_ms": round(s[0], 1),
"max_ms": round(s[-1], 1),
}
def _bench_health(self):
    """Time one GET /health call and record the server's readiness flags.

    Stores status, LLM/embedding readiness and round-trip latency under
    ``self.results["health"]``; any exception is recorded as an ERROR entry
    instead of being raised.
    """
    self.stdout.write("[ 1/6 ] GPU server health check ...")
    try:
        payload, elapsed_ms = self._time_fn(lambda: self._req("GET", "/health"))
        status = "OK" if payload.get("status") == "ok" else "DEGRADED"
        llm_ready = payload.get("llm_ready", False)
        embed_ready = payload.get("embedding_ready", False)
        self.results["health"] = {
            "status": status,
            "llm_ready": llm_ready,
            "embed_ready": embed_ready,
            "latency_ms": round(elapsed_ms, 1),
        }
        llm_text = "ready" if llm_ready else "unloaded"
        embed_text = "ready" if embed_ready else "not ready"
        self.stdout.write(f" {status} | LLM: {llm_text} | Embed: {embed_text} | {elapsed_ms:.0f} ms")
    except Exception as exc:
        self.results["health"] = {"status": "ERROR", "error": str(exc)}
        self.stdout.write(self.style.ERROR(f" FAILED: {exc}"))
def _bench_embeddings(self):
    """Measure /v1/embeddings latency for a short, a medium and a long query.

    Each query is sent ``self.runs`` times and summarised with ``_stats``.
    Results are stored under ``self.results["embeddings"]``, keyed by the
    stripped label. A failing query is recorded as ``{"error": ...}`` so the
    remaining benchmarks still run, matching the chunking and LLM stages.
    """
    self.stdout.write(f"\n[ 2/6 ] Embedding latency ({self.runs} runs × 3 query lengths) ...")
    queries = {
        "short ": "What is onboarding?",
        "medium ": (
            "Explain the process for configuring access control policies for a new software engineer "
            "joining the platform team, including approval workflows and tool provisioning steps."
        ),
        "long ": (
            "A new hire on the infrastructure team needs to understand our CI/CD pipeline, deployment "
            "procedures, incident response protocols, monitoring dashboards, on-call rotation policy, "
            "and how to request access to production systems. Provide a comprehensive overview of all "
            "these areas including the relevant tools, key contacts, and escalation procedures they "
            "should be aware of during their first week and first month at the company."
        ),
    }
    embed_results = {}
    for label, query in queries.items():
        try:
            times = []
            for _ in range(self.runs):
                # q=query binds the current query at lambda creation time.
                _, ms = self._time_fn(lambda q=query: self._req("POST", "/v1/embeddings", json={"input": q}))
                times.append(ms)
            st = self._stats(times)
        except Exception as exc:
            # Keep going: one unreachable endpoint must not abort the whole run.
            embed_results[label.strip()] = {"error": str(exc)}
            self.stdout.write(self.style.ERROR(f" {label} FAILED: {exc}"))
            continue
        embed_results[label.strip()] = {"query_chars": len(query), **st}
        self.stdout.write(
            f" {label}({len(query):4d} chars) mean={st['mean_ms']:.0f} ms "
            f"p95={st['p95_ms']:.0f} ms min={st['min_ms']:.0f} ms max={st['max_ms']:.0f} ms"
        )
    self.results["embeddings"] = embed_results
def _bench_chunking(self):
    """Time one /v1/semantic-chunk call for a small, a medium and a large text.

    For each input the character count, number of returned chunks and latency
    are stored under ``self.results["chunking"]``; a failing call is stored
    as ``{"error": ...}`` and the loop continues.
    """
    self.stdout.write("\n[ 3/6 ] Semantic chunking latency ...")

    medium_paragraph = (
        "This section covers the onboarding process for new employees joining the engineering team. "
        "You will learn about code review practices, deployment procedures, incident response, and "
        "team communication protocols. Each topic is covered in depth with examples and references "
        "to internal documentation. All engineers are expected to complete this module in week one. "
    )
    large_paragraph = (
        "The infrastructure team manages all cloud resources, CI/CD pipelines, and production environments. "
        "New members are expected to understand Kubernetes cluster management, Terraform IaC, "
        "GitLab CI pipeline authoring, monitoring with Grafana and Prometheus, and incident response procedures. "
        "This document provides a comprehensive guide to each area including runbooks and escalation paths. "
    )
    samples = {
        "small (~200 c)": "a " * 100,
        "medium (~2k c) ": medium_paragraph * 5,
        "large (~8k c) ": large_paragraph * 20,
    }

    outcomes = {}
    for label, text in samples.items():
        key = label.strip()
        try:
            # t=text binds the current sample at lambda creation time.
            response, elapsed_ms = self._time_fn(
                lambda t=text: self._req("POST", "/v1/semantic-chunk", json={"text": t})
            )
            chunk_count = len(response.get("chunks", []))
            outcomes[key] = {
                "chars": len(text),
                "chunks_produced": chunk_count,
                "latency_ms": round(elapsed_ms, 1),
            }
            self.stdout.write(f" {label}{chunk_count} chunks | {elapsed_ms:.0f} ms")
        except Exception as exc:
            outcomes[key] = {"error": str(exc)}
            self.stdout.write(self.style.ERROR(f" {label} FAILED: {exc}"))
    self.results["chunking"] = outcomes
def _bench_llm(self):
    """Time one non-streaming chat completion for each of five fixed prompts.

    For every prompt the elapsed seconds, token usage, tokens per second and
    a 100-character response preview are stored under ``self.results["llm"]``,
    keyed by the prompt label. A failing prompt is stored as ``{"error": ...}``.
    """
    self.stdout.write("\n[ 4/6 ] LLM inference latency (each prompt is a single non-streaming call) ...")
    assistant = "You are an onboarding assistant."
    prompts = [
        {
            "label": "short_qa",
            "system": assistant,
            "user": "What does a Kubernetes pod do? Answer in 2 sentences.",
            "max_tokens": 128,
        },
        {
            "label": "progress_summary",
            "system": assistant,
            "user": (
                "A trainee has completed: Git Basics, CI/CD Pipelines, Code Review. Score: 85%. "
                "Write a 2-sentence progress summary."
            ),
            "max_tokens": 128,
        },
        {
            "label": "curriculum_gen",
            "system": "You are an onboarding assistant. Output only a valid JSON array of strings.",
            "user": (
                "Create a 6-module onboarding curriculum for a Software Engineer role focused on "
                "backend services. Output ONLY a JSON array of module title strings."
            ),
            "max_tokens": 256,
        },
        {
            "label": "assessment_gen",
            "system": "You are an onboarding assistant. Output only valid JSON.",
            "user": (
                "Generate 3 multiple-choice questions to assess understanding of CI/CD pipelines. "
                "Output as a JSON array of objects with keys: question, options (array of 4), answer."
            ),
            "max_tokens": 512,
        },
        {
            "label": "knowledge_explanation",
            "system": assistant,
            "user": (
                "Explain Git branching strategy best practices for a new engineer. "
                "Cover: feature branches, naming conventions, merge vs rebase, and PR workflow. "
                "Use clear headings and bullet points. Target ~400 words."
            ),
            "max_tokens": 700,
        },
    ]

    outcomes = {}
    for prompt in prompts:
        label = prompt["label"]
        token_limit = prompt["max_tokens"]
        # ending="" keeps the result on the same console line as the label.
        self.stdout.write(f" {label} (max_tokens={token_limit}) ...", ending="")
        self.stdout.flush()
        try:
            started = time.perf_counter()
            body = {
                "messages": [
                    {"role": "system", "content": prompt["system"]},
                    {"role": "user", "content": prompt["user"]},
                ],
                "max_tokens": token_limit,
                "stream": False,
            }
            data = self._req("POST", "/v1/chat/completions", json=body)
            elapsed_s = time.perf_counter() - started

            usage = data.get("usage", {})
            completion_tokens = usage.get("completion_tokens", 0)
            prompt_tokens = usage.get("prompt_tokens", 0)
            if elapsed_s > 0 and completion_tokens > 0:
                tokens_per_sec = round(completion_tokens / elapsed_s, 1)
            else:
                tokens_per_sec = 0

            content = data["choices"][0]["message"]["content"] or ""
            preview = content[:100].replace("\n", " ")

            outcomes[label] = {
                "elapsed_s": round(elapsed_s, 2),
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "tokens_per_sec": tokens_per_sec,
                "response_preview": preview,
            }
            self.stdout.write(f" {elapsed_s:.1f} s | {completion_tokens} tokens | {tokens_per_sec} tok/s")
        except Exception as exc:
            outcomes[label] = {"error": str(exc)}
            self.stdout.write(self.style.ERROR(f" FAILED: {exc}"))
    self.results["llm"] = outcomes
def _bench_database(self):
    """Record row counts for the main tables under ``self.results["database"]``.

    A probe query against ``knowledge_knowledgechunk`` runs first; if it
    raises, the stage is reported as skipped. A failure while counting is
    stored as ``{"error": ...}``.
    """
    self.stdout.write("\n[ 5/6 ] Database statistics ...")

    # Probe first so a missing schema gives a hint instead of a traceback.
    try:
        from django.db import connection

        with connection.cursor() as cursor:
            cursor.execute("SELECT 1 FROM knowledge_knowledgechunk LIMIT 1")
    except Exception:
        self.stdout.write(self.style.WARNING(" Tables missing — run 'manage.py migrate' first. Skipping."))
        self.results["database"] = {"skipped": "Migrations not applied."}
        return

    try:
        embedded_files = TrainingFile.objects.filter(status="embedded")
        embedded_chunks = KnowledgeChunk.objects.filter(embedding__isnull=False, is_active=True)
        counts = {
            "organizations": Organization.objects.count(),
            "roles": Role.objects.count(),
            "users": User.objects.count(),
            "training_files_total": TrainingFile.objects.count(),
            "training_files_embedded": embedded_files.count(),
            "knowledge_chunks_with_embeddings": embedded_chunks.count(),
            "onboarding_sessions": OnboardingSession.objects.count(),
        }
        self.results["database"] = counts
        self.stdout.write(
            f" Orgs: {counts['organizations']} | Roles: {counts['roles']} | Users: {counts['users']}"
        )
        self.stdout.write(
            f" Training files: {counts['training_files_total']} total "
            f"({counts['training_files_embedded']} embedded)"
        )
        self.stdout.write(
            f" Knowledge chunks (with embeddings): {counts['knowledge_chunks_with_embeddings']}"
        )
        self.stdout.write(f" Onboarding sessions: {counts['onboarding_sessions']}")
    except Exception as exc:
        self.results["database"] = {"error": str(exc)}
        self.stdout.write(self.style.ERROR(f" FAILED: {exc}"))
def _bench_retrieval(self):
    """Time cosine-distance retrieval over KnowledgeChunk for top-k of 5, 10 and 20.

    Uses the first role that has embedded chunks, embeds one fixed query via
    /v1/embeddings, then runs the ordered query ``self.runs`` times per top-k.
    Results are stored under ``self.results["retrieval"]``; the stage is
    recorded as skipped or errored when no role, database or embedding is
    available.
    """
    self.stdout.write(f"\n[ 6/6 ] pgvector retrieval latency ({self.runs} runs × top-k ∈ [5, 10, 20]) ...")

    try:
        role = Role.objects.filter(knowledge_chunks__embedding__isnull=False).distinct().first()
    except Exception as exc:
        self.stdout.write(self.style.WARNING(f" DB not ready ({exc}). Skipping."))
        self.results["retrieval"] = {"skipped": str(exc)}
        return

    if role is None:
        self.stdout.write(self.style.WARNING(" No role with embedded chunks — skipping."))
        self.results["retrieval"] = {"skipped": "No embedded chunks found in database."}
        return

    query = "What are the key responsibilities, tools, and procedures for this role?"
    self.stdout.write(f" Role: {role.name} (org: {role.organization.name})")
    self.stdout.write(f' Query: "{query}"')

    try:
        embedding_response = self._req("POST", "/v1/embeddings", json={"input": query})
        query_vector = embedding_response["data"][0]["embedding"]
    except Exception as exc:
        self.results["retrieval"] = {"error": f"Could not generate query embedding: {exc}"}
        self.stdout.write(self.style.ERROR(f" FAILED to get embedding: {exc}"))
        return

    total_chunks = KnowledgeChunk.objects.filter(embedding__isnull=False, is_active=True).count()

    per_top_k = {}
    for top_k in (5, 10, 20):
        samples = []
        returned = 0
        for _ in range(self.runs):
            started = time.perf_counter()
            # Queryset construction stays inside the timed region; list()
            # forces the database query to execute.
            in_org = KnowledgeChunk.objects.filter(
                organization=role.organization,
                embedding__isnull=False,
                is_active=True,
            )
            for_role = in_org.filter(Q(role=role) | Q(role__isnull=True))
            ranked = for_role.annotate(
                distance=CosineDistance("embedding", query_vector)
            ).order_by("distance")
            chunks = list(ranked[:top_k])
            samples.append((time.perf_counter() - started) * 1000)
            returned = len(chunks)
        st = self._stats(samples)
        per_top_k[f"top_{top_k}"] = {"results_returned": returned, **st}
        self.stdout.write(
            f" top-{top_k:2d}: mean={st['mean_ms']:.1f} ms "
            f"p95={st['p95_ms']:.1f} ms min={st['min_ms']:.1f} ms max={st['max_ms']:.1f} ms"
        )

    self.results["retrieval"] = {
        "role": role.name,
        "organization": role.organization.name,
        "query": query,
        "total_chunks_in_db": total_chunks,
        "results": per_top_k,
    }
def _print_summary(self):
self.stdout.write(self.style.SUCCESS("\n=== Summary ===\n"))
h = self.results.get("health", {})
self.stdout.write(f" GPU Server : {h.get('status', 'N/A')} — LLM {'ready' if h.get('llm_ready') else 'unloaded'}, embed {'ready' if h.get('embed_ready') else 'N/A'}")
emb = self.results.get("embeddings", {})
means = [v["mean_ms"] for v in emb.values() if "mean_ms" in v]
if means:
self.stdout.write(f" Embedding : {min(means):.0f}{max(means):.0f} ms (mean across query lengths)")
chnk = self.results.get("chunking", {})
lats = [v["latency_ms"] for v in chnk.values() if "latency_ms" in v]
if lats:
self.stdout.write(f" Chunking : {min(lats):.0f}{max(lats):.0f} ms range by text size")
llm = self.results.get("llm", {})
elapsed = [v["elapsed_s"] for v in llm.values() if "elapsed_s" in v]
tps_all = [v["tokens_per_sec"] for v in llm.values() if "tokens_per_sec" in v and v["tokens_per_sec"] > 0]
if elapsed:
self.stdout.write(
f" LLM inference : {min(elapsed):.1f}{max(elapsed):.1f} s range"
+ (f" | {statistics.mean(tps_all):.1f} tok/s avg" if tps_all else "")
)
ret = self.results.get("retrieval", {})
r5 = ret.get("results", {}).get("top_5", {})
if r5.get("mean_ms"):
self.stdout.write(f" RAG retrieval : {r5['mean_ms']:.1f} ms mean (top-5, {ret.get('total_chunks_in_db', '?')} total chunks)")
db = self.results.get("database", {})
if "knowledge_chunks_with_embeddings" in db:
self.stdout.write(
f" Knowledge base : {db['knowledge_chunks_with_embeddings']} chunks from "
f"{db['training_files_embedded']} embedded files"
)
def _save_report(self):
    """Write ``self.results`` as a timestamped Markdown report in ``self.out_dir``.

    The report has one section per benchmark stage that produced data,
    followed by the full results as a JSON block. The file is named
    ``results_<YYYY-MM-DD_HH-MM-SS>.md`` and its path is printed on success.
    """
    # Take the timestamp once so the file name and the "Date" line always agree.
    now = datetime.datetime.now()
    path = self.out_dir / f"results_{now.strftime('%Y-%m-%d_%H-%M-%S')}.md"
    lines = [
        "# Dynavera Benchmark Results",
        "",
        f"**Date:** {now.strftime('%Y-%m-%d %H:%M:%S')} ",
        f"**Inference endpoint:** `{settings.INFERENCE_URL}` ",
        f"**Repetitions per benchmark:** {self.runs} ",
        "",
    ]

    h = self.results.get("health", {})
    lines += [
        "## 1. GPU Server Health",
        "",
        "| Field | Value |",
        "|---|---|",
        f"| Status | {h.get('status', 'N/A')} |",
        f"| LLM Ready | {h.get('llm_ready', 'N/A')} |",
        f"| Embed Ready | {h.get('embed_ready', 'N/A')} |",
        f"| Health check RTT | {h.get('latency_ms', 'N/A')} ms |",
        "",
    ]

    emb = self.results.get("embeddings", {})
    if emb:
        lines += [
            "## 2. Embedding Latency",
            "",
            "| Query type | Chars | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |",
            "|---|---|---|---|---|---|---|",
        ]
        for label, v in emb.items():
            if "mean_ms" in v:
                lines.append(f"| {label} | {v['query_chars']} | {v['mean_ms']} | {v['median_ms']} | {v['p95_ms']} | {v['min_ms']} | {v['max_ms']} |")
        lines.append("")

    chnk = self.results.get("chunking", {})
    if chnk:
        lines += [
            "## 3. Semantic Chunking Latency",
            "",
            "| Input size | Chars | Chunks produced | Latency (ms) |",
            "|---|---|---|---|",
        ]
        for label, v in chnk.items():
            if "latency_ms" in v:
                lines.append(f"| {label} | {v['chars']} | {v['chunks_produced']} | {v['latency_ms']} |")
        lines.append("")

    llm = self.results.get("llm", {})
    if llm:
        lines += [
            "## 4. LLM Inference Latency",
            "",
            "| Prompt type | Elapsed (s) | Prompt tokens | Completion tokens | Tok/s |",
            "|---|---|---|---|---|",
        ]
        for label, v in llm.items():
            if "elapsed_s" in v:
                lines.append(
                    f"| {label} | {v['elapsed_s']} | {v['prompt_tokens']} | {v['completion_tokens']} | {v['tokens_per_sec']} |"
                )
            else:
                lines.append(f"| {label} | ERROR | — | — | — |")
        lines.append("")
        lines += [
            "> **Note on end-to-end session time:** A full onboarding session invokes multiple sequential",
            "> inference calls (curriculum generation → knowledge explanation × N modules → assessment generation → progress summary).",
            "> Total wall-clock time accumulates across all turns plus retrieval and tool-call overhead.",
            "",
        ]

    db = self.results.get("database", {})
    labels = {
        "organizations": "Organizations",
        "roles": "Roles",
        "users": "Users",
        "training_files_total": "Training Files (total)",
        "training_files_embedded": "Training Files (embedded)",
        "knowledge_chunks_with_embeddings": "Knowledge Chunks (with embeddings)",
        "onboarding_sessions": "Onboarding Sessions",
    }
    # Emit the section only when counts exist: a skipped or failed database
    # stage would otherwise leave a heading with an empty table.
    if any(key in db for key in labels):
        lines += [
            "## 5. Database Statistics",
            "",
            "| Entity | Count |",
            "|---|---|",
        ]
        for key, label in labels.items():
            if key in db:
                lines.append(f"| {label} | {db[key]} |")
        lines.append("")

    ret = self.results.get("retrieval", {})
    if "results" in ret:
        lines += [
            "## 6. pgvector Retrieval Latency",
            "",
            f"**Role:** {ret.get('role')} ",
            f"**Organisation:** {ret.get('organization')} ",
            f'**Query:** "{ret.get("query")}" ',
            f"**Total chunks in DB:** {ret.get('total_chunks_in_db')} ",
            "",
            "| Top-K | Results returned | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |",
            "|---|---|---|---|---|---|---|",
        ]
        for k, v in ret["results"].items():
            lines.append(
                f"| {k} | {v['results_returned']} | {v['mean_ms']} | {v['median_ms']} | {v['p95_ms']} | {v['min_ms']} | {v['max_ms']} |"
            )
        lines.append("")

    lines += [
        "## Raw JSON",
        "",
        "```json",
        # default=str stringifies any value json cannot serialise natively.
        json.dumps(self.results, indent=2, default=str),
        "```",
        "",
    ]

    path.write_text("\n".join(lines), encoding="utf-8")
    self.stdout.write(self.style.SUCCESS(f"\nResults saved → {path}"))

View file

@ -28,7 +28,6 @@ class Migration(migrations.Migration):
('file_size', models.IntegerField()),
('file_type', models.CharField(max_length=50)),
('description', models.TextField(blank=True, default='')),
('error_message', models.TextField(blank=True, default='')),
('status', models.CharField(choices=[('ingesting', 'Ingesting'), ('chunked', 'Chunked'), ('embedded', 'Embedded'), ('failed', 'Failed')], default='ingesting', max_length=20)),
('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='training_files', to='accounts.organization')),
('role', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='training_files', to='accounts.role')),

View file

@ -29,7 +29,6 @@ class TrainingFile(IdentifierMixin, TimeStampMixin, Model):
file_type = CharField(max_length=50)
description = TextField(blank=True, default='')
error_message = TextField(blank=True, default='')
status = CharField(max_length=20, choices=STATUS_CHOICES, default='ingesting')
class Meta:

View file

@ -15,11 +15,11 @@ class TrainingFileSerializer(ModelSerializer):
fields = [
'id', 'uuid', 'organization', 'role', 'scope', 'uploaded_by', 'file', 'file_url',
'file_name', 'file_size', 'file_type', 'description',
'error_message', 'status', 'created_at', 'updated_at'
'status', 'created_at', 'updated_at'
]
read_only_fields = [
'id', 'uuid', 'uploaded_by', 'file_size', 'file_type',
'error_message', 'status', 'created_at', 'updated_at',
'status', 'created_at', 'updated_at',
'organization', 'role', 'scope'
]

View file

@ -64,13 +64,16 @@ def ingest_training_file_task(self, file_uuid):
all_documents = []
chunk_counter = 0
with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
for text_segment in _get_text_chunks(raw_text, size=settings.INGESTION_CHUNK_SIZE):
timeout = Timeout(60.0)
with Client(timeout=timeout, auth=settings.INFERENCE_AUTH) as client:
for text_segment in _get_text_chunks(raw_text):
response = client.post(
settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
json={
"text": text_segment,
"threshold": settings.SEMANTIC_CHUNK_THRESHOLD,
"threshold": 95,
},
)
response.raise_for_status()
@ -112,7 +115,7 @@ def ingest_training_file_task(self, file_uuid):
except Exception as e:
file_obj.status = 'failed'
file_obj.error_message = str(e)
file_obj.description = str(e)
file_obj.save()
raise e
@ -177,13 +180,14 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
]
try:
with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
with Client(timeout=Timeout(60.0), auth=settings.INFERENCE_AUTH) as client:
for agent_type, user_prompt in refine_calls:
if agent_type not in configs:
continue
response = client.post(
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={
"model": "meta-llama-3.1-8b-instruct",
"messages": [{"role": "user", "content": user_prompt}],
"max_tokens": 600,
},

View file

@ -40,6 +40,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
logger: logging.Logger = logger
moderator: ContentModerator = ContentModerator()
### Connection Management ###
async def connect(self):
self.user = self.scope["user"]
if not self.user.is_authenticated:
@ -53,6 +54,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
async def disconnect(self, close_code: int):
self.logger.info(f"WebSocket disconnected: user={self.user.full_name} close_code={close_code}")
### Event Handling ###
async def receive(self, text_data: str):
"""
Main entry point for incoming messages.
@ -74,8 +76,9 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
await self.send_error(f"An unexpected error occurred when processing the event.")
self.logger.exception(f"WebSocket receive critical failure: {str(e)}")
### MCP Handling ###
async def orchestrate(self, message: str, config: AgentConfig, minimum_turns: int = 2, maximum_turns: int = 5,
max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: float = settings.INFERENCE_REQUEST_TIMEOUT) -> str:
max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: int = 60.0) -> str:
"""
Orchestrates a multi-turn conversation with the agent, including tool calls and reasoning steps.
"""
@ -150,7 +153,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
payload["stop"] = stop
try:
chunks: list[str] = []
async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
response.raise_for_status()
async for line in response.aiter_lines():
@ -176,6 +179,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
self.logger.exception("Streaming LLM call failed: %s", e)
return None
### Regular Helpers ###
async def send_log(self, log_type: LogType, message: str, content: str | dict | None = None):
if log_type == LogType.ERROR:
self.logger.error(f"[{log_type.value}]: message={str(message)[:100]} content={str(content)[:60]}")
@ -208,6 +212,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
return max_tokens
return None
### Database Helpers ###
@database_sync_to_async
def get_config(self, config_uuid):
return AgentConfig.objects.get(uuid = config_uuid)

View file

@ -111,7 +111,7 @@ class OnboardingKnowledgeConsumer(BaseOnboardingConsumer):
payload['stop'] = stop
try:
chunks: list[str] = []
async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
response.raise_for_status()
async for line in response.aiter_lines():

View file

@ -62,6 +62,7 @@ class OnboardingProgressConsumer(BaseOnboardingConsumer):
"is_completed": progress_context.get("is_completed", False),
})
### Database Helpers ###
@database_sync_to_async
def get_role_progress_context(self, role_uuid, user_id, flow_uuid=None):

View file

@ -45,32 +45,13 @@ class OnboardingPrompts:
"If no indexed documents are available, provide a concise best-practice overview and clearly say no indexed documents were found. "
"Use Markdown formatting and do NOT include a table of contents in this section. "
"Generate substantial depth: target 900-1400 words. "
"Choose a section structure that genuinely fits this topic — do not use a fixed template. "
"For example: a procedural topic suits step-by-step sections; a conceptual topic suits definitions and examples; "
"a compliance topic suits policy context, requirements, and consequences. "
"You may draw on headings such as Overview, Key Concepts, Step-by-Step Process, Worked Examples, "
"Common Mistakes, Policy Requirements, Quick Reference, or a Checklist — but only include sections "
"that add value for this specific topic. Always end with at least 6 actionable checklist items.\n\n"
"Include these sections in order: Overview, Core Concepts, Role-Specific Workflow, Practical Examples, Common Pitfalls, and Action Checklist. "
"In Practical Examples, provide at least 2 concrete examples relevant to this role/topic. "
"In Action Checklist, provide at least 8 actionable checklist items.\n\n"
f"Topic: {topic}\n"
f"MCP search context:\n{context_markdown}"
)
# @staticmethod
# def knowledge_generation_prompt(topic, context_markdown):
# return (
# f"Write a practical onboarding training guide for the topic '{topic}'. "
# "Think step-by-step internally before writing the final answer. "
# "Use the MCP search context below as your primary source, and call additional tools if needed. "
# "If no indexed documents are available, provide a concise best-practice overview and clearly say no indexed documents were found. "
# "Use Markdown formatting and do NOT include a table of contents in this section. "
# "Generate substantial depth: target 900-1400 words. "
# "Include these sections in order: Overview, Core Concepts, Role-Specific Workflow, Practical Examples, Common Pitfalls, and Action Checklist. "
# "In Practical Examples, provide at least 2 concrete examples relevant to this role/topic. "
# "In Action Checklist, provide at least 8 actionable checklist items.\n\n"
# f"Topic: {topic}\n"
# f"MCP search context:\n{context_markdown}"
# )
@staticmethod
def quiz_generation_prompt(question_count, module_briefs):
return (

View file

@ -65,7 +65,7 @@ class MCPRouter:
async def _get_embedding(self, text):
logger.info('MCP embedding request started')
async with httpx.AsyncClient(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
async with httpx.AsyncClient(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
response = await client.post(
settings.INFERENCE_EMBEDDINGS_ENDPOINT,
json={'input': text},

View file

@ -545,7 +545,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses)
try:
with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
response = client.post(
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={
@ -754,7 +754,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title
prompt = f"Context:\n{context}\n\nQuestion: {message}"
try:
with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
response = client.post(
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={
@ -784,7 +784,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
f"Return only the revised page body."
)
try:
with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
response = client.post(
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
json={

View file

@ -1,161 +0,0 @@
# Dynavera Benchmark Results
**Date:** 2026-03-24 13:28:54
**Inference endpoint:** `http://fyp-inference-dev:8001`
**Repetitions per benchmark:** 5
## 1. GPU Server Health
| Field | Value |
|---|---|
| Status | OK |
| LLM Ready | True |
| Embed Ready | True |
| Health check RTT | 51.0 ms |
## 2. Embedding Latency
| Query type | Chars | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |
|---|---|---|---|---|---|---|
| short | 19 | 95.5 | 25.1 | 378.6 | 23.0 | 378.6 |
| medium | 172 | 25.7 | 24.7 | 29.4 | 24.3 | 29.4 |
| long | 428 | 27.5 | 26.7 | 32.2 | 24.8 | 32.2 |
## 3. Semantic Chunking Latency
| Input size | Chars | Chunks produced | Latency (ms) |
|---|---|---|---|
| small (~200 c) | 200 | 1 | 28.4 |
| medium (~2k c) | 1810 | 1 | 77.0 |
| large (~8k c) | 7740 | 1 | 206.3 |
## 4. LLM Inference Latency
| Prompt type | Elapsed (s) | Prompt tokens | Completion tokens | Tok/s |
|---|---|---|---|---|
| short_qa | 1.5 | 55 | 69 | 46.0 |
| progress_summary | 1.36 | 74 | 71 | 52.3 |
| curriculum_gen | 1.67 | 79 | 82 | 49.0 |
| assessment_gen | 5.03 | 83 | 235 | 46.7 |
| knowledge_explanation | 9.31 | 83 | 496 | 53.3 |
> **Note on end-to-end session time:** A full onboarding session invokes multiple sequential
> inference calls (curriculum generation → knowledge explanation × N modules → assessment generation → progress summary).
> Total wall-clock time accumulates across all turns plus retrieval and tool-call overhead.
## 5. Database Statistics
| Entity | Count |
|---|---|
| Organizations | 3 |
| Roles | 10 |
| Users | 12 |
| Training Files (total) | 0 |
| Training Files (embedded) | 0 |
| Knowledge Chunks (with embeddings) | 0 |
| Onboarding Sessions | 4 |
## Raw JSON
```json
{
"health": {
"status": "OK",
"llm_ready": true,
"embed_ready": true,
"latency_ms": 51.0
},
"embeddings": {
"short": {
"query_chars": 19,
"mean_ms": 95.5,
"median_ms": 25.1,
"p95_ms": 378.6,
"min_ms": 23.0,
"max_ms": 378.6
},
"medium": {
"query_chars": 172,
"mean_ms": 25.7,
"median_ms": 24.7,
"p95_ms": 29.4,
"min_ms": 24.3,
"max_ms": 29.4
},
"long": {
"query_chars": 428,
"mean_ms": 27.5,
"median_ms": 26.7,
"p95_ms": 32.2,
"min_ms": 24.8,
"max_ms": 32.2
}
},
"chunking": {
"small (~200 c)": {
"chars": 200,
"chunks_produced": 1,
"latency_ms": 28.4
},
"medium (~2k c)": {
"chars": 1810,
"chunks_produced": 1,
"latency_ms": 77.0
},
"large (~8k c)": {
"chars": 7740,
"chunks_produced": 1,
"latency_ms": 206.3
}
},
"llm": {
"short_qa": {
"elapsed_s": 1.5,
"prompt_tokens": 55,
"completion_tokens": 69,
"tokens_per_sec": 46.0,
"response_preview": "A Kubernetes pod is a logical host for one or more containers, providing a shared network namespace,"
},
"progress_summary": {
"elapsed_s": 1.36,
"prompt_tokens": 74,
"completion_tokens": 71,
"tokens_per_sec": 52.3,
"response_preview": "The trainee has made significant progress in their onboarding journey, demonstrating a strong founda"
},
"curriculum_gen": {
"elapsed_s": 1.67,
"prompt_tokens": 79,
"completion_tokens": 82,
"tokens_per_sec": 49.0,
"response_preview": "[ \"Module 1: Introduction to Backend Services and Infrastructure\", \"Module 2: Designing and Impl"
},
"assessment_gen": {
"elapsed_s": 5.03,
"prompt_tokens": 83,
"completion_tokens": 235,
"tokens_per_sec": 46.7,
"response_preview": "```json [ { \"question\": \"What is the primary purpose of a Continuous Integration (CI) pipeline"
},
"knowledge_explanation": {
"elapsed_s": 9.31,
"prompt_tokens": 83,
"completion_tokens": 496,
"tokens_per_sec": 53.3,
"response_preview": "**Git Branching Strategy Best Practices** As a new engineer, understanding a Git branching strategy"
}
},
"database": {
"organizations": 3,
"roles": 10,
"users": 12,
"training_files_total": 0,
"training_files_embedded": 0,
"knowledge_chunks_with_embeddings": 0,
"onboarding_sessions": 4
},
"retrieval": {
"skipped": "No embedded chunks found in database."
}
}
```

View file

@ -1,203 +0,0 @@
# Dynavera Benchmark Results
**Date:** 2026-03-24 13:29:55
**Inference endpoint:** `http://fyp-inference-dev:8001`
**Repetitions per benchmark:** 10
## 1. GPU Server Health
| Field | Value |
|---|---|
| Status | OK |
| LLM Ready | True |
| Embed Ready | True |
| Health check RTT | 44.5 ms |
## 2. Embedding Latency
| Query type | Chars | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |
|---|---|---|---|---|---|---|
| short | 19 | 25.0 | 25.3 | 31.9 | 20.8 | 31.9 |
| medium | 172 | 24.0 | 22.8 | 31.8 | 21.0 | 31.8 |
| long | 428 | 29.8 | 27.5 | 37.7 | 25.0 | 37.7 |
## 3. Semantic Chunking Latency
| Input size | Chars | Chunks produced | Latency (ms) |
|---|---|---|---|
| small (~200 c) | 200 | 1 | 26.7 |
| medium (~2k c) | 1810 | 1 | 62.7 |
| large (~8k c) | 7740 | 1 | 204.0 |
## 4. LLM Inference Latency
| Prompt type | Elapsed (s) | Prompt tokens | Completion tokens | Tok/s |
|---|---|---|---|---|
| short_qa | 1.26 | 55 | 69 | 54.9 |
| progress_summary | 1.24 | 74 | 68 | 54.9 |
| curriculum_gen | 1.4 | 79 | 76 | 54.4 |
| assessment_gen | 4.75 | 83 | 249 | 52.4 |
| knowledge_explanation | 10.34 | 83 | 541 | 52.3 |
> **Note on end-to-end session time:** A full onboarding session invokes multiple sequential
> inference calls (curriculum generation → knowledge explanation × N modules → assessment generation → progress summary).
> Total wall-clock time accumulates across all turns plus retrieval and tool-call overhead.
## 5. Database Statistics
| Entity | Count |
|---|---|
| Organizations | 3 |
| Roles | 10 |
| Users | 12 |
| Training Files (total) | 1 |
| Training Files (embedded) | 0 |
| Knowledge Chunks (with embeddings) | 8 |
| Onboarding Sessions | 4 |
## 6. pgvector Retrieval Latency
**Role:** fNIRS Specialist
**Organisation:** University of Birmingham
**Query:** "What are the key responsibilities, tools, and procedures for this role?"
**Total chunks in DB:** 8
| Top-K | Results returned | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |
|---|---|---|---|---|---|---|
| top_5 | 5 | 2.3 | 2.0 | 5.0 | 1.9 | 5.0 |
| top_10 | 8 | 2.4 | 2.4 | 3.1 | 2.3 | 3.1 |
| top_20 | 8 | 2.3 | 2.3 | 2.6 | 2.2 | 2.6 |
## Raw JSON
```json
{
"health": {
"status": "OK",
"llm_ready": true,
"embed_ready": true,
"latency_ms": 44.5
},
"embeddings": {
"short": {
"query_chars": 19,
"mean_ms": 25.0,
"median_ms": 25.3,
"p95_ms": 31.9,
"min_ms": 20.8,
"max_ms": 31.9
},
"medium": {
"query_chars": 172,
"mean_ms": 24.0,
"median_ms": 22.8,
"p95_ms": 31.8,
"min_ms": 21.0,
"max_ms": 31.8
},
"long": {
"query_chars": 428,
"mean_ms": 29.8,
"median_ms": 27.5,
"p95_ms": 37.7,
"min_ms": 25.0,
"max_ms": 37.7
}
},
"chunking": {
"small (~200 c)": {
"chars": 200,
"chunks_produced": 1,
"latency_ms": 26.7
},
"medium (~2k c)": {
"chars": 1810,
"chunks_produced": 1,
"latency_ms": 62.7
},
"large (~8k c)": {
"chars": 7740,
"chunks_produced": 1,
"latency_ms": 204.0
}
},
"llm": {
"short_qa": {
"elapsed_s": 1.26,
"prompt_tokens": 55,
"completion_tokens": 69,
"tokens_per_sec": 54.9,
"response_preview": "A Kubernetes pod is the basic execution unit of a containerized application, and it represents a log"
},
"progress_summary": {
"elapsed_s": 1.24,
"prompt_tokens": 74,
"completion_tokens": 68,
"tokens_per_sec": 54.9,
"response_preview": "The trainee has demonstrated a strong foundation in the fundamentals of version control with Git, as"
},
"curriculum_gen": {
"elapsed_s": 1.4,
"prompt_tokens": 79,
"completion_tokens": 76,
"tokens_per_sec": 54.4,
"response_preview": "[ \"Module 1: Introduction to Backend Services\", \"Module 2: Fundamentals of API Design\", \"Modul"
},
"assessment_gen": {
"elapsed_s": 4.75,
"prompt_tokens": 83,
"completion_tokens": 249,
"tokens_per_sec": 52.4,
"response_preview": "[ { \"question\": \"What is the primary purpose of a Continuous Integration (CI) pipeline?\", "
},
"knowledge_explanation": {
"elapsed_s": 10.34,
"prompt_tokens": 83,
"completion_tokens": 541,
"tokens_per_sec": 52.3,
"response_preview": "**Git Branching Strategy Best Practices** As a new engineer, understanding Git branching strategies"
}
},
"database": {
"organizations": 3,
"roles": 10,
"users": 12,
"training_files_total": 1,
"training_files_embedded": 0,
"knowledge_chunks_with_embeddings": 8,
"onboarding_sessions": 4
},
"retrieval": {
"role": "fNIRS Specialist",
"organization": "University of Birmingham",
"query": "What are the key responsibilities, tools, and procedures for this role?",
"total_chunks_in_db": 8,
"results": {
"top_5": {
"results_returned": 5,
"mean_ms": 2.3,
"median_ms": 2.0,
"p95_ms": 5.0,
"min_ms": 1.9,
"max_ms": 5.0
},
"top_10": {
"results_returned": 8,
"mean_ms": 2.4,
"median_ms": 2.4,
"p95_ms": 3.1,
"min_ms": 2.3,
"max_ms": 3.1
},
"top_20": {
"results_returned": 8,
"mean_ms": 2.3,
"median_ms": 2.3,
"p95_ms": 2.6,
"min_ms": 2.2,
"max_ms": 2.6
}
}
}
}
```

View file

@ -35,11 +35,7 @@ INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
INFERENCE_REQUEST_TIMEOUT = float(os.getenv('INFERENCE_REQUEST_TIMEOUT', '60'))
INFERENCE_STREAM_TIMEOUT = float(os.getenv('INFERENCE_STREAM_TIMEOUT', '120'))
EMBEDDING_DIMENSIONS = int(os.getenv('EMBEDDING_DIMENSIONS', '768'))
INGESTION_CHUNK_SIZE = int(os.getenv('INGESTION_CHUNK_SIZE', '10000'))
SEMANTIC_CHUNK_THRESHOLD = int(os.getenv('SEMANTIC_CHUNK_THRESHOLD', '95'))
STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')
MEDIA_URL = os.getenv('DJANGO_MEDIA_URL', '/media/')

View file

@ -1,72 +0,0 @@
# Model Selection Benchmarks
This document records the pilot evaluation used to select the local inference model for Dynavera.
Candidates were tested against a fixed set of onboarding-style prompts on the development GPU node
(NVIDIA RTX 3060, 12 GB VRAM) using llama.cpp with GGUF quantization.
## Evaluation Setup
- **Hardware:** NVIDIA RTX 3060 12 GB, AMD Ryzen 7 7700X, 64 GB RAM
- **Runtime:** llama.cpp (build b3447), CUDA offload enabled
- **Quantization:** Q4_K_M for all candidates (matched format for fair comparison)
- **Prompt set:** 20 role-scoped onboarding prompts across 4 categories:
- Curriculum generation (5 prompts)
- Knowledge explanation (5 prompts)
- Assessment question generation (5 prompts)
- Free-form HR Q&A (5 prompts)
- **Scoring:** Each response was rated 1–5 by a reviewer on instruction-following, factual grounding, and
format compliance. Scores were averaged across all 20 prompts.
---
## Results
| Model | Size (Q4_K_M) | VRAM Usage | Decode Speed | Avg. Quality Score | Instruction Following | Format Compliance |
|---|---|---|---|---|---|---|
| **Meta-Llama-3.1-8B-Instruct** | 4.9 GB | 8.2 GB | 16 tok/s | **4.3 / 5** | **4.5 / 5** | **4.4 / 5** |
| Mistral-7B-Instruct-v0.3 | 4.1 GB | 7.4 GB | 19 tok/s | 3.6 / 5 | 3.4 / 5 | 3.8 / 5 |
| Mistral-7B-Instruct-v0.1 | 4.1 GB | 7.4 GB | 19 tok/s | 3.1 / 5 | 2.9 / 5 | 3.3 / 5 |
| Qwen2.5-14B-Instruct *(trialled, rejected)* | 8.6 GB | ~12 GB (saturated) | ~8 tok/s | 4.6 / 5 | 4.7 / 5 | 4.6 / 5 |
---
## Key Observations
### Instruction Following
Llama 3.1-8B-Instruct consistently adhered to structured output requirements (e.g. JSON topic
lists, numbered quiz questions), succeeding on 18/20 structured generation prompts on the first
attempt. Mistral-7B-v0.3 required retries in 11/20 cases due to malformed or incomplete JSON
output. This was a critical factor given the `_extract_json_list` parsing step in the generation
pipeline.
### Curriculum and Assessment Generation
On curriculum generation prompts, Llama 3.1-8B produced coherent, role-relevant topic lists in
the expected JSON format on the first attempt in 18/20 cases. Mistral-7B-v0.3 required retries in
11/20 cases due to malformed or incomplete JSON output.
### Knowledge Explanation Quality
For knowledge explanation prompts grounded with RAG context, Llama 3.1-8B more consistently
integrated retrieved content into its response rather than ignoring it. Mistral tended to answer
from parametric memory even when retrieval context was explicitly provided.
### Qwen2.5-14B Trial and Rejection
Qwen2.5-14B-Instruct-Q4_K_M was trialled as a higher-quality alternative and scored above all
other candidates on every metric. However, it saturates the full 12 GB VRAM of the RTX 3060,
leaving no headroom for the nomic-embed-text embedding model that runs concurrently during
document ingestion. Running both models simultaneously caused OOM errors and forced serialised
CPU fallback for embeddings, making ingestion impractically slow. Llama 3.1-8B (8.2 GB VRAM)
coexists with the nomic embedding model without contention and was therefore selected.
---
## Decision
**Meta-Llama-3.1-8B-Instruct-Q4_K_M** was selected based on:
- Highest quality score among feasible candidates (4.3/5)
- Best instruction-following on structured generation tasks (18/20 first-attempt JSON success)
- VRAM footprint (8.2 GB) that coexists with the nomic-embed-text embedding model during ingestion
- Strong first-attempt success rate on JSON-format outputs critical to the pipeline
Qwen2.5-14B scored higher in isolation but was eliminated because its VRAM saturation conflicts with
the requirement to run the embedding model concurrently. Mistral-7B-v0.3 was the next-closest
candidate but was disqualified by its structured-output failure rate.

View file

@ -1,180 +0,0 @@
# Orchestration Pseudocode
This document provides pseudocode for the core runtime components of Dynavera.
Source references point to the submitted repository.
---
## 1. Multi-Turn Orchestration Loop
**Source:** `apps/onboarding/consumers/base.py:77–132`
The `orchestrate` method is the central inference loop. It accumulates a message history,
calls the GPU inference endpoint with MCP tool definitions attached, handles any tool calls
the model requests, and only returns once the model produces a final text response (and the
minimum-turn threshold has been met).
```
function ORCHESTRATE(message, config, min_turns, max_turns):
messages ← [ {role: system, content: config.system_prompt},
{role: user, content: message} ]
for turn = 1 to max_turns do
emit THOUGHT status to WebSocket client
response ← POST /v1/chat/completions {
messages: messages,
tools: MCP_ROUTER.get_tool_definitions(),
tool_choice: "auto",
max_tokens: resolved_max_tokens
}
ai_msg ← response.choices[0].message
append ai_msg to messages
if ai_msg contains tool_calls then
for each call in ai_msg.tool_calls do
emit TOOL_START {name, args} to client
result ← MCP_ROUTER.handle(call.name, call.args)
emit TOOL_RESULT {result} to client
append {role: tool, name: call.name, content: result} to messages
end for
continue // re-enter loop with updated context
else // model returned a text response
content ← censor(ai_msg.content)
if turn < min_turns then
append force_reasoning_prompt to messages
continue // force at least one reasoning pass
end if
return content
end if
end for
return last_content // fallback if max_turns reached
```
**Key design points:**
- Tool results are injected back into the message history before the next inference call,
allowing the model to reason over retrieved evidence.
- `min_turns` enforces at least one structured reasoning pass before returning, improving
output quality on complex generation tasks.
- All status events (`THOUGHT`, `TOOL_START`, `TOOL_RESULT`, `COMPLETED`) are streamed to
the client over the WebSocket, making the reasoning process inspectable in the UI.
---
## 2. MCP Tool Dispatch
**Source:** `apps/onboarding/mcp.py:42–127`
The `MCPRouter` exposes a fixed set of approved tools to the model. Tool definitions are
generated at class load time from method-level `@mcp_tool` decorator metadata.
```
function MCP_ROUTER.handle(tool_name, args):
method ← tool_name_to_method_map[tool_name]
if method is None then
return {error: "Tool not found"}
end if
try
return await method(args)
catch Exception as e
return {error: e.message}
end try
// search_knowledge (lines 78–127)
function search_knowledge(args):
query_vector ← POST /v1/embeddings {input: args.query}
chunks ← SELECT content, metadata
FROM KnowledgeChunk
WHERE organization = role.organization
AND (role = args.role_uuid OR role IS NULL)
AND is_active = true
ORDER BY CosineDistance(embedding, query_vector) ASC
LIMIT 5
return [{content, source, relevance: 1 - distance} for chunk in chunks]
// update_progress (lines 129–159)
function update_progress(args):
session ← OnboardingSession.get(uuid=args.session_uuid)
if args.score → session.state.last_score ← args.score
if args.completed → session.state.completed_modules ← append(args.completed_module)
session.save()
return {status: "success", new_state: session.state}
```
---
## 3. Knowledge Ingestion Pipeline
**Source:** `apps/knowledge/tasks.py:45–117`
```
task ingest_training_file(file_uuid):
file ← TrainingFile.get(uuid=file_uuid)
file.status ← "ingesting"; file.save()
raw_text ← extract_text(file) // PDF / DOCX / TXT
all_chunks ← []
for segment in split(raw_text, size=CHUNK_SIZE) do
response ← POST /v1/semantic-chunk {
text: segment,
threshold: SEMANTIC_CHUNK_THRESHOLD
}
for (chunk_text, embedding) in zip(response.chunks, response.embeddings) do
all_chunks.append(KnowledgeChunk {
content: chunk_text,
embedding: embedding, // 768-dim vector
role: file.role,
metadata: {source: file.file_name}
})
end for
end for
new_chunks ← [c for c in all_chunks if c.hash not in existing_hashes]
KnowledgeChunk.bulk_create(new_chunks)
file.status ← "embedded"; file.save()
trigger update_agent_prompts_from_file(file.role.uuid)
```
---
## 4. Onboarding Generation Pipeline (CA → KA → AA)
**Source:** `apps/onboarding/consumers/generate.py:34–124`
```
function run_pipeline(role):
// Phase 1 — Curriculum Agent
context ← search_knowledge(role, query=role.name + " responsibilities")
topics ← ORCHESTRATE(curriculum_generation_prompt(role, context), CA_config)
→ parsed as JSON list of topic strings (max 15)
// Phase 2 — Knowledge Agent (one pass per topic)
full_structure ← []
for each topic in topics do
hits ← search_knowledge(role, query=topic)
content ← ORCHESTRATE(knowledge_generation_prompt(topic, hits), KA_config,
min_turns=2, max_tokens=3500)
full_structure.append({title: topic, body: content})
end for
// Phase 3 — Assessment Agent
quiz_fields ← ORCHESTRATE(quiz_generation_prompt(topics, module_briefs), AA_config)
→ sanitised and validated; fallback quiz generated if JSON invalid
full_structure.append({title: "Final Assessment Quiz", fields: quiz_fields,
meta: {pass_mark: 80}})
OnboardingFlow.save(role, full_structure)
emit COMPLETED to client
```
**Grading strategy:**
- Multiple-choice questions: deterministic string comparison against `correct_option`
- Free-text / textarea responses: agent-graded by the AA at session completion
- Per-question outcomes persisted in session state for audit and feedback rendering

View file

@ -6,6 +6,14 @@
note = {Accessed: 2026-03-09}
}
@misc{huggingface2024mcp,
author = {{Hugging Face}},
title = {Introduction to Model Context Protocol (MCP)},
year = {2024},
howpublished = {\url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}},
note = {Accessed: 2026-03-09}
}
@misc{langgraph2024,
author = {{LangChain}},
title = {LangGraph: Building Stateful, Multi-agent Applications with LLMs},
@ -14,6 +22,14 @@
note = {Accessed: 2026-03-09}
}
@misc{meta2024llama3,
author = {{Meta AI}},
title = {Llama 3: Open-weight Large Language Models},
year = {2024},
howpublished = {\url{https://llama.meta.com/llama3/}},
note = {Accessed: 2026-03-09}
}
@misc{pgvector2024,
author = {{PostgreSQL Global Development Group}},
title = {pgvector: Open-source Vector Similarity Search for PostgreSQL},
@ -22,6 +38,14 @@
note = {Accessed: 2026-03-09}
}
@misc{pinecone2023rag,
author = {{Pinecone}},
title = {Retrieval Augmented Generation (RAG) and Semantic Search},
year = {2023},
howpublished = {\url{https://www.pinecone.io/learn/retrieval-augmented-generation/}},
note = {Accessed: 2026-03-09}
}
@misc{dettmers2023bitsandbytes,
author = {Dettmers, Tim},
title = {4-bit Quantization and Bitsandbytes for LLMs},
@ -78,6 +102,14 @@
note = {Accessed: 2026-03-09}
}
@misc{sbert2024docs,
author = {{UKPLab / SBERT}},
title = {Sentence-Transformers Documentation},
year = {2024},
howpublished = {\url{https://www.sbert.net/}},
note = {Accessed: 2026-03-09}
}
@misc{llamacpp2024,
author = {{ggml-org}},
title = {llama.cpp Documentation},
@ -128,6 +160,17 @@
url = {https://arxiv.org/abs/2004.04906}
}
@article{johnson2019faiss,
author = {Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
title = {Billion-scale Similarity Search with {GPUs}},
journal = {IEEE Transactions on Big Data},
year = {2019},
volume = {7},
number = {3},
pages = {535--547},
url = {https://arxiv.org/abs/1702.08734}
}
@inproceedings{reimers2019sbert,
author = {Reimers, Nils and Gurevych, Iryna},
title = {Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
@ -169,38 +212,6 @@
url = {https://arxiv.org/abs/2312.10997}
}
@article{guo2024massurvey,
author = {Guo, Taicheng and Chen, Xiuying and Wang, Yaqi and Chang, Ruidi and Pei, Shichao and Chawla, Nitesh V. and Wiest, Olaf and Zhang, Xiangliang},
title = {Large Language Model based Multi-Agents: A Survey of Progress and Challenges},
journal = {arXiv preprint arXiv:2402.01680},
year = {2024},
url = {https://arxiv.org/abs/2402.01680}
}
@misc{hibob2024,
author = {{HiBob}},
title = {HiBob HRIS Platform},
year = {2024},
howpublished = {\url{https://www.hibob.com}},
note = {Accessed: 2026-03-23}
}
@misc{leena2024,
author = {{Leena AI}},
title = {Leena.ai: AI-Powered Employee Experience Platform},
year = {2024},
howpublished = {\url{https://leena.ai}},
note = {Accessed: 2026-03-23}
}
@misc{leapsome2024,
author = {{Leapsome}},
title = {Leapsome: People Enablement Platform},
year = {2024},
howpublished = {\url{https://www.leapsome.com}},
note = {Accessed: 2026-03-23}
}
@article{liu2023promptsurvey,
author = {Liu, Pengfei and Yuan, Weizhe and Fu, Jinlan and Jiang, Zhengbao and Hayashi, Hiroaki and Neubig, Graham},
title = {Pre-train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing},

View file

@ -1,4 +1,4 @@
\documentclass[11pt]{article}
\documentclass[12pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{lmodern}
@ -15,7 +15,6 @@
\usepackage{tabularx}
\usepackage{xurl}
\usepackage[numbers,sort&compress]{natbib}
\usepackage{amsmath}
% Report-style paragraph spacing
\setlength{\parindent}{0pt}
@ -34,19 +33,40 @@
I declare that Large Language Models (LLMs) and
Chat Completion APIs were used in the preparation of this report and for
assisting with coding the project. See Appendix~\ref{appendix:ai-use} for full details.
assisting with coding the project.
\section*{Project Summary}\label{project-summary}
\textbf{Scope of AI Usage.} AI was used to assist in the structural organization, grammatical refinement, and syntactic formatting of the prose and technical descriptions.
\textbf{Context.} Corporate onboarding imposes a recurring productivity tax on senior staff, who must repeatedly deliver role-specific knowledge transfer to new hires. This problem is acute in small teams where training capacity is limited and inconsistency compounds over time.
\textbf{Prototyping \& Feasibility Research.} LLMs were employed during the R\&D phase to \textbf{scope technical requirements and perform feasibility checks}. This included generating ``throwaway'' boilerplate code to test the viability of specific architectural branches (e.g., comparing custom fine-tuning against the LangGraph API) and validating the compatibility of the Model Context Protocol (MCP) with the existing Django environment.
\textbf{Problem.} Existing onboarding tools either rely on static documentation or generic chatbots that lack role awareness, session continuity, and the ability to generate structured curricula from internal organisational knowledge.
\textbf{Originality of Content.} All core architectural concepts, the design of the \emph{Dynavera} system, the ``Distributed Agentic Pattern'' logic, and the specific implementation strategies are my own original work.
\textbf{Solution.} Dynavera is a distributed agentic onboarding platform built on Django and MCP, comprising four specialist agents (Curriculum, Knowledge, Assessment, and Progress Monitor) that collaborate to deliver adaptive, role-grounded training from privately hosted documents using local LLM inference.
\textbf{Fact-Checking and References.} Any external information or technical claims used to ground the AI\textquotesingle s output have been verified against the primary sources listed in the References section.
\textbf{Impact and Results.} The system is fully deployed and benchmarked: LLM inference is the dominant latency contributor at roughly 8--12\,s per turn, while retrieval and tool-call overhead remain negligible, confirming that the distributed architecture correctly isolates high-latency work from the responsive application layer.
\textbf{Human Oversight.} I have critically reviewed, edited, and refined all AI-generated suggestions to ensure technical accuracy and alignment with the project's objectives.
\textbf{Conclusion.} Dynavera demonstrates that a production-viable, privacy-preserving agentic training runtime can be built on consumer-grade hardware within a standard web framework, establishing a concrete foundation for controlled onboarding studies and further empirical validation. Inspector access details are provided in Appendix~\ref{appendix:inspector}.
\section*{Inspector Access Details}\label{inspector-access-details}
The public deployment for evaluation is available at:
\url{https://fyp.viswamedha.com}
Register as a manager (with code \texttt{MANAGER2026}) or use the following credentials for testing:
\begin{center}
\begin{tabular}{p{0.22\linewidth} p{0.46\linewidth} p{0.22\linewidth}}
\toprule
Role & Email & Password \\
\midrule
Admin & admin@example.com & admin \\
Manager & haleisaac@example.com & password \\
User & j.thompson@example.com & password \\
\bottomrule
\end{tabular}
\end{center}
\textit{Note: The public site should always be available, but the GPU node
runs on my PC and can go offline. For reliable testing,
I recommend running my development compose stack on a CUDA-enabled machine with a GPU.}
\section{Introduction}\label{introduction}
@ -89,13 +109,13 @@ By addressing this gap, Dynavera enables organizations to:
\begin{itemize}
\item
Scale Mentorship: Support multiple new hires simultaneously while
reducing senior staff intervention
minimising senior staff intervention
\item
Standardize Quality: Ensure consistent depth and
Standardize Quality: Ensure consistent depth, structure, and
assessment across all onboarding experiences
\item
Reduce Time-to-Productivity (TTP): Provide 24/7 access to contextual
agentic support
Reduce Time-to-Productivity (TTP): Provide 24/7 access to contextual,
role-aware support through AI agents
\end{itemize}
Dynavera is designed as a proof-of-concept platform that transforms
@ -177,7 +197,7 @@ contextual reasoning, and adaptive response generation, making them
well-suited for interactive, role-aware training scenarios. Unlike
static documentation, LLM-driven systems can dynamically tailor
explanations and guidance based on a user's specific role and prior
knowledge \cite{wu2023autogen,li2023camel,vanlehn2011}.
knowledge \cite{meta2024llama3,wu2023autogen,li2023camel,vanlehn2011}.
Prompt engineering and reasoning-oriented prompting strategies further
improve controllability for structured instructional tasks
\cite{liu2023promptsurvey,wei2022cot}.
@ -207,14 +227,16 @@ Furthermore, agent collaboration enables training workflows that more
closely resemble human mentorship, where guidance and evaluation occur
in parallel. This architecture allows Dynavera to serve not only the
trainee but also the broader organizational stakeholders, including HR
departments and team leads. By capturing granular interaction data, Dynavera enables enhanced organisational visibility across three dimensions \cite{langgraph2024,wu2023autogen,li2023camel}:
departments and team leads. By capturing granular interaction data, the
system provides enhanced organizational visibility across three dimensions
\cite{langgraph2024,wu2023autogen,li2023camel}:
\begin{itemize}
\item
Integral Progress Analytics: Automated reports and charts track
trainee milestones in real-time, allowing HR to identify exactly where
organizational knowledge evolves
\cite{lewis2020rag,karpukhin2020dpr,gao2023ragsurvey}.
\cite{lewis2020rag,karpukhin2020dpr,gao2023ragsurvey,pinecone2023rag}.
\item
Continuous Curriculum Optimization: The system can flag specific
training modules that frequently cause friction or confusion,
@ -247,31 +269,32 @@ enable scalable, context-aware onboarding:
modularity, explainability, and system adaptability \cite{langgraph2024}.
\item
Retrieval-Augmented Generation (RAG): Training responses are grounded
in authoritative, role-specific documentation rather than relying
solely on a model's parametric knowledge. This ensures factual
accuracy, contextual relevance, and adaptability as organisational
knowledge evolves \cite{gao2023ragsurvey}.
in authoritative, organization-specific documentation rather than
relying solely on a model's parametric knowledge. This ensures factual
accuracy, contextual relevance, and rapid adaptability as
organizational knowledge evolves \cite{pinecone2023rag}.
\end{itemize}
To address data privacy and deployment constraints, Dynavera prioritizes
local inference using quantized open-weight models in GGUF format. This design
choice reduces dependency on external cloud APIs, supports offline or air-gapped
environments, and aligns with enterprise privacy requirements while maintaining
acceptable inference performance \cite{dettmers2023bitsandbytes,llamacpp2024}.
local inference using quantized open-weight models (e.g., Llama 3 in
GGUF format). This design choice reduces dependency on external cloud
APIs, supports offline or air-gapped environments, and aligns with
enterprise privacy requirements while maintaining acceptable inference
performance \cite{meta2024llama3,dettmers2023bitsandbytes,llamacpp2024}.
\textbf{Model Selection Rationale.}
Four open-weight models were evaluated against a fixed set of 20 role-scoped onboarding prompts
covering curriculum generation, knowledge explanation, assessment question generation, and
free-form HR Q\&A. Each response was rated 1--5 on instruction-following, factual grounding, and
format compliance. Full results and per-model observations are recorded in
\path{docs/model-selection-benchmarks.md}.
\textbf{Meta-Llama-3.1-8B-Instruct-Q4\_K\_M} was selected as the inference model. It achieved
the highest quality score among feasible candidates and produced valid JSON-format outputs on
18/20 structured generation prompts without retries --- a critical property for the
\texttt{\_extract\_json\_list} parsing step. A higher-scoring 14B candidate was trialled but
eliminated because its memory footprint left no headroom for the nomic-embed-text embedding
model that runs concurrently during document ingestion.
Several open-weight models were evaluated for the inference backend,
including Mistral and other recent instruction-tuned LLMs. Ultimately,
\path{Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf} was selected for deployment.
This choice was driven by a combination of factors: (1) superior instruction-following
and conversational ability in practical onboarding scenarios, (2) strong
performance on both general and domain-specific queries during pilot tests,
(3) efficient quantization (Q4\_K\_M) enabling fast, low-memory inference on
local hardware, and (4) robust support for the GGUF format, which streamlined
integration with the local inference server. While Mistral and similar models
offered competitive performance, Llama 3.1-8B-Instruct provided a better balance
of accuracy, resource usage, and compatibility for the privacy-preserving,
offline-first requirements of Dynavera.
\subsection{Positioning Against Alternative
Approaches}\label{positioning-against-alternative-approaches}
@ -281,10 +304,10 @@ human-only onboarding preserves expert nuance but scales poorly and
imposes recurring opportunity cost on senior staff. Second, static
LMS/document-first onboarding scales distribution but offers limited
adaptivity, weak grounding during Q\&A, and minimal operational
traceability beyond completion events \cite{vanlehn2011}. Third, a single general chatbot
traceability beyond completion events. Third, a single general chatbot
improves interactivity, but it often collapses curriculum, retrieval,
assessment, and monitoring into one prompt surface, which weakens
governance and makes targeted iteration harder \cite{wu2023autogen,guo2024massurvey}.
governance and makes targeted iteration harder.
The Dynavera architecture chooses a middle path: specialized agent roles
within one orchestrated runtime, retrieval-grounded generation, and
@ -293,36 +316,6 @@ system complexity in exchange for clearer responsibility boundaries,
better modularity, and stronger alignment between training delivery,
evaluation quality, and management oversight.
\subsection{Industry Comparison: Commercial Onboarding Platforms}\label{industry-comparison}
Dynavera can be further contextualised against established commercial HR and onboarding products. While tools such as HiBob \cite{hibob2024}, Leena.ai \cite{leena2024}, and Leapsome \cite{leapsome2024} address adjacent problems, they are fundamentally \emph{Systems of Record}: platforms that manage people, tasks, and compliance workflows. Dynavera is designed as a \emph{System of Intelligence}: a runtime that actively generates and delivers role-specific knowledge. Table~\ref{tab:industry-comparison} summarises the key differences.
\begingroup\hbadness=10000
\begin{table}[H]
\centering
\begin{tabularx}{\linewidth}{>{\raggedright\arraybackslash}p{0.15\linewidth} >{\raggedright\arraybackslash}p{0.16\linewidth} >{\raggedright\arraybackslash}p{0.18\linewidth} >{\raggedright\arraybackslash}p{0.16\linewidth} X}
\toprule
Feature & HiBob & Leena.ai & Leapsome & Dynavera \\
\midrule
Core identity & Modern HRIS & AI service desk & Perf.\ \& LMS & Agentic onboarding runtime \\
AI integration & Generative summaries & LLM RAG chatbot & AI feedback suggestions & Multi-agent orchestration (MCP) \\
Data privacy & Standard cloud SaaS & Enterprise cloud & Enterprise cloud & Privacy-first, local GPU inference \\
Onboarding style & Checklist-driven & Q\&A / workflow automation & Doc-based learning paths & Real-time, adaptive instruction \\
\bottomrule
\end{tabularx}
\caption{Comparison of Dynavera against established commercial onboarding platforms.}
\label{tab:industry-comparison}
\end{table}
\endgroup
\textbf{HiBob} is primarily an HRIS: it manages payroll, attendance, and employee records, treating onboarding as a checklist process (sign this document, read that policy). It has no concept of a Knowledge Agent or Assessment Agent that can dynamically instruct a new hire based on internal documentation. It tracks people; it does not teach them.
\textbf{Leena.ai} is the closest commercial analogue in terms of AI. It uses LLMs to help employees retrieve answers to HR questions and automate service-desk workflows. However, Leena.ai is optimised for retrieval of existing answers, not curriculum generation. It lacks the distributed agentic pattern: while it can respond to a single query, it does not follow a structured Curriculum $\rightarrow$ Knowledge $\rightarrow$ Assessment loop that adapts based on a trainee's live session state.
\textbf{Leapsome} focuses on performance management and learning enablement. Its learning module is a traditional LMS that hosts human-authored videos and documents. If the content does not exist, the learner cannot progress. Dynavera bridges this gap: the MCP Router allows agents to synthesise role-specific training on the fly from raw organisational documentation stored in pgvector, rather than requiring pre-authored content for every scenario.
In each case, the gap Dynavera addresses is not a missing feature but a missing architectural category: none of these platforms combine privacy-first local inference, streaming agentic orchestration, semantic retrieval grounding, and persistent session auditability in a single deployable runtime.
\subsection{Related Work Synthesis}\label{related-work-synthesis}
Recent research supports the technical direction selected for Dynavera,
@ -338,7 +331,9 @@ for retrieval and progress updates \cite{schick2023toolformer,yao2023react}.
On the orchestration side, multi-agent conversation frameworks indicate
that role-specialized collaboration can improve decomposition of complex
tasks, but may introduce coordination overhead if control policies are
unclear \cite{wu2023autogen,li2023camel}. Surveys of LLM-based multi-agent systems characterise the general MAS workflow as a pipeline of perception, reasoning, interaction, and evolution stages, where agents typically communicate peer-to-peer with limited coupling to persistent application state \cite{guo2024massurvey}. Dynavera diverges from this pattern in two key respects. First, rather than treating agent interaction as an isolated conversational process, orchestration is embedded within a web application runtime (Django Channels), giving each agent turn direct access to persisted session state, relational progress records, and organisational knowledge via the MCP router. Second, while prior MAS architectures emphasise decentralised agent-to-agent coordination for emergent behaviour, Dynavera adopts a centrally orchestrated, state-persistent model that prioritises auditability and deterministic recovery over emergent flexibility. This trade-off is appropriate for a production onboarding context, where reproducibility and governance matter as much as adaptivity.
unclear \cite{wu2023autogen,li2023camel}. Dynavera addresses this by keeping a
single orchestrator with explicit tool boundaries and persisted session
state, instead of fully decentralized agents.
From a learning-science perspective, prior tutoring studies suggest that
interactive, adaptive guidance can produce better learning outcomes than
@ -411,24 +406,23 @@ components, ensuring real-time interactivity.
\subsection{Technology stack}\label{technology-stack}
Dynavera is implemented as a modern full-stack application, with the
components presented in Table~\ref{tab:tech-stack}.
components presented in Table 1.
\begin{table}[H]
\centering
\begin{tabularx}{\linewidth}{p{0.12\linewidth} p{0.16\linewidth} X}
\begin{tabularx}{\linewidth}{p{0.22\linewidth} p{0.16\linewidth} X}
\toprule
Component & Technology & Rationale \\
\midrule
UI & Vue 3 w/ TS & Typesafe, reactive UI enabling rapid iteration and maintainable component design \\
Persistence & Pinia & Centralized, predictable state management for real-time training progress tracking \\
API & Django REST & Secure, mature framework supporting rapid development and scalable API design, informed by prior production experience \\
Frontend/UI & Vue 3 w/ TS & Typesafe, reactive UI enabling rapid iteration and maintainable component design \\
State Management & Pinia & Centralized, predictable state management for real-time training progress tracking \\
Backend/API & Django REST & Secure, mature framework supporting rapid development and scalable API design, informed by prior production experience \\
Database & PostgreSQL & Reliable, production-grade relational database for organizational and user data \\
Embeddings & PgVector & Efficient similarity search over embedded training documentation via PostgreSQL \\
Vector Store & PgVector & Efficient similarity search over embedded training documentation via PostgreSQL \\
MCP Router & Python & Provides a standardized interface for agents to query data using Model Context Protocol. \\
\bottomrule
\end{tabularx}
\caption{Architectural components of the Dynavera platform, including frontend, backend, and AI integration technologies.}
\label{tab:tech-stack}
\end{table}
This stack was selected through explicit privacy, governance, and
@ -436,7 +430,7 @@ operability trade-offs rather than convenience alone. A decoupled
frontend-backend architecture lets the UI and API evolve independently,
while PostgreSQL with pgvector provides one ACID-compliant store for
both relational state and vector retrieval
\cite{django2024docs,drf2024docs,pgvector2024}.
\cite{django2024docs,drf2024docs,pgvector2024,johnson2019faiss}.
Alternatives considered included LangChain-style orchestration,
external vector databases (for example Pinecone), and cloud-hosted LLM
@ -459,7 +453,7 @@ Pattern}\label{design-philosophy-the-distributed-agentic-pattern}
Dynavera leverages the Model Context Protocol (MCP) to solve the
``context gap'' in corporate onboarding. Rather than providing the LLM
with a static, bloated prompt, the system utilizes a Sidecar Tooling
approach \cite{anthropic2024mcp,schick2023toolformer,yao2023react}:
approach \cite{anthropic2024mcp,huggingface2024mcp,schick2023toolformer,yao2023react}:
\begin{itemize}
\item
@ -510,7 +504,7 @@ The API surface is intentionally split by interaction pattern. Standard
management operations are handled through Django REST Framework (for
example role membership, training file upload, and session endpoints),
while orchestration-time interaction uses Django Channels over
WebSockets at \path{/ws/onboarding/<session_uuid>/}. This
WebSockets at /ws/onboarding/\textless session\_uuid\textgreater/. This
allows the platform to handle both CRUD-style workflows and
long-running, stateful agent interactions without forcing either pattern
into the other \cite{drf2024docs,channels2024docs}.
@ -519,8 +513,7 @@ For ingestion, the backend follows an asynchronous execution path:
uploaded files are stored as TrainingFile records, and a post-save
trigger enqueues background processing through Celery (Redis broker).
This prevents heavy preprocessing from blocking request-response latency
on the main web process \cite{celery2024docs,redis2024docs}
(\texttt{apps/knowledge/tasks.py:45--117}).
on the main web process \cite{celery2024docs,redis2024docs}.
Persistence is model-driven and traceable. Session state, progress,
generated onboarding structures, and interaction events are stored in
@ -535,7 +528,15 @@ PostgreSQL/pgvector as a unified data plane.
\subsubsection{Knowledge Ingestion
Workflow}\label{knowledge-ingestion-workflow}
The ingestion data flow between the User/UI, Django REST API, Celery worker, PostgreSQL/pgvector database, and GPU endpoint is shown in Figure~\ref{fig:embedding-data-flow} (Appendix~\ref{appendix:diagrams}).
Figure~\ref{fig:embedding-data-flow} shows the ingestion data flow between the User/UI, Django REST
API, Celery worker, PostgreSQL/pgvector database, and GPU endpoint.
\begin{figure}[H]
\centering
\includegraphics[width=5.75521in,height=5.14354in]{diagrams/embedding-data-flow.png}
\caption{Knowledge ingestion data flow diagram, illustrating the interaction between the user, REST API, Celery worker, pgvector database, and GPU endpoint.}
\label{fig:embedding-data-flow}
\end{figure}
\underline{Asynchronous processing with Celery (Redis broker)}\\
When a manager uploads a training file from the UI, the file is sent to
@ -550,7 +551,7 @@ batches long content, and calls the GPU service at /v1/semantic-chunk.
The service performs sentence-level semantic breakpoint detection using
embedding-distance thresholds, then returns coherent chunks with
embeddings. This avoids naive fixed-size splits that can break context
mid-concept \cite{reimers2019sbert,fastapi2024docs}.
mid-concept \cite{reimers2019sbert,sbert2024docs,fastapi2024docs}.
\underline{Vector storage and retrieval with pgvector}\\
Returned chunk embeddings are stored in KnowledgeChunk.embedding (768
@ -558,13 +559,19 @@ dimensions) in PostgreSQL using pgvector, linked relationally to role
and source file metadata. Retrieval is performed in SQL using
cosine-distance ranking and top-k selection, allowing role filtering and
similarity search in one query path
\cite{karpukhin2020dpr,pgvector2024}
(\texttt{apps/onboarding/mcp.py:101--127}).
\cite{karpukhin2020dpr,johnson2019faiss,pgvector2024}.
\subsubsection{Agent Orchestration Workflow
(Simplified)}\label{agent-orchestration-workflow-simplified}
Figure~\ref{fig:agent-orchestration-loop} (Appendix~\ref{appendix:diagrams}) summarizes the orchestration path used during live onboarding.
\begin{figure}[H]
\centering
\includegraphics[width=6.15132in,height=6.00619in]{diagrams/agent-orchestration-loop.png}
\caption{Agent orchestration data flow diagram, illustrating the interaction between the user/UI, WebSocket consumer, MCP router, GPU endpoint, and pgvector database.}
\label{fig:agent-orchestration-loop}
\end{figure}
Figure~\ref{fig:agent-orchestration-loop} summarizes the orchestration path used during live onboarding.
The runtime is implemented as a Django Channels WebSocket consumer
(/ws/onboarding/\textless session\_uuid\textgreater/), which maintains a persistent
two-way connection so the UI can receive real-time status updates
@ -603,17 +610,13 @@ runtime where each stage contributes to structured onboarding output.
Tool-mediated grounding is handled through the MCP router. During
orchestration, model responses may include tool calls; the runtime
executes approved tools (such as \texttt{search\_knowledge} and
\texttt{update\_progress}), retrieves contextual evidence from pgvector-backed
executes approved tools (such as search\_knowledge and
update\_progress), retrieves contextual evidence from pgvector-backed
documents, and injects those results back into the message loop before
final answer generation (\path{consumers/base.py:77-132},
\path{mcp.py:78-159}). This keeps generation anchored in role-specific
final answer generation. This keeps generation anchored in role-specific
organizational material while preserving a controlled boundary between
model reasoning and data access.
Pseudocode for the orchestration loop, MCP tool dispatch, ingestion pipeline, and CA/KA/AA
generation sequence is provided in \path{docs/orchestration-pseudocode.md}.
\subsection{Workflow Implementation}\label{workflow-implementation}
\begin{figure}[H]
@ -635,8 +638,7 @@ opens a persistent WebSocket connection to the orchestration endpoint
and submits user prompts/actions as session events. The orchestrator
resolves the active configuration for that role/session, runs model
inference, executes retrieval tools when required, and emits structured
runtime events (status/tool/completion) back to the client
(\texttt{apps/onboarding/consumers/generate.py:34--124}).
runtime events (status/tool/completion) back to the client.
During guided learning, module content generation, context retrieval,
and assessment output are coordinated in sequence. The curriculum phase
@ -708,44 +710,40 @@ retrieval effectiveness, and (3) operational feasibility.
onboarding, validating the privacy-first local inference objective.
\end{itemize}
\textbf{Contributions Realised}
\subsubsection{Quantitative Evaluation}\label{quantitative-evaluation}
The introduction stated three primary contributions. Each is directly evidenced by the implemented system:
To strengthen the engineering evaluation beyond qualitative observations,
representative measurements were collected from controlled development
runs using role-scoped onboarding prompts and tool-enabled inference
calls.
\begin{enumerate}
\item \textbf{A distributed agentic onboarding architecture.}
The system physically separates the application layer (Django, Celery, PostgreSQL) from the inference layer (FastAPI, llama.cpp), connected via authenticated HTTP. Four agent roles --- Curriculum, Knowledge, Assessment, and Progress Monitor --- operate within a shared orchestration runtime with distinct responsibilities and configuration records. The architecture is fully deployed at \url{https://fyp.viswamedha.com} and reproducible via the submitted Docker Compose stack.
\begin{table}[H]
\centering
\begin{tabularx}{\linewidth}{>{\raggedright\arraybackslash}p{0.32\linewidth} >{\raggedright\arraybackslash}p{0.20\linewidth} >{\raggedright\arraybackslash}X}
\toprule
Metric & Observed value & Interpretation \\
\midrule
Average model response time & 25 s & LLM inference dominates total latency, as expected in a split architecture. \\
Average retrieval latency & 120 ms & Vector lookup remains a small fraction of full response time. \\
Average tool invocation overhead & 80 ms & MCP tool routing adds bounded overhead while preserving governance. \\
Average end-to-end response time & 120 s & Application and orchestration layers stay responsive under inference load. \\
Concurrent sessions tested & 5 & No dropped WebSocket sessions observed during test window. \\
Average WebSocket message latency & $< 100$ ms & Status streaming remains near real-time for UX feedback. \\
Observed VRAM usage / decode speed & 8.2 GB / 16 tok/s & Practical throughput for interactive onboarding exchanges. \\
\bottomrule
\end{tabularx}
\caption{Quantitative evaluation summary from development validation runs.}
\label{tab:quantitative-evaluation}
\end{table}
\item \textbf{A tool-aware orchestration runtime integrated with Django.}
The \texttt{orchestrate} method (\path{consumers/base.py:77-132}) implements a multi-turn agentic loop: the model receives tool definitions at each inference step, may invoke approved MCP tools (\texttt{search\_knowledge}, \texttt{update\_progress}, \texttt{get\_role\_context}), and receives structured tool results before generating a final response. This loop is embedded directly within a Django Channels WebSocket consumer, giving it access to the full Django ORM and session state --- a deliberate integration decision documented in Section~\ref{design-philosophy-the-distributed-agentic-pattern}.
These measurements support the central design claim: the distributed
runtime isolates high-latency model execution from the main application
path while retaining low-latency orchestration and status streaming.
They also indicate that semantic chunking and dense retrieval are
effective enough for role-grounded onboarding in the current
proof-of-concept scope.
\item \textbf{A privacy-preserving RAG training system using local LLM inference.}
All model inference runs on a local GPU node using a quantized open-weight model (\path{Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf}) via llama.cpp. Organisation documents never leave the deployment environment: ingestion, embedding, and retrieval all operate within the self-hosted stack. The ingestion pipeline (\path{knowledge/tasks.py:45-117}) processes uploaded files into role-scoped vector chunks stored in pgvector, which are retrieved at inference time using cosine-distance search rather than any external API.
\end{enumerate}
Together, these contributions demonstrate that a production-viable, privacy-preserving agentic training system can be built and deployed on consumer-grade hardware within a standard web application framework.
Dynavera addresses the onboarding productivity tax with a concrete,
implemented distributed architecture rather than a conceptual prototype.
The project demonstrates that role-grounded retrieval, specialist-agent
orchestration, and persistent session state can be combined into a
practical training runtime that is both inspectable and deployable in
privacy-sensitive environments. The strongest immediate value is not
just automated Q\&A, but structured onboarding continuity: curriculum,
assessment, and progress evidence remain linked and reviewable over time.
As a proof-of-concept, Dynavera already validates technical feasibility
and integration viability. Its next milestone is empirical validation at
organizational scale through controlled onboarding studies and
production-grade observability/safety hardening.
\subsection{Quantitative Evaluation}\label{quantitative-evaluation}
An automated benchmark suite is included in the repository at \path{apps/accounts/management/commands/benchmark.py} and can be run via \texttt{manage.py benchmark}. It measures LLM inference latency across representative prompt types, embedding generation latency, semantic chunking throughput, and pgvector retrieval latency. Full results from a 10-run execution are recorded at \path{benchmarks/results_2026-03-24_13-29-55.md}.
The results confirm that LLM inference is the dominant latency contributor in the system, while retrieval and tool-call overhead remain negligible by comparison --- consistent with the architectural claim that the distributed split between the application layer and inference layer correctly isolates the high-latency work from the responsive orchestration path.
\subsection{Limitations}\label{limitations}
\subsubsection{Limitations}\label{limitations}
\begin{itemize}
\item
@ -759,10 +757,11 @@ The results confirm that LLM inference is the dominant latency contributor in th
Adversarial testing of tool-invocation policy remains limited,
especially for prompt/tool misuse edge cases.
\item
Benchmark measurements were collected against the development inference stack using role-scoped prompts; production traffic may exhibit different latency distributions under concurrent load.
Most measurements were collected in a development setting with
synthetic or curated test prompts rather than production traffic.
\end{itemize}
\subsection{Future Improvements}\label{future-improvements}
\subsubsection{Future Improvements}\label{future-improvements}
The next development phase should focus on measurable training outcomes,
operational hardening, and richer adaptivity:
@ -786,8 +785,8 @@ operational hardening, and richer adaptivity:
around tool calls, implement stronger role-boundary tests, and add
automated red-team style checks for prompt/tool misuse scenarios.
\item
\textbf{Scalability and observability:} Add request tracing,
queue-depth dashboards, and performance benchmarks to support
\textbf{Scalability and observability:} Introduce request tracing,
queue-depth dashboards, and load/performance benchmarks to support
multi-tenant deployment planning.
\item
\textbf{Multi-modal onboarding support:} Extend ingestion and
@ -795,63 +794,25 @@ operational hardening, and richer adaptivity:
real enterprise training assets.
\end{itemize}
\subsubsection{Conclusion}\label{conclusion}
Dynavera addresses the onboarding productivity tax with a concrete,
implemented distributed architecture rather than a conceptual prototype.
The project demonstrates that role-grounded retrieval, specialist-agent
orchestration, and persistent session state can be combined into a
practical training runtime that is both inspectable and deployable in
privacy-sensitive environments. The strongest immediate value is not
just automated Q\&A, but structured onboarding continuity: curriculum,
assessment, and progress evidence remain linked and reviewable over time.
As a proof-of-concept, Dynavera already validates technical feasibility
and integration viability. Its next milestone is empirical validation at
organizational scale through controlled onboarding studies and
production-grade observability/safety hardening.
\section{References}\label{references}
\bibliographystyle{unsrtnat}
\bibliography{references}
\appendix
\section{AI Use Declaration}\label{appendix:ai-use}
\textbf{Scope of AI Usage.} AI was used to assist in the structural organization, grammatical refinement, and syntactic formatting of the prose and technical descriptions.
\textbf{Prototyping \& Feasibility Research.} LLMs were employed during the R\&D phase to \textbf{scope technical requirements and perform feasibility checks}. This included generating ``throwaway'' boilerplate code to test the viability of specific architectural branches (e.g., comparing custom fine-tuning against the LangGraph API) and validating the compatibility of the Model Context Protocol (MCP) with the existing Django environment.
\textbf{Originality of Content.} All core architectural concepts, the design of the \emph{Dynavera} system, the ``Distributed Agentic Pattern'' logic, and the specific implementation strategies are my own original work.
\textbf{Fact-Checking and References.} Any external information or technical claims used to ground the AI\textquotesingle s output have been verified against the primary sources listed in the References section.
\textbf{Human Oversight.} I have critically reviewed, edited, and refined all AI-generated suggestions to ensure technical accuracy and alignment with the project's objectives.
\section{Detailed Data Flow Diagrams}\label{appendix:diagrams}
\begin{figure}[H]
\centering
\includegraphics[height=3.8in]{diagrams/embedding-data-flow.png}
\caption{Knowledge ingestion data flow diagram, illustrating the interaction between the user, REST API, Celery worker, pgvector database, and GPU endpoint.}
\label{fig:embedding-data-flow}
\end{figure}
\begin{figure}[H]
\centering
\includegraphics[width=6.15132in,height=6.00619in]{diagrams/agent-orchestration-loop.png}
\caption{Agent orchestration data flow diagram, illustrating the interaction between the user/UI, WebSocket consumer, MCP router, GPU endpoint, and pgvector database.}
\label{fig:agent-orchestration-loop}
\end{figure}
\section{Inspector Access Details}\label{appendix:inspector}
The public deployment for evaluation is available at:
\url{https://fyp.viswamedha.com}
Register as a manager (with code \texttt{MANAGER2026}) or use the following credentials for testing:
\begin{center}
\begin{tabular}{p{0.22\linewidth} p{0.46\linewidth} p{0.22\linewidth}}
\toprule
Role & Email & Password \\
\midrule
Admin & admin@example.com & admin \\
Manager & haleisaac@example.com & password \\
User & j.thompson@example.com & password \\
\bottomrule
\end{tabular}
\end{center}
\textit{Note: The public site should always be available, but the GPU node
runs on my PC and can go offline. For reliable testing,
I recommend running my development compose stack on a CUDA-enabled machine with a GPU.}
\end{document}

View file

@ -1,87 +0,0 @@
project:
name: "Dynavera"
url: "https://fyp.viswamedha.com"
viewport: { width: 1440, height: 900, deviceScaleFactor: 2 }
theme: "light"
scenarios:
- id: "manager-login-and-dashboard"
name: "Manager Login and Dashboard Interactions"
type: "screenshot+video"
page: "/"
actions:
- wait: 2000
- screenshot: { name: "01-home" }
- click: { selector: ".ant-menu-item:nth-child(2)" }
- wait: 1500
- screenshot: { name: "02-about" }
- click: { selector: ".ant-menu-item:nth-child(3)" }
- wait: 1500
- screenshot: { name: "03-getting-started" }
- click: { selector: ".ant-menu-item:nth-child(4)" }
- wait: 1500
- screenshot: { name: "04-pricing" }
- click: { selector: ".ant-btn-background-ghost" }
- wait: 1500
- screenshot: { name: "05-login-page" }
- type: { selector: "input[type=\"email\"]", text: "haleisaac@example.com" }
- type: { selector: "input[type=\"password\"]", text: "password" }
- screenshot: { name: "06-login-filled" }
- click: { selector: "button[type=\"submit\"]" }
- wait: 3000
- screenshot: { name: "07-org-overview" }
- scroll: { y: 350 }
- wait: 700
- screenshot: { name: "08-org-roles" }
- scroll: { y: 0 }
- click: { selector: ".header .ant-btn-primary" }
- wait: 7000
- screenshot: { name: "09-manage-details" }
- click: { selector: ".ant-tabs-tab:has-text(\"Members\")" }
- wait: 1000
- screenshot: { name: "10-manage-members" }
- click: { selector: ".ant-tabs-tab:has-text(\"Roles\")" }
- wait: 1000
- screenshot: { name: "11-manage-roles" }
- click: { selector: ".ant-tabs-tab:has-text(\"Files\")" }
- wait: 1000
- screenshot: { name: "12-manage-files" }
- click: { selector: ".ant-menu-item:nth-child(5)" }
- wait: 2000
- screenshot: { name: "13-agents-list" }
- click: { selector: ".ant-menu-item:nth-child(7)" }
- wait: 3000
- screenshot: { name: "14-progress-overview" }
- id: "trainee-login-and-onboarding"
name: "Trainee Login and Onboarding Interactions"
type: "screenshot+video"
page: "/"
actions:
- click: { selector: ".ant-btn-background-ghost" }
- wait: 3000
- click: { selector: ".ant-btn-background-ghost" }
- wait: 1500
- type: { selector: "input[type=\"email\"]", text: "j.thompson@example.com" }
- type: { selector: "input[type=\"password\"]", text: "password" }
- screenshot: { name: "15-trainee-login" }
- click: { selector: "button[type=\"submit\"]" }
- wait: 3000
- screenshot: { name: "16-trainee-org-view" }
- scroll: { y: 250 }
- wait: 700
- screenshot: { name: "17-trainee-roles-list" }
- click: { selector: ".role-item:first-of-type .ant-btn-default" }
- wait: 4000
- screenshot: { name: "18-onboarding-entry" }
- scroll: { y: 400 }
- wait: 1000
- screenshot: { name: "19-onboarding-mid" }
- scroll: { y: 800 }
- wait: 700
- screenshot: { name: "20-onboarding-bottom" }
export:
videos: { format: "mp4", fps: 24, crf: 18 }
combined_video: { enabled: true, name: "demo-recording.mp4" }

1716
site/package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -22,10 +22,8 @@
"ant-design-vue": "^4.2.6",
"axios": "^1.13.2",
"dompurify": "^3.3.1",
"launchreel": "^0.0.3",
"marked": "^17.0.3",
"pinia": "^3.0.4",
"rrweb": "^2.0.0-alpha.20",
"vue": "^3.5.26",
"vue-router": "^4.6.4"
},

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 221 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 788 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 143 KiB

View file

@ -10,7 +10,6 @@ import {
UserAddOutlined,
BuildOutlined,
PayCircleOutlined,
QuestionCircleOutlined,
} from '@ant-design/icons-vue'
import { useRoute, useRouter } from 'vue-router'
import { useUserStore } from './stores/userStore'
@ -31,7 +30,6 @@ type NavItem = {
const navItems: NavItem[] = [
{ key: '/', label: 'Home', icon: HomeOutlined, path: '/' },
{ key: '/about', label: 'About', icon: InfoCircleOutlined, path: '/about' },
{ key: '/getting-started', label: 'Get Started', icon: QuestionCircleOutlined, path: '/getting-started' },
{ key: '/pricing', label: 'Pricing', icon: PayCircleOutlined, path: '/pricing' },
{ key: '/agents', label: 'Agents', icon: RobotOutlined, path: '/agents', manager: true },
{ key: '/organization', label: 'Organizations', icon: BuildOutlined, path: '/organization' },

View file

@ -15,11 +15,6 @@ const router = createRouter({
name: 'about',
component: () => import('../views/AboutView.vue'),
},
{
path: '/getting-started',
name: 'getting-started',
component: () => import('../views/GettingStartedView.vue'),
},
{
path: '/pricing',
name: 'pricing',

View file

@ -44,7 +44,6 @@ export interface TrainingFile {
file_size: number
file_type: string
description: string
error_message: string
is_processed: boolean
status: 'ingesting' | 'chunked' | 'embedded' | 'failed'
file_url: string

View file

@ -1,29 +1,5 @@
<script setup lang="ts">
import { useRouter } from 'vue-router'
import { Card, Typography, Divider, List, Button, Space } from 'ant-design-vue'
import { PlayCircleOutlined, GithubOutlined, CodeOutlined } from '@ant-design/icons-vue'
const router = useRouter()
const REPO = 'https://git.cs.bham.ac.uk/projects-2025-26/vxn217'
const composeLinks = [
{
label: 'Development compose',
path: 'compose/dev/docker-compose.yml',
desc: 'Local development stack with hot-reload and dev tooling. (Recommended for inspectors)',
},
{
label: 'Production compose',
path: 'compose/prod/docker-compose.yml',
desc: 'Production stack with Traefik reverse proxy and optimised builds.',
},
{
label: 'Production inference compose',
path: 'compose/prod/docker-compose.inference.yml',
desc: 'Separate GPU inference service for the production environment.',
},
]
import { Card, Typography, Divider, List } from 'ant-design-vue'
const steps = [
'Register or login.',
@ -35,17 +11,17 @@ const features = [
{
title: 'Modular Content',
desc: 'Compose learning journeys from small, reusable modules. Mix videos and interactive checks.',
img: '/screenshots/feature-modular-content.png',
img: 'https://placehold.co/600x400?text=Modular+Content',
},
{
title: 'Agent Workflows',
desc: 'Automate guidance and triggers with configurable agents to move users through onboarding steps.',
img: '/screenshots/feature-agent-workflows.png',
img: 'https://placehold.co/600x400?text=Agent+Workflows',
},
{
title: 'Reporting & Insights',
desc: 'Lightweight reports showing completion and engagement metrics.',
img: '/screenshots/feature-reporting.png',
img: 'https://placehold.co/600x400?text=Reporting',
},
]
</script>
@ -59,11 +35,6 @@ const features = [
with modular content and agent-driven workflows. It is designed for teams that want
tangible learning experiences quickly without complex LMS setup.
</Typography.Paragraph>
<div class="video-wrapper">
<video controls class="launch-video">
<source src="/launch-reel.mp4" type="video/mp4" />
</video>
</div>
<Divider />
<Typography.Title :level="4">Getting started</Typography.Title>
<List :data-source="steps" :bordered="false">
@ -74,44 +45,6 @@ const features = [
</List.Item>
</template>
</List>
<Space style="margin-top: 1rem">
<Button type="primary" @click="router.push('/getting-started')">
<PlayCircleOutlined />
Get started
</Button>
<Button @click="router.push('/register')">Register now</Button>
</Space>
<Divider />
<Typography.Title :level="4">Self-host</Typography.Title>
<Typography.Paragraph type="secondary">
Dynavera runs entirely on your own infrastructure. Clone the repository and use one
of the Docker Compose stacks below to get up and running.
</Typography.Paragraph>
<div class="compose-list">
<div v-for="c in composeLinks" :key="c.path" class="compose-row">
<div class="compose-info">
<Typography.Text strong>{{ c.label }}</Typography.Text>
<Typography.Text type="secondary" class="compose-desc">{{ c.desc }}</Typography.Text>
</div>
<a :href="`${REPO}/-/blob/main/${c.path}`" target="_blank" rel="noopener">
<Button size="small">
<CodeOutlined />
View file
</Button>
</a>
</div>
</div>
<Space style="margin-top: 1rem">
<a :href="REPO" target="_blank" rel="noopener">
<Button type="primary">
<GithubOutlined />
View repository
</Button>
</a>
<a :href="`${REPO}/-/blob/main/compose/prod/docker-compose.yml`" target="_blank" rel="noopener">
<Button>Self host</Button>
</a>
</Space>
<Divider />
<Typography.Title :level="4">Features</Typography.Title>
<div class="features">
@ -147,39 +80,12 @@ const features = [
.row {
padding: 0.5rem 0;
}
.compose-list {
display: flex;
flex-direction: column;
gap: 0.6rem;
margin-bottom: 0.25rem;
}
.compose-row {
display: flex;
align-items: center;
justify-content: space-between;
gap: 1rem;
background: #f8fafc;
border: 1px solid #dbe3ec;
border-radius: 6px;
padding: 0.65rem 1rem;
}
.compose-info {
display: flex;
flex-direction: column;
gap: 0.1rem;
}
.compose-desc {
font-size: 0.85rem;
}
.video-wrapper {
margin: 1rem 0 0.5rem;
border-radius: 8px;
overflow: hidden;
}
.launch-video {
.hero {
width: 100%;
display: block;
border-radius: 8px;
height: 320px;
object-fit: cover;
border-radius: 6px;
margin-bottom: 1rem;
}
.features {
display: grid;
@ -202,6 +108,7 @@ const features = [
.feature-body {
padding: 0.75rem 1rem;
}
.feature-body :deep(.ant-typography-secondary) {
color: #4b5563 !important;
}

View file

@ -49,7 +49,7 @@ const agentTypeOptions = [
]
const agentTypeDescriptions: Record<string, string> = {
curriculum: 'Guides new hires through a structured onboarding path, presenting content, tasks, and milestones in a defined sequence for a given role.',
curriculum: 'Guides new hires through a structured onboarding path presenting content, tasks, and milestones in a defined sequence for a given role.',
knowledge: 'Answers ad-hoc questions by searching your uploaded training documents and knowledge base. Use this for open-ended Q&A during onboarding.',
assessment: 'Tests understanding through role-specific questions and scenarios, then reports results back to the onboarding session so progress can be tracked.',
monitor: 'Tracks overall session progress and surfaces completions or blockers for manager review without directly interacting with the new hire.',

View file

@ -1,472 +0,0 @@
<script setup lang="ts">
import { ref } from 'vue'
import { useRouter } from 'vue-router'
import { Card, Typography, Tabs, Button, Tag, Divider, Space } from 'ant-design-vue'
import {
UserAddOutlined,
TeamOutlined,
UploadOutlined,
PlayCircleOutlined,
CheckCircleOutlined,
DashboardOutlined,
SettingOutlined,
BookOutlined,
RobotOutlined,
EyeOutlined,
} from '@ant-design/icons-vue'
// Router instance; the step action buttons and footer buttons call router.push() on click.
const router = useRouter()
// Key of the selected guide tab ('trainee' | 'manager' | 'inspector'); starts on the trainee guide.
const activeTab = ref('trainee')
// Ordered steps rendered in the "I am a Trainee / New Hire" tab.
// Each entry supplies a title, an icon component, descriptive text, and a tag
// (label plus colour). An optional `action` adds a button that navigates to
// `action.path`; entries without it render no button.
const traineeSteps = [
{
title: 'Register an account',
icon: UserAddOutlined,
description:
'Click Register in the top-right corner. Fill in your name, email, and a password. If you have an invite link from your manager, open that link after registering; it will automatically join you to the right organization.',
tag: 'Required',
tagColor: 'blue',
action: { label: 'Register now', path: '/register' },
},
{
title: 'Join your organization',
icon: TeamOutlined,
description:
'Once logged in, go to Organizations in the nav. If your manager shared an invite link, follow it to be added automatically. Otherwise, ask your manager to send you one, as you cannot self-join without an invite.',
tag: 'Required',
tagColor: 'blue',
},
{
title: 'Open your onboarding session',
icon: PlayCircleOutlined,
description:
'Inside your organization, you will see a list of roles you have been assigned. Click Start Onboarding next to your role. The system will open a live chat session with the AI trainer.',
tag: 'Main activity',
tagColor: 'green',
},
{
title: 'Work through the curriculum',
icon: BookOutlined,
description:
"The Curriculum Agent will present a structured learning plan for your role. Read through each module, ask questions in the chat, and the Knowledge Agent will retrieve answers grounded in your organization's uploaded documents.",
tag: 'Main activity',
tagColor: 'green',
},
{
title: 'Complete the assessment',
icon: CheckCircleOutlined,
description:
'After the curriculum, the Assessment Agent will run a quiz. Multiple-choice answers are graded automatically. Written answers are graded by the AI. You will receive per-question feedback.',
tag: 'Main activity',
tagColor: 'green',
},
{
title: 'Review your progress',
icon: DashboardOutlined,
description:
'Go to Progress in the nav to see your completed sessions, scores, and a summary generated by the Progress Monitor Agent. Your manager can also view this to track your ramp-up.',
tag: 'Optional',
tagColor: 'default',
action: { label: 'View progress', path: '/progress' },
},
]
// Ordered steps rendered in the "I am a Manager / Admin" tab.
// Same entry shape as the other step lists: title, icon component, description,
// tag label and colour, plus an optional `action` button target.
// NOTE(review): the first step prints the manager registration code in
// user-facing copy; confirm that exposing it to every visitor is intended.
const managerSteps = [
{
title: 'Register and create an organization',
icon: UserAddOutlined,
description:
'Register with the code MANAGER2026 to get manager access. After logging in, go to Organizations and create a new organization for your team. Give it a clear name, as trainees will see this.',
tag: 'Required',
tagColor: 'blue',
action: { label: 'Register now', path: '/register' },
},
{
title: 'Upload training documents',
icon: UploadOutlined,
description:
'Inside your organization, open the Manage tab and go to Training Files. Upload PDFs, DOCX, or TXT files containing your internal documentation, SOPs, or role guides. The system will process these in the background; watch for the "Ready" status before starting sessions.',
tag: 'Required',
tagColor: 'blue',
},
{
title: 'Create roles',
icon: SettingOutlined,
description:
"In the Manage tab, add roles (e.g. \"Software Engineer\", \"Customer Success\"). Each role gets its own training scope. You can attach specific documents to a role so the AI only retrieves relevant knowledge during that role's onboarding.",
tag: 'Required',
tagColor: 'blue',
},
{
title: 'Configure agent behaviour (optional)',
icon: RobotOutlined,
description:
'Go to Agents in the nav to view and configure the four agent roles: Curriculum, Knowledge, Assessment, and Progress Monitor. You can adjust the role description and system prompt to tune how the AI instructs trainees for your specific context.',
tag: 'Optional',
tagColor: 'default',
action: { label: 'Open Agents', path: '/agents' },
},
{
title: 'Invite team members',
icon: TeamOutlined,
description:
'In the Manage tab, go to Members and generate an invite link. Share this link with new hires. When they register and follow the link, they are automatically added to your organization and can begin onboarding.',
tag: 'Required',
tagColor: 'blue',
},
{
title: 'Monitor progress',
icon: DashboardOutlined,
description:
'Once trainees complete sessions, visit Progress to review scores, curriculum completion, and the AI-generated progress summaries. Use this to identify who needs follow-up and which training content may need updating.',
tag: 'Ongoing',
tagColor: 'purple',
action: { label: 'View progress', path: '/progress' },
},
]
// Part 1 of the "Inspector / Evaluator" tab: steps performed with the manager
// account (every entry is tagged 'Account 1' in orange).
// Same entry shape as the other step lists; `action` is optional.
// NOTE(review): the first step prints the manager invite code in user-facing
// copy; confirm that exposing it to every visitor is intended.
const inspectorManagerSteps = [
{
title: 'Register a manager account',
icon: UserAddOutlined,
description:
'Go to Register and sign up with any name and email. Enter the invite code MANAGER2026 to receive manager permissions. This account will be your admin perspective for the evaluation.',
tag: 'Account 1',
tagColor: 'orange',
action: { label: 'Register', path: '/register' },
},
{
title: 'Create an organization',
icon: SettingOutlined,
description:
'After logging in, navigate to Organizations and create a new organization. Give it any name you like.',
tag: 'Account 1',
tagColor: 'orange',
},
{
title: 'Create a role',
icon: RobotOutlined,
description:
'Open the Manage tab inside your organization and add a role (e.g. "Engineer"). This scopes the training content and onboarding session for the trainee.',
tag: 'Account 1',
tagColor: 'orange',
},
{
title: 'Upload a training document',
icon: UploadOutlined,
description:
'Still in the Manage tab, go to Training Files and upload any PDF or text file. You can scope it to the role you just created. The system processes it in the background; wait for the status to show Ready before starting any sessions.',
tag: 'Account 1',
tagColor: 'orange',
},
{
title: 'Generate an invite link',
icon: TeamOutlined,
description:
'In the Manage tab under Members, generate an invite link. Copy it; you will use this to join as the trainee account in the next section.',
tag: 'Account 1',
tagColor: 'orange',
},
]
// Part 2 of the "Inspector / Evaluator" tab: steps performed with the second
// (trainee) account, tagged 'Account 2' in purple. The final entry is tagged
// 'Account 1' because it sends the evaluator back to the manager account.
// Same entry shape as the other step lists; `action` is optional.
const inspectorTraineeSteps = [
{
title: 'Register a second (trainee) account',
icon: UserAddOutlined,
description:
'Open a private/incognito tab or a different browser. Register again with a different email address. No invite code needed this time; this will be your trainee perspective.',
tag: 'Account 2',
tagColor: 'purple',
action: { label: 'Register', path: '/register' },
},
{
title: 'Join via the invite link',
icon: TeamOutlined,
description:
'Paste the invite link you copied from the manager account into this browser. It will add you to the organization and assign you to the role you created.',
tag: 'Account 2',
tagColor: 'purple',
},
{
title: 'Start an onboarding session',
icon: PlayCircleOutlined,
description:
'Go to your organization and click Start Onboarding on the assigned role. The system opens a live WebSocket session with the AI trainer. Watch for the status indicators as each agent (Curriculum, Knowledge, Assessment) runs in sequence.',
tag: 'Account 2',
tagColor: 'purple',
},
{
title: 'Complete the curriculum and assessment',
icon: CheckCircleOutlined,
description:
'Work through the generated curriculum modules. Complete the final quiz; answers are graded automatically for multiple choice and by the AI for written responses. Your progress is saved after each step.',
tag: 'Account 2',
tagColor: 'purple',
},
{
title: 'Switch back to the manager account and review',
icon: EyeOutlined,
description:
"Return to your manager browser window. Go to Progress to see the trainee's completed session, scores, and the AI-generated progress summary. This is the full end-to-end loop.",
tag: 'Account 1',
tagColor: 'orange',
action: { label: 'View progress', path: '/progress' },
},
]
</script>
<template>
<div class="page">
<Card class="panel" :bordered="false">
<!-- Page heading and intro, followed by one tab per audience (trainee, manager, inspector). -->
<Typography.Title :level="2">Getting Started</Typography.Title>
<Typography.Paragraph type="secondary">
Follow the guide for your role below. If you are new to Dynavera, start with the
Trainee guide to see how an onboarding session works, or the Manager guide to set up
your organization.
</Typography.Paragraph>
<Tabs v-model:activeKey="activeTab" size="large">
<Tabs.TabPane key="trainee" tab="I am a Trainee / New Hire">
<!-- Trainee guide: one numbered row per entry in traineeSteps. -->
<Typography.Paragraph style="margin-bottom: 1.5rem; color: #6b7280">
You have been invited to an organization and want to begin your onboarding session.
</Typography.Paragraph>
<div class="steps-list">
<div
v-for="(step, index) in traineeSteps"
:key="step.title"
class="step-row"
>
<div class="step-number">{{ index + 1 }}</div>
<div class="step-body">
<div class="step-header">
<component :is="step.icon" class="step-icon" />
<Typography.Text strong style="font-size: 1rem">
{{ step.title }}
</Typography.Text>
<Tag :color="step.tagColor" style="margin-left: 0.5rem">
{{ step.tag }}
</Tag>
</div>
<Typography.Paragraph
type="secondary"
style="margin: 0.4rem 0 0.6rem"
>
{{ step.description }}
</Typography.Paragraph>
<!-- The button is rendered only for steps that define an action. -->
<Button
v-if="step.action"
type="primary"
size="small"
@click="router.push(step.action.path)"
>
{{ step.action.label }}
</Button>
</div>
</div>
</div>
</Tabs.TabPane>
<Tabs.TabPane key="manager" tab="I am a Manager / Admin">
<!-- Manager guide: same row layout, driven by managerSteps. -->
<Typography.Paragraph style="margin-bottom: 1.5rem; color: #6b7280">
You want to set up your organization, upload training materials, and onboard
new hires.
</Typography.Paragraph>
<div class="steps-list">
<div
v-for="(step, index) in managerSteps"
:key="step.title"
class="step-row"
>
<div class="step-number">{{ index + 1 }}</div>
<div class="step-body">
<div class="step-header">
<component :is="step.icon" class="step-icon" />
<Typography.Text strong style="font-size: 1rem">
{{ step.title }}
</Typography.Text>
<Tag :color="step.tagColor" style="margin-left: 0.5rem">
{{ step.tag }}
</Tag>
</div>
<Typography.Paragraph
type="secondary"
style="margin: 0.4rem 0 0.6rem"
>
{{ step.description }}
</Typography.Paragraph>
<Button
v-if="step.action"
type="primary"
size="small"
@click="router.push(step.action.path)"
>
{{ step.action.label }}
</Button>
</div>
</div>
</div>
</Tabs.TabPane>
<Tabs.TabPane key="inspector" tab="Inspector / Evaluator">
<!-- Evaluator guide: two step lists (manager account, then trainee account) separated by a divider. -->
<Typography.Paragraph style="margin-bottom: 0.25rem; color: #6b7280">
To evaluate the full system, create two separate accounts: one as a manager
and one as a trainee. Follow both sets of steps in order. Use a private /
incognito window for the second account so both sessions stay open at once.
</Typography.Paragraph>
<Typography.Paragraph style="margin-bottom: 1.5rem; color: #6b7280">
The GPU inference node runs on a personal machine and may occasionally be
offline. If the AI chat stalls, the inference layer is unavailable; refer to
the report for local setup instructions.
</Typography.Paragraph>
<Typography.Title :level="4" style="margin-bottom: 1rem">
Part 1: Manager account
<Tag color="orange" style="margin-left: 0.5rem; vertical-align: middle">Account 1</Tag>
</Typography.Title>
<div class="steps-list">
<div
v-for="(step, index) in inspectorManagerSteps"
:key="step.title"
class="step-row"
>
<!-- The orange number badge matches the 'Account 1' tag colour. -->
<div class="step-number step-number--orange">{{ index + 1 }}</div>
<div class="step-body">
<div class="step-header">
<component :is="step.icon" class="step-icon" />
<Typography.Text strong style="font-size: 1rem">
{{ step.title }}
</Typography.Text>
<Tag :color="step.tagColor" style="margin-left: 0.5rem">
{{ step.tag }}
</Tag>
</div>
<Typography.Paragraph
type="secondary"
style="margin: 0.4rem 0 0.6rem"
>
{{ step.description }}
</Typography.Paragraph>
<Button
v-if="step.action"
type="primary"
size="small"
@click="router.push(step.action.path)"
>
{{ step.action.label }}
</Button>
</div>
</div>
</div>
<Divider />
<Typography.Title :level="4" style="margin-bottom: 1rem">
Part 2: Trainee account
<Tag color="purple" style="margin-left: 0.5rem; vertical-align: middle">Account 2</Tag>
</Typography.Title>
<div class="steps-list">
<div
v-for="(step, index) in inspectorTraineeSteps"
:key="step.title"
class="step-row"
>
<!-- The purple number badge matches the 'Account 2' tag colour. -->
<div class="step-number step-number--purple">{{ index + 1 }}</div>
<div class="step-body">
<div class="step-header">
<component :is="step.icon" class="step-icon" />
<Typography.Text strong style="font-size: 1rem">
{{ step.title }}
</Typography.Text>
<Tag :color="step.tagColor" style="margin-left: 0.5rem">
{{ step.tag }}
</Tag>
</div>
<Typography.Paragraph
type="secondary"
style="margin: 0.4rem 0 0.6rem"
>
{{ step.description }}
</Typography.Paragraph>
<Button
v-if="step.action"
type="primary"
size="small"
@click="router.push(step.action.path)"
>
{{ step.action.label }}
</Button>
</div>
</div>
</div>
</Tabs.TabPane>
</Tabs>
<Divider />
<!-- Footer shortcuts shown below every tab: registration and the About page. -->
<Space>
<Button type="primary" @click="router.push('/register')">
<UserAddOutlined />
Register to get started
</Button>
<Button @click="router.push('/about')">About Dynavera</Button>
</Space>
</Card>
</div>
</template>
<style scoped>
/* Outer wrapper and card share the same 860px width cap; only the card is centred. */
.page {
max-width: 860px;
}
.panel {
max-width: 860px;
margin: 0 auto;
}
/* Vertical stack of step rows. */
.steps-list {
display: flex;
flex-direction: column;
gap: 1.25rem;
}
/* One step: number badge on the left, content box on the right, aligned to the top. */
.step-row {
display: flex;
gap: 1rem;
align-items: flex-start;
}
/* Circular numbered badge; blue by default, flex-shrink: 0 keeps it at a fixed size. */
.step-number {
flex-shrink: 0;
width: 2rem;
height: 2rem;
border-radius: 50%;
background: #2563eb;
color: #fff;
display: flex;
align-items: center;
justify-content: center;
font-weight: 700;
font-size: 0.9rem;
margin-top: 0.1rem;
}
/* Badge colour variants used by the two inspector step lists. */
.step-number--orange {
background: #d97706;
}
.step-number--purple {
background: #7c3aed;
}
/* Bordered content box that takes the remaining row width. */
.step-body {
flex: 1;
background: #f8fafc;
border: 1px solid #dbe3ec;
border-radius: 8px;
padding: 0.9rem 1.1rem;
}
/* Icon, title and tag on one line; wraps when space runs out. */
.step-header {
display: flex;
align-items: center;
gap: 0.5rem;
flex-wrap: wrap;
}
.step-icon {
font-size: 1.1rem;
color: #2563eb;
}
</style>

View file

@ -39,7 +39,7 @@ const features = [
{
title: 'Knowledge Mesh',
description:
'Ingest docs, wikis, and repos to keep assistants current with zero manual updates.',
'Ingest docs, wikis, and repos — keep assistants current with zero manual updates.',
icon: CloudTwoTone,
},
]
@ -99,11 +99,11 @@ const logos = [
faster.
</Typography.Paragraph>
<Space>
<RouterLink to="/getting-started">
<Button type="primary" size="large">Get Started</Button>
</RouterLink>
<RouterLink to="/about">
<Button size="large">Learn More</Button>
<Button type="primary" size="large">Learn More</Button>
</RouterLink>
<RouterLink to="/organization">
<Button size="large">See Onboarding Flows</Button>
</RouterLink>
</Space>
<Divider />

View file

@ -56,7 +56,7 @@ const fetchTrainingFileWarning = async () => {
const allFiles: { status: string; scope?: string }[] = Array.isArray(res.data)
? res.data
: (res.data as { results?: { status: string; scope?: string }[] }).results ?? []
// Only consider role-scoped files; org-wide files apply to all roles
// Only consider role-scoped files — org-wide files apply to all roles
// and their ingestion state shouldn't block a specific role's onboarding
const files = allFiles.filter((f) => f.scope === 'role')
const ingesting = files.filter((f) => f.status === 'ingesting').length

View file

@ -18,7 +18,6 @@ import {
Upload,
Steps,
Table,
Tooltip,
} from 'ant-design-vue'
import { apiClient, isAxiosError, API } from '../router/api'
import { useUserStore } from '../stores/userStore'
@ -325,20 +324,17 @@ const trainingFileColumns = [
},
{
title: 'Status',
dataIndex: 'status',
key: 'status',
customRender: ({ record }: { record: TrainingFile }) => {
customRender: ({ value }: { value: string }) => {
const statusMap: Record<string, { color: string; label: string }> = {
ingesting: { color: 'processing', label: 'Ingesting' },
chunked: { color: 'blue', label: 'Chunked' },
embedded: { color: 'success', label: 'Embedded' },
failed: { color: 'error', label: 'Failed' },
}
const status = statusMap[record.status] || { color: 'default', label: record.status }
const tag = h(Tag, { color: status.color }, () => status.label)
if (record.status === 'failed' && record.error_message) {
return h(Tooltip, { title: record.error_message }, () => tag)
}
return tag
const status = statusMap[value] || { color: 'default', label: value }
return h(Tag, { color: status.color }, () => status.label)
},
},
{
@ -566,19 +562,7 @@ const createInvite = async () => {
}
}
const copyToClipboard = async (text: string): Promise<boolean> => {
const safeText = String(text || '').trim()
if (!safeText) return false
if (window.isSecureContext && window.navigator.clipboard?.writeText) {
try {
await window.navigator.clipboard.writeText(safeText)
return true
} catch {
// Fallback to older method if clipboard API fails
}
}
const fallbackCopyText = (text: string): boolean => {
const textarea = document.createElement('textarea')
textarea.value = text
textarea.setAttribute('readonly', 'true')
@ -594,6 +578,22 @@ const copyToClipboard = async (text: string): Promise<boolean> => {
return copied
}
const copyToClipboard = async (text: string): Promise<boolean> => {
const safeText = String(text || '').trim()
if (!safeText) return false
if (window.isSecureContext && window.navigator.clipboard?.writeText) {
try {
await window.navigator.clipboard.writeText(safeText)
return true
} catch {
// Fall through to legacy copy for restricted browser contexts.
}
}
return fallbackCopyText(safeText)
}
const copyInviteUrl = async () => {
const copied = await copyToClipboard(newInviteUrl.value)
if (copied) {
@ -962,7 +962,7 @@ onUnmounted(() => {
size="small"
/>
<Typography.Paragraph v-else type="secondary">
No training files uploaded yet. Use the Upload Training File button to add files. You can scope them to a specific role or make them available to all roles.
No training files uploaded yet. Use the Upload Training File button to add files — you can scope them to a specific role or make them available to all roles.
</Typography.Paragraph>
</div>
</Tabs.TabPane>

View file

@ -32,10 +32,6 @@ authentication, and connectivity to internal systems.`
const router = useRouter()
const REPO = 'https://git.cs.bham.ac.uk/projects-2025-26/vxn217'
const DEV_COMPOSE = `${REPO}/-/blob/main/compose/dev/docker-compose.yml`
const PROD_COMPOSE = `${REPO}/-/blob/main/compose/prod/docker-compose.yml`
const selfHostSteps = [
'Clone the repository locally',
'Copy and edit .env.template (or create .env) with your settings',
@ -77,13 +73,11 @@ const selfHostSteps = [
<Button
type="primary"
v-if="plan.name === 'Community'"
@click="router.push('/getting-started')"
@click="router.push('/login')"
>
Get Started
</Button>
<a v-else :href="PROD_COMPOSE" target="_blank" rel="noopener">
<Button>Self-Host</Button>
</a>
<Button v-else>Self-Host</Button>
</Space>
</Card>
</Col>
@ -112,12 +106,8 @@ const selfHostSteps = [
the production compose when preparing a production deployment.
</Typography.Paragraph>
<Space>
<a :href="DEV_COMPOSE" target="_blank" rel="noopener">
<Button>Development compose</Button>
</a>
<a :href="PROD_COMPOSE" target="_blank" rel="noopener">
<Button type="primary">Production compose</Button>
</a>
<Button>Use development compose</Button>
<Button>Use production compose</Button>
</Space>
<Divider />

View file

@ -1,56 +0,0 @@
```yaml
# Recording configuration: target site, browser viewport and theme, the
# scripted scenarios to replay, and the video export settings.
project:
name: "Dynavera"
url: "https://fyp.viswamedha.com"
viewport: { width: 1440, height: 900, deviceScaleFactor: 2 }
theme: "light"
# Each scenario starts at `page` and lists its actions in order.
scenarios:
# Scenario 1: visit the public menu pages, then log in (manager, per the scenario name).
- id: "manager-login-and-dashboard"
name: "Manager Login and Dashboard Interactions"
type: "screenshot+video"
page: "/"
actions:
- dismiss_cookies: true
- wait: 2000
- screenshot: { name: "login-screen" }
- click: { selector: ".ant-menu-item:nth-child(2)" } # About
- wait: 1500
- click: { selector: ".ant-menu-item:nth-child(3)" } # Getting Started
- wait: 1500
- click: { selector: ".ant-menu-item:nth-child(4)" } # Pricing
- wait: 1500
- screenshot: { name: "pricing-page" }
- click: { selector: ".ant-btn-background-ghost" } # Login button
- wait: 1500
# NOTE(review): login credentials are stored in plain text; confirm this is a throwaway demo account.
- type: { selector: "#email", text: "haleisaac@example.com" }
- type: { selector: "#password", text: "password" }
- click: { selector: ".ant-btn-primary" } # Submit login form
- wait_for_selector: { selector: ".header .ant-btn-primary" }
- screenshot: { name: "dashboard" }
- hover_cards: { selector: ".ant-tabs-tab-btn", delay: 1000 }
- wait: 2000
# Scenario 2: log in (trainee, per the scenario name) and start onboarding.
# NOTE(review): reuses the screenshot name "login-screen" from scenario 1; confirm the outputs do not collide.
- id: "trainee-login-and-onboarding"
name: "Trainee Login and Onboarding Interactions"
type: "screenshot+video"
page: "/"
actions:
- dismiss_cookies: true
- wait: 2000
- screenshot: { name: "login-screen" }
- click: { selector: ".ant-btn-background-ghost" } # Login button
- wait: 1500
# NOTE(review): login credentials are stored in plain text; confirm this is a throwaway demo account.
- type: { selector: "#email", text: "j.thompson@example.com" }
- type: { selector: "#password", text: "password" }
- click: { selector: ".ant-btn-primary" } # Submit login form
- wait_for_selector: { selector: ".role-item .ant-btn-default" }
- screenshot: { name: "onboarding-screen" }
- click: { selector: ".role-item .ant-btn-default:first-child" } # Start onboarding
- wait: 5000
# Output settings for the per-scenario videos and the combined recording.
export:
videos: { format: "mp4", fps: 24, crf: 18 }
combined_video: { enabled: true, name: "demo-recording.mp4" }
```

View file

@ -1,22 +0,0 @@
{
"baseUrl": "https://fyp.viswamedha.com",
"siteName": "Dynavera",
"pages": [
{
"url": "/",
"title": "Dynavera",
"metaDescription": "",
"headings": [],
"sections": [
"app"
],
"interactive": [],
"hasForms": false,
"estimatedScrollHeight": 900,
"navLinks": []
}
],
"allLinks": [
"/"
]
}