Compare commits
10 commits
f74337f60f
...
c2da78b8d5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c2da78b8d5 | ||
|
|
4ae45ff647 | ||
|
|
9afabeb6f7 | ||
|
|
9eb574e8dc | ||
|
|
8d9abcdfa6 | ||
|
|
1a101d0f56 | ||
|
|
b6b37a4a71 | ||
|
|
8bdd415b4d | ||
|
|
9044e2afaa | ||
|
|
a6ed273a13 |
38 changed files with 3840 additions and 296 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -275,3 +275,4 @@ models/
|
|||
# Cache
|
||||
hf_cache/
|
||||
|
||||
launchreel-output/
|
||||
484
apps/accounts/management/commands/benchmark.py
Normal file
484
apps/accounts/management/commands/benchmark.py
Normal file
|
|
@ -0,0 +1,484 @@
|
|||
import datetime
|
||||
import json
|
||||
import statistics
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db.models import Q
|
||||
from pgvector.django import CosineDistance
|
||||
|
||||
from apps.accounts.models import Organization, Role, User
|
||||
from apps.knowledge.models import KnowledgeChunk, TrainingFile
|
||||
from apps.onboarding.models import OnboardingSession
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "Benchmark Dynavera system components: GPU inference server, pgvector retrieval, and database."
|
||||
|
||||
def add_arguments(self, parser):
    """Register the command-line options accepted by the benchmark command.

    Args:
        parser: The argument parser to extend. Three options are added:
            ``--runs`` (int, default 5), ``--out`` (str, default
            ``"benchmarks"``) and the ``--skip-llm`` boolean flag.
    """
    # Each entry is (flag, keyword arguments for parser.add_argument).
    option_specs = (
        (
            "--runs",
            {
                "type": int,
                "default": 5,
                "help": "Repetitions per latency benchmark (default: 5)",
            },
        ),
        (
            "--out",
            {
                "type": str,
                "default": "benchmarks",
                "help": "Output directory for the results file (default: benchmarks/)",
            },
        ),
        (
            "--skip-llm",
            {
                "action": "store_true",
                "help": "Skip LLM inference benchmarks (each prompt takes ~30 s)",
            },
        ),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
|
||||
|
||||
def handle(self, *args, **options):
    """Run every benchmark stage in order, then print and save the results.

    Reads the ``runs``, ``skip_llm`` and ``out`` options, creates the output
    directory, prints a banner, runs the stages (health, embeddings,
    chunking, optional LLM, database, retrieval) and finally prints the
    summary and writes the report file.

    Args:
        *args: Unused positional arguments.
        **options: Parsed command options; ``runs``, ``skip_llm`` and
            ``out`` are read.

    Raises:
        CommandError: If ``runs`` is smaller than 1.
    """
    # Imported here so the check does not rely on the module's import block.
    from django.core.management.base import CommandError

    self.runs = options["runs"]
    if self.runs < 1:
        # With zero repetitions the latency sample lists stay empty and the
        # statistics helper fails halfway through the run.
        raise CommandError("--runs must be at least 1")
    self.skip_llm = options["skip_llm"]
    self.out_dir = Path(options["out"])
    # parents=True lets --out point at a nested directory that does not exist yet.
    self.out_dir.mkdir(parents=True, exist_ok=True)
    self.results = {}

    self.stdout.write(self.style.SUCCESS("\n=== Dynavera System Benchmark ==="))
    self.stdout.write(f" Inference endpoint : {settings.INFERENCE_URL}")
    self.stdout.write(f" Repetitions : {self.runs}")
    self.stdout.write(f" LLM benchmarks : {'SKIPPED (--skip-llm)' if self.skip_llm else 'ENABLED'}\n")

    self._bench_health()
    self._bench_embeddings()
    self._bench_chunking()
    if not self.skip_llm:
        self._bench_llm()
    self._bench_database()
    self._bench_retrieval()
    self._print_summary()
    self._save_report()
|
||||
|
||||
def _req(self, method, path, **kwargs):
    """Send one HTTP request to the inference server and return its JSON body.

    Args:
        method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
        path: Path appended to ``settings.INFERENCE_URL``.
        **kwargs: Extra keyword arguments forwarded to ``httpx.request``
            (for example ``json=``). ``auth`` defaults to
            ``settings.INFERENCE_AUTH`` and ``timeout`` to 180 when the
            caller does not supply them.

    Returns:
        The decoded JSON response.

    Raises:
        httpx.HTTPStatusError: If the response has a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    # setdefault instead of fixed keyword arguments: a caller that passes its
    # own timeout/auth would otherwise hit "got multiple values for keyword
    # argument".
    kwargs.setdefault("auth", settings.INFERENCE_AUTH)
    kwargs.setdefault("timeout", 180)
    resp = httpx.request(method, f"{settings.INFERENCE_URL}{path}", **kwargs)
    resp.raise_for_status()
    return resp.json()
|
||||
|
||||
def _time_fn(self, fn):
|
||||
t0 = time.perf_counter()
|
||||
result = fn()
|
||||
return result, (time.perf_counter() - t0) * 1000
|
||||
|
||||
def _stats(self, times_ms):
|
||||
s = sorted(times_ms)
|
||||
n = len(s)
|
||||
p95_idx = min(n - 1, int(-(-(0.95 * n) // 1)) - 1)
|
||||
return {
|
||||
"mean_ms": round(statistics.mean(s), 1),
|
||||
"median_ms": round(statistics.median(s), 1),
|
||||
"p95_ms": round(s[p95_idx], 1),
|
||||
"min_ms": round(s[0], 1),
|
||||
"max_ms": round(s[-1], 1),
|
||||
}
|
||||
|
||||
def _bench_health(self):
|
||||
self.stdout.write("[ 1/6 ] GPU server health check ...")
|
||||
try:
|
||||
data, ms = self._time_fn(lambda: self._req("GET", "/health"))
|
||||
ok = data.get("status") == "ok"
|
||||
self.results["health"] = {
|
||||
"status": "OK" if ok else "DEGRADED",
|
||||
"llm_ready": data.get("llm_ready", False),
|
||||
"embed_ready": data.get("embedding_ready", False),
|
||||
"latency_ms": round(ms, 1),
|
||||
}
|
||||
h = self.results["health"]
|
||||
self.stdout.write(
|
||||
f" {h['status']} | LLM: {'ready' if h['llm_ready'] else 'unloaded'} "
|
||||
f"| Embed: {'ready' if h['embed_ready'] else 'not ready'} | {ms:.0f} ms"
|
||||
)
|
||||
except Exception as exc:
|
||||
self.results["health"] = {"status": "ERROR", "error": str(exc)}
|
||||
self.stdout.write(self.style.ERROR(f" FAILED: {exc}"))
|
||||
|
||||
def _bench_embeddings(self):
|
||||
self.stdout.write(f"\n[ 2/6 ] Embedding latency ({self.runs} runs × 3 query lengths) ...")
|
||||
queries = {
|
||||
"short ": "What is onboarding?",
|
||||
"medium ": (
|
||||
"Explain the process for configuring access control policies for a new software engineer "
|
||||
"joining the platform team, including approval workflows and tool provisioning steps."
|
||||
),
|
||||
"long ": (
|
||||
"A new hire on the infrastructure team needs to understand our CI/CD pipeline, deployment "
|
||||
"procedures, incident response protocols, monitoring dashboards, on-call rotation policy, "
|
||||
"and how to request access to production systems. Provide a comprehensive overview of all "
|
||||
"these areas including the relevant tools, key contacts, and escalation procedures they "
|
||||
"should be aware of during their first week and first month at the company."
|
||||
),
|
||||
}
|
||||
embed_results = {}
|
||||
for label, query in queries.items():
|
||||
times = []
|
||||
for _ in range(self.runs):
|
||||
_, ms = self._time_fn(lambda q=query: self._req("POST", "/v1/embeddings", json={"input": q}))
|
||||
times.append(ms)
|
||||
st = self._stats(times)
|
||||
embed_results[label.strip()] = {"query_chars": len(query), **st}
|
||||
self.stdout.write(
|
||||
f" {label}({len(query):4d} chars) mean={st['mean_ms']:.0f} ms "
|
||||
f"p95={st['p95_ms']:.0f} ms min={st['min_ms']:.0f} ms max={st['max_ms']:.0f} ms"
|
||||
)
|
||||
self.results["embeddings"] = embed_results
|
||||
|
||||
def _bench_chunking(self):
|
||||
self.stdout.write("\n[ 3/6 ] Semantic chunking latency ...")
|
||||
texts = {
|
||||
"small (~200 c)": "a " * 100,
|
||||
"medium (~2k c) ": (
|
||||
"This section covers the onboarding process for new employees joining the engineering team. "
|
||||
"You will learn about code review practices, deployment procedures, incident response, and "
|
||||
"team communication protocols. Each topic is covered in depth with examples and references "
|
||||
"to internal documentation. All engineers are expected to complete this module in week one. "
|
||||
) * 5,
|
||||
"large (~8k c) ": (
|
||||
"The infrastructure team manages all cloud resources, CI/CD pipelines, and production environments. "
|
||||
"New members are expected to understand Kubernetes cluster management, Terraform IaC, "
|
||||
"GitLab CI pipeline authoring, monitoring with Grafana and Prometheus, and incident response procedures. "
|
||||
"This document provides a comprehensive guide to each area including runbooks and escalation paths. "
|
||||
) * 20,
|
||||
}
|
||||
chunk_results = {}
|
||||
for label, text in texts.items():
|
||||
try:
|
||||
result, ms = self._time_fn(lambda t=text: self._req("POST", "/v1/semantic-chunk", json={"text": t}))
|
||||
n = len(result.get("chunks", []))
|
||||
chunk_results[label.strip()] = {"chars": len(text), "chunks_produced": n, "latency_ms": round(ms, 1)}
|
||||
self.stdout.write(f" {label} → {n} chunks | {ms:.0f} ms")
|
||||
except Exception as exc:
|
||||
chunk_results[label.strip()] = {"error": str(exc)}
|
||||
self.stdout.write(self.style.ERROR(f" {label} FAILED: {exc}"))
|
||||
self.results["chunking"] = chunk_results
|
||||
|
||||
def _bench_llm(self):
|
||||
self.stdout.write("\n[ 4/6 ] LLM inference latency (each prompt is a single non-streaming call) ...")
|
||||
prompts = [
|
||||
{
|
||||
"label": "short_qa",
|
||||
"system": "You are an onboarding assistant.",
|
||||
"user": "What does a Kubernetes pod do? Answer in 2 sentences.",
|
||||
"max_tokens": 128,
|
||||
},
|
||||
{
|
||||
"label": "progress_summary",
|
||||
"system": "You are an onboarding assistant.",
|
||||
"user": (
|
||||
"A trainee has completed: Git Basics, CI/CD Pipelines, Code Review. Score: 85%. "
|
||||
"Write a 2-sentence progress summary."
|
||||
),
|
||||
"max_tokens": 128,
|
||||
},
|
||||
{
|
||||
"label": "curriculum_gen",
|
||||
"system": "You are an onboarding assistant. Output only a valid JSON array of strings.",
|
||||
"user": (
|
||||
"Create a 6-module onboarding curriculum for a Software Engineer role focused on "
|
||||
"backend services. Output ONLY a JSON array of module title strings."
|
||||
),
|
||||
"max_tokens": 256,
|
||||
},
|
||||
{
|
||||
"label": "assessment_gen",
|
||||
"system": "You are an onboarding assistant. Output only valid JSON.",
|
||||
"user": (
|
||||
"Generate 3 multiple-choice questions to assess understanding of CI/CD pipelines. "
|
||||
"Output as a JSON array of objects with keys: question, options (array of 4), answer."
|
||||
),
|
||||
"max_tokens": 512,
|
||||
},
|
||||
{
|
||||
"label": "knowledge_explanation",
|
||||
"system": "You are an onboarding assistant.",
|
||||
"user": (
|
||||
"Explain Git branching strategy best practices for a new engineer. "
|
||||
"Cover: feature branches, naming conventions, merge vs rebase, and PR workflow. "
|
||||
"Use clear headings and bullet points. Target ~400 words."
|
||||
),
|
||||
"max_tokens": 700,
|
||||
},
|
||||
]
|
||||
llm_results = {}
|
||||
for p in prompts:
|
||||
self.stdout.write(f" {p['label']} (max_tokens={p['max_tokens']}) ...", ending="")
|
||||
self.stdout.flush()
|
||||
try:
|
||||
t0 = time.perf_counter()
|
||||
data = self._req(
|
||||
"POST",
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"messages": [
|
||||
{"role": "system", "content": p["system"]},
|
||||
{"role": "user", "content": p["user"]},
|
||||
],
|
||||
"max_tokens": p["max_tokens"],
|
||||
"stream": False,
|
||||
},
|
||||
)
|
||||
elapsed_s = time.perf_counter() - t0
|
||||
usage = data.get("usage", {})
|
||||
ct = usage.get("completion_tokens", 0)
|
||||
pt = usage.get("prompt_tokens", 0)
|
||||
tps = round(ct / elapsed_s, 1) if elapsed_s > 0 and ct > 0 else 0
|
||||
preview = (data["choices"][0]["message"]["content"] or "")[:100].replace("\n", " ")
|
||||
llm_results[p["label"]] = {
|
||||
"elapsed_s": round(elapsed_s, 2),
|
||||
"prompt_tokens": pt,
|
||||
"completion_tokens": ct,
|
||||
"tokens_per_sec": tps,
|
||||
"response_preview": preview,
|
||||
}
|
||||
self.stdout.write(f" {elapsed_s:.1f} s | {ct} tokens | {tps} tok/s")
|
||||
except Exception as exc:
|
||||
llm_results[p["label"]] = {"error": str(exc)}
|
||||
self.stdout.write(self.style.ERROR(f" FAILED: {exc}"))
|
||||
self.results["llm"] = llm_results
|
||||
|
||||
def _bench_database(self):
    """Collect row counts for the main tables into ``self.results["database"]``.

    A probe query against ``knowledge_knowledgechunk`` runs first; if it
    raises, the stage is recorded as skipped and the method returns. If any
    count fails afterwards, the stage is recorded as ``{"error": ...}``.
    """
    self.stdout.write("\n[ 5/6 ] Database statistics ...")

    # Probe one table before counting anything.
    try:
        from django.db import connection

        with connection.cursor() as cur:
            cur.execute("SELECT 1 FROM knowledge_knowledgechunk LIMIT 1")
    except Exception:
        self.stdout.write(self.style.WARNING(" Tables missing — run 'manage.py migrate' first. Skipping."))
        self.results["database"] = {"skipped": "Migrations not applied."}
        return

    try:
        # (result key, manager or queryset to count), in reporting order.
        countables = (
            ("organizations", Organization.objects),
            ("roles", Role.objects),
            ("users", User.objects),
            ("training_files_total", TrainingFile.objects),
            ("training_files_embedded", TrainingFile.objects.filter(status="embedded")),
            (
                "knowledge_chunks_with_embeddings",
                KnowledgeChunk.objects.filter(embedding__isnull=False, is_active=True),
            ),
            ("onboarding_sessions", OnboardingSession.objects),
        )
        counts = {key: source.count() for key, source in countables}
        self.results["database"] = counts

        self.stdout.write(
            f" Orgs: {counts['organizations']} | Roles: {counts['roles']} | Users: {counts['users']}"
        )
        self.stdout.write(
            f" Training files: {counts['training_files_total']} total ({counts['training_files_embedded']} embedded)"
        )
        self.stdout.write(
            f" Knowledge chunks (with embeddings): {counts['knowledge_chunks_with_embeddings']}"
        )
        self.stdout.write(f" Onboarding sessions: {counts['onboarding_sessions']}")
    except Exception as exc:
        self.results["database"] = {"error": str(exc)}
        self.stdout.write(self.style.ERROR(f" FAILED: {exc}"))
|
||||
|
||||
def _bench_retrieval(self):
    """Measure pgvector cosine-distance retrieval latency for top-k of 5, 10 and 20.

    Picks the first role that has at least one chunk with an embedding,
    embeds a fixed query through ``/v1/embeddings`` and then runs the
    retrieval query ``self.runs`` times per top-k value. Results are stored
    in ``self.results["retrieval"]``. The stage is recorded as skipped when
    the role lookup raises or finds no role, and as an error when the query
    embedding cannot be produced.
    """
    self.stdout.write(f"\n[ 6/6 ] pgvector retrieval latency ({self.runs} runs × top-k ∈ [5, 10, 20]) ...")
    try:
        role = Role.objects.filter(knowledge_chunks__embedding__isnull=False).distinct().first()
    except Exception as exc:
        self.stdout.write(self.style.WARNING(f" DB not ready ({exc}). Skipping."))
        self.results["retrieval"] = {"skipped": str(exc)}
        return
    if role is None:
        self.stdout.write(self.style.WARNING(" No role with embedded chunks — skipping."))
        self.results["retrieval"] = {"skipped": "No embedded chunks found in database."}
        return

    query = "What are the key responsibilities, tools, and procedures for this role?"
    self.stdout.write(f" Role: {role.name} (org: {role.organization.name})")
    self.stdout.write(f" Query: \"{query}\"")

    # The query is embedded once, outside the timed loop, so the timings
    # below cover only the database query.
    try:
        embed_data = self._req("POST", "/v1/embeddings", json={"input": query})
        query_vector = embed_data["data"][0]["embedding"]
    except Exception as exc:
        self.results["retrieval"] = {"error": f"Could not generate query embedding: {exc}"}
        self.stdout.write(self.style.ERROR(f" FAILED to get embedding: {exc}"))
        return

    # NOTE(review): this count covers every active embedded chunk, while the
    # timed query below is limited to the role's organization — confirm that
    # the "total chunks" figure is meant to be database-wide.
    total_chunks = KnowledgeChunk.objects.filter(embedding__isnull=False, is_active=True).count()
    retrieval_results = {}
    for top_k in [5, 10, 20]:
        times = []
        n_returned = 0
        for _ in range(self.runs):
            t0 = time.perf_counter()
            # list() forces evaluation, so the timed region includes building
            # the queryset, running it and materialising the rows.
            chunks = list(
                KnowledgeChunk.objects.filter(
                    organization=role.organization,
                    embedding__isnull=False,
                    is_active=True,
                ).filter(
                    Q(role=role) | Q(role__isnull=True)
                ).annotate(
                    distance=CosineDistance("embedding", query_vector)
                ).order_by("distance")[:top_k]
            )
            times.append((time.perf_counter() - t0) * 1000)
            # Keeps the row count of the most recent run.
            n_returned = len(chunks)
        st = self._stats(times)
        retrieval_results[f"top_{top_k}"] = {"results_returned": n_returned, **st}
        self.stdout.write(
            f" top-{top_k:2d}: mean={st['mean_ms']:.1f} ms "
            f"p95={st['p95_ms']:.1f} ms min={st['min_ms']:.1f} ms max={st['max_ms']:.1f} ms"
        )
    self.results["retrieval"] = {
        "role": role.name,
        "organization": role.organization.name,
        "query": query,
        "total_chunks_in_db": total_chunks,
        "results": retrieval_results,
    }
|
||||
|
||||
def _print_summary(self):
|
||||
self.stdout.write(self.style.SUCCESS("\n=== Summary ===\n"))
|
||||
h = self.results.get("health", {})
|
||||
self.stdout.write(f" GPU Server : {h.get('status', 'N/A')} — LLM {'ready' if h.get('llm_ready') else 'unloaded'}, embed {'ready' if h.get('embed_ready') else 'N/A'}")
|
||||
|
||||
emb = self.results.get("embeddings", {})
|
||||
means = [v["mean_ms"] for v in emb.values() if "mean_ms" in v]
|
||||
if means:
|
||||
self.stdout.write(f" Embedding : {min(means):.0f}–{max(means):.0f} ms (mean across query lengths)")
|
||||
|
||||
chnk = self.results.get("chunking", {})
|
||||
lats = [v["latency_ms"] for v in chnk.values() if "latency_ms" in v]
|
||||
if lats:
|
||||
self.stdout.write(f" Chunking : {min(lats):.0f}–{max(lats):.0f} ms range by text size")
|
||||
|
||||
llm = self.results.get("llm", {})
|
||||
elapsed = [v["elapsed_s"] for v in llm.values() if "elapsed_s" in v]
|
||||
tps_all = [v["tokens_per_sec"] for v in llm.values() if "tokens_per_sec" in v and v["tokens_per_sec"] > 0]
|
||||
if elapsed:
|
||||
self.stdout.write(
|
||||
f" LLM inference : {min(elapsed):.1f}–{max(elapsed):.1f} s range"
|
||||
+ (f" | {statistics.mean(tps_all):.1f} tok/s avg" if tps_all else "")
|
||||
)
|
||||
|
||||
ret = self.results.get("retrieval", {})
|
||||
r5 = ret.get("results", {}).get("top_5", {})
|
||||
if r5.get("mean_ms"):
|
||||
self.stdout.write(f" RAG retrieval : {r5['mean_ms']:.1f} ms mean (top-5, {ret.get('total_chunks_in_db', '?')} total chunks)")
|
||||
|
||||
db = self.results.get("database", {})
|
||||
if "knowledge_chunks_with_embeddings" in db:
|
||||
self.stdout.write(
|
||||
f" Knowledge base : {db['knowledge_chunks_with_embeddings']} chunks from "
|
||||
f"{db['training_files_embedded']} embedded files"
|
||||
)
|
||||
|
||||
def _save_report(self):
    """Write the collected results to a timestamped Markdown file.

    The file ``results_<timestamp>.md`` is created in ``self.out_dir``. It
    has one section per stage that produced data, followed by the full
    ``self.results`` structure as a JSON code block. Sections for stages
    without usable data are left out.
    """
    # One clock reading for both the file name and the "Date" line, so the
    # two can never disagree across a second boundary.
    now = datetime.datetime.now()
    ts = now.strftime("%Y-%m-%d_%H-%M-%S")
    path = self.out_dir / f"results_{ts}.md"

    lines = [
        "# Dynavera Benchmark Results",
        "",
        f"**Date:** {now.strftime('%Y-%m-%d %H:%M:%S')} ",
        f"**Inference endpoint:** `{settings.INFERENCE_URL}` ",
        f"**Repetitions per benchmark:** {self.runs} ",
        "",
    ]

    h = self.results.get("health", {})
    lines += [
        "## 1. GPU Server Health",
        "",
        "| Field | Value |",
        "|---|---|",
        f"| Status | {h.get('status', 'N/A')} |",
        f"| LLM Ready | {h.get('llm_ready', 'N/A')} |",
        f"| Embed Ready | {h.get('embed_ready', 'N/A')} |",
        f"| Health check RTT | {h.get('latency_ms', 'N/A')} ms |",
        "",
    ]

    emb = self.results.get("embeddings", {})
    if emb:
        lines += [
            "## 2. Embedding Latency",
            "",
            "| Query type | Chars | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |",
            "|---|---|---|---|---|---|---|",
        ]
        for label, v in emb.items():
            if "mean_ms" in v:
                lines.append(f"| {label} | {v['query_chars']} | {v['mean_ms']} | {v['median_ms']} | {v['p95_ms']} | {v['min_ms']} | {v['max_ms']} |")
        lines.append("")

    chnk = self.results.get("chunking", {})
    if chnk:
        lines += [
            "## 3. Semantic Chunking Latency",
            "",
            "| Input size | Chars | Chunks produced | Latency (ms) |",
            "|---|---|---|---|",
        ]
        for label, v in chnk.items():
            if "latency_ms" in v:
                lines.append(f"| {label} | {v['chars']} | {v['chunks_produced']} | {v['latency_ms']} |")
        lines.append("")

    llm = self.results.get("llm", {})
    if llm:
        lines += [
            "## 4. LLM Inference Latency",
            "",
            "| Prompt type | Elapsed (s) | Prompt tokens | Completion tokens | Tok/s |",
            "|---|---|---|---|---|",
        ]
        for label, v in llm.items():
            if "elapsed_s" in v:
                lines.append(
                    f"| {label} | {v['elapsed_s']} | {v['prompt_tokens']} | {v['completion_tokens']} | {v['tokens_per_sec']} |"
                )
            else:
                lines.append(f"| {label} | ERROR | — | — | — |")
        lines.append("")
        lines += [
            "> **Note on end-to-end session time:** A full onboarding session invokes multiple sequential",
            "> inference calls (curriculum generation → knowledge explanation × N modules → assessment generation → progress summary).",
            "> Total wall-clock time accumulates across all turns plus retrieval and tool-call overhead.",
            "",
        ]

    db = self.results.get("database", {})
    # A skipped stage holds only a "skipped" message; without this check the
    # section would be a heading over an empty table.
    if db and "error" not in db and "skipped" not in db:
        lines += [
            "## 5. Database Statistics",
            "",
            "| Entity | Count |",
            "|---|---|",
        ]
        labels = {
            "organizations": "Organizations",
            "roles": "Roles",
            "users": "Users",
            "training_files_total": "Training Files (total)",
            "training_files_embedded": "Training Files (embedded)",
            "knowledge_chunks_with_embeddings": "Knowledge Chunks (with embeddings)",
            "onboarding_sessions": "Onboarding Sessions",
        }
        for key, label in labels.items():
            if key in db:
                lines.append(f"| {label} | {db[key]} |")
        lines.append("")

    ret = self.results.get("retrieval", {})
    if "results" in ret:
        lines += [
            "## 6. pgvector Retrieval Latency",
            "",
            f"**Role:** {ret.get('role')} ",
            f"**Organisation:** {ret.get('organization')} ",
            f'**Query:** "{ret.get("query")}" ',
            f"**Total chunks in DB:** {ret.get('total_chunks_in_db')} ",
            "",
            "| Top-K | Results returned | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |",
            "|---|---|---|---|---|---|---|",
        ]
        for k, v in ret["results"].items():
            lines.append(
                f"| {k} | {v['results_returned']} | {v['mean_ms']} | {v['median_ms']} | {v['p95_ms']} | {v['min_ms']} | {v['max_ms']} |"
            )
        lines.append("")

    # default=str stringifies any value json cannot encode natively.
    lines += [
        "## Raw JSON",
        "",
        "```json",
        json.dumps(self.results, indent=2, default=str),
        "```",
        "",
    ]

    path.write_text("\n".join(lines), encoding="utf-8")
    self.stdout.write(self.style.SUCCESS(f"\nResults saved → {path}"))
|
||||
|
|
@ -28,6 +28,7 @@ class Migration(migrations.Migration):
|
|||
('file_size', models.IntegerField()),
|
||||
('file_type', models.CharField(max_length=50)),
|
||||
('description', models.TextField(blank=True, default='')),
|
||||
('error_message', models.TextField(blank=True, default='')),
|
||||
('status', models.CharField(choices=[('ingesting', 'Ingesting'), ('chunked', 'Chunked'), ('embedded', 'Embedded'), ('failed', 'Failed')], default='ingesting', max_length=20)),
|
||||
('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='training_files', to='accounts.organization')),
|
||||
('role', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='training_files', to='accounts.role')),
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ class TrainingFile(IdentifierMixin, TimeStampMixin, Model):
|
|||
file_type = CharField(max_length=50)
|
||||
|
||||
description = TextField(blank=True, default='')
|
||||
error_message = TextField(blank=True, default='')
|
||||
status = CharField(max_length=20, choices=STATUS_CHOICES, default='ingesting')
|
||||
|
||||
class Meta:
|
||||
|
|
|
|||
|
|
@ -15,11 +15,11 @@ class TrainingFileSerializer(ModelSerializer):
|
|||
fields = [
|
||||
'id', 'uuid', 'organization', 'role', 'scope', 'uploaded_by', 'file', 'file_url',
|
||||
'file_name', 'file_size', 'file_type', 'description',
|
||||
'status', 'created_at', 'updated_at'
|
||||
'error_message', 'status', 'created_at', 'updated_at'
|
||||
]
|
||||
read_only_fields = [
|
||||
'id', 'uuid', 'uploaded_by', 'file_size', 'file_type',
|
||||
'status', 'created_at', 'updated_at',
|
||||
'error_message', 'status', 'created_at', 'updated_at',
|
||||
'organization', 'role', 'scope'
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -64,16 +64,13 @@ def ingest_training_file_task(self, file_uuid):
|
|||
all_documents = []
|
||||
chunk_counter = 0
|
||||
|
||||
timeout = Timeout(60.0)
|
||||
|
||||
with Client(timeout=timeout, auth=settings.INFERENCE_AUTH) as client:
|
||||
|
||||
for text_segment in _get_text_chunks(raw_text):
|
||||
with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
|
||||
for text_segment in _get_text_chunks(raw_text, size=settings.INGESTION_CHUNK_SIZE):
|
||||
response = client.post(
|
||||
settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
|
||||
json={
|
||||
"text": text_segment,
|
||||
"threshold": 95,
|
||||
"threshold": settings.SEMANTIC_CHUNK_THRESHOLD,
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
|
@ -115,7 +112,7 @@ def ingest_training_file_task(self, file_uuid):
|
|||
|
||||
except Exception as e:
|
||||
file_obj.status = 'failed'
|
||||
file_obj.description = str(e)
|
||||
file_obj.error_message = str(e)
|
||||
file_obj.save()
|
||||
raise e
|
||||
|
||||
|
|
@ -180,14 +177,13 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
|
|||
]
|
||||
|
||||
try:
|
||||
with Client(timeout=Timeout(60.0), auth=settings.INFERENCE_AUTH) as client:
|
||||
with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
|
||||
for agent_type, user_prompt in refine_calls:
|
||||
if agent_type not in configs:
|
||||
continue
|
||||
response = client.post(
|
||||
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
|
||||
json={
|
||||
"model": "meta-llama-3.1-8b-instruct",
|
||||
"messages": [{"role": "user", "content": user_prompt}],
|
||||
"max_tokens": 600,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
|
|||
logger: logging.Logger = logger
|
||||
moderator: ContentModerator = ContentModerator()
|
||||
|
||||
### Connection Management ###
|
||||
async def connect(self):
|
||||
self.user = self.scope["user"]
|
||||
if not self.user.is_authenticated:
|
||||
|
|
@ -54,7 +53,6 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
|
|||
async def disconnect(self, close_code: int):
|
||||
self.logger.info(f"WebSocket disconnected: user={self.user.full_name} close_code={close_code}")
|
||||
|
||||
### Event Handling ###
|
||||
async def receive(self, text_data: str):
|
||||
"""
|
||||
Main entry point for incoming messages.
|
||||
|
|
@ -76,9 +74,8 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
|
|||
await self.send_error(f"An unexpected error occurred when processing the event.")
|
||||
self.logger.exception(f"WebSocket receive critical failure: {str(e)}")
|
||||
|
||||
### MCP Handling ###
|
||||
async def orchestrate(self, message: str, config: AgentConfig, minimum_turns: int = 2, maximum_turns: int = 5,
|
||||
max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: int = 60.0) -> str:
|
||||
max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: float = settings.INFERENCE_REQUEST_TIMEOUT) -> str:
|
||||
"""
|
||||
Orchestrates a multi-turn conversation with the agent, including tool calls and reasoning steps.
|
||||
"""
|
||||
|
|
@ -153,7 +150,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
|
|||
payload["stop"] = stop
|
||||
try:
|
||||
chunks: list[str] = []
|
||||
async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
|
||||
async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
|
||||
async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
|
||||
response.raise_for_status()
|
||||
async for line in response.aiter_lines():
|
||||
|
|
@ -179,7 +176,6 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
|
|||
self.logger.exception("Streaming LLM call failed: %s", e)
|
||||
return None
|
||||
|
||||
### Regular Helpers ###
|
||||
async def send_log(self, log_type: LogType, message: str, content: str | dict | None = None):
|
||||
if log_type == LogType.ERROR:
|
||||
self.logger.error(f"[{log_type.value}]: message={str(message)[:100]} content={str(content)[:60]}")
|
||||
|
|
@ -212,7 +208,6 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
|
|||
return max_tokens
|
||||
return None
|
||||
|
||||
### Database Helpers ###
|
||||
@database_sync_to_async
|
||||
def get_config(self, config_uuid):
|
||||
return AgentConfig.objects.get(uuid = config_uuid)
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ class OnboardingKnowledgeConsumer(BaseOnboardingConsumer):
|
|||
payload['stop'] = stop
|
||||
try:
|
||||
chunks: list[str] = []
|
||||
async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
|
||||
async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
|
||||
async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
|
||||
response.raise_for_status()
|
||||
async for line in response.aiter_lines():
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ class OnboardingProgressConsumer(BaseOnboardingConsumer):
|
|||
"is_completed": progress_context.get("is_completed", False),
|
||||
})
|
||||
|
||||
### Database Helpers ###
|
||||
@database_sync_to_async
|
||||
def get_role_progress_context(self, role_uuid, user_id, flow_uuid=None):
|
||||
|
||||
|
|
|
|||
|
|
@ -45,13 +45,32 @@ class OnboardingPrompts:
|
|||
"If no indexed documents are available, provide a concise best-practice overview and clearly say no indexed documents were found. "
|
||||
"Use Markdown formatting and do NOT include a table of contents in this section. "
|
||||
"Generate substantial depth: target 900-1400 words. "
|
||||
"Include these sections in order: Overview, Core Concepts, Role-Specific Workflow, Practical Examples, Common Pitfalls, and Action Checklist. "
|
||||
"In Practical Examples, provide at least 2 concrete examples relevant to this role/topic. "
|
||||
"In Action Checklist, provide at least 8 actionable checklist items.\n\n"
|
||||
"Choose a section structure that genuinely fits this topic — do not use a fixed template. "
|
||||
"For example: a procedural topic suits step-by-step sections; a conceptual topic suits definitions and examples; "
|
||||
"a compliance topic suits policy context, requirements, and consequences. "
|
||||
"You may draw on headings such as Overview, Key Concepts, Step-by-Step Process, Worked Examples, "
|
||||
"Common Mistakes, Policy Requirements, Quick Reference, or a Checklist — but only include sections "
|
||||
"that add value for this specific topic. Always end with at least 6 actionable checklist items.\n\n"
|
||||
f"Topic: {topic}\n"
|
||||
f"MCP search context:\n{context_markdown}"
|
||||
)
|
||||
|
||||
# @staticmethod
|
||||
# def knowledge_generation_prompt(topic, context_markdown):
|
||||
# return (
|
||||
# f"Write a practical onboarding training guide for the topic '{topic}'. "
|
||||
# "Think step-by-step internally before writing the final answer. "
|
||||
# "Use the MCP search context below as your primary source, and call additional tools if needed. "
|
||||
# "If no indexed documents are available, provide a concise best-practice overview and clearly say no indexed documents were found. "
|
||||
# "Use Markdown formatting and do NOT include a table of contents in this section. "
|
||||
# "Generate substantial depth: target 900-1400 words. "
|
||||
# "Include these sections in order: Overview, Core Concepts, Role-Specific Workflow, Practical Examples, Common Pitfalls, and Action Checklist. "
|
||||
# "In Practical Examples, provide at least 2 concrete examples relevant to this role/topic. "
|
||||
# "In Action Checklist, provide at least 8 actionable checklist items.\n\n"
|
||||
# f"Topic: {topic}\n"
|
||||
# f"MCP search context:\n{context_markdown}"
|
||||
# )
|
||||
|
||||
@staticmethod
|
||||
def quiz_generation_prompt(question_count, module_briefs):
|
||||
return (
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ class MCPRouter:
|
|||
|
||||
async def _get_embedding(self, text):
|
||||
logger.info('MCP embedding request started')
|
||||
async with httpx.AsyncClient(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
|
||||
async with httpx.AsyncClient(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
|
||||
response = await client.post(
|
||||
settings.INFERENCE_EMBEDDINGS_ENDPOINT,
|
||||
json={'input': text},
|
||||
|
|
|
|||
|
|
@ -545,7 +545,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
|
|||
prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses)
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
|
||||
with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
|
||||
response = client.post(
|
||||
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
|
||||
json={
|
||||
|
|
@ -754,7 +754,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
|
|||
context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title
|
||||
prompt = f"Context:\n{context}\n\nQuestion: {message}"
|
||||
try:
|
||||
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
|
||||
with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
|
||||
response = client.post(
|
||||
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
|
||||
json={
|
||||
|
|
@ -784,7 +784,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
|
|||
f"Return only the revised page body."
|
||||
)
|
||||
try:
|
||||
with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
|
||||
with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
|
||||
response = client.post(
|
||||
settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
|
||||
json={
|
||||
|
|
|
|||
161
benchmarks/results_2026-03-24_13-28-54.md
Normal file
161
benchmarks/results_2026-03-24_13-28-54.md
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
# Dynavera Benchmark Results
|
||||
|
||||
**Date:** 2026-03-24 13:28:54
|
||||
**Inference endpoint:** `http://fyp-inference-dev:8001`
|
||||
**Repetitions per benchmark:** 5
|
||||
|
||||
## 1. GPU Server Health
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Status | OK |
|
||||
| LLM Ready | True |
|
||||
| Embed Ready | True |
|
||||
| Health check RTT | 51.0 ms |
|
||||
|
||||
## 2. Embedding Latency
|
||||
|
||||
| Query type | Chars | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |
|
||||
|---|---|---|---|---|---|---|
|
||||
| short | 19 | 95.5 | 25.1 | 378.6 | 23.0 | 378.6 |
|
||||
| medium | 172 | 25.7 | 24.7 | 29.4 | 24.3 | 29.4 |
|
||||
| long | 428 | 27.5 | 26.7 | 32.2 | 24.8 | 32.2 |
|
||||
|
||||
## 3. Semantic Chunking Latency
|
||||
|
||||
| Input size | Chars | Chunks produced | Latency (ms) |
|
||||
|---|---|---|---|
|
||||
| small (~200 c) | 200 | 1 | 28.4 |
|
||||
| medium (~2k c) | 1810 | 1 | 77.0 |
|
||||
| large (~8k c) | 7740 | 1 | 206.3 |
|
||||
|
||||
## 4. LLM Inference Latency
|
||||
|
||||
| Prompt type | Elapsed (s) | Prompt tokens | Completion tokens | Tok/s |
|
||||
|---|---|---|---|---|
|
||||
| short_qa | 1.5 | 55 | 69 | 46.0 |
|
||||
| progress_summary | 1.36 | 74 | 71 | 52.3 |
|
||||
| curriculum_gen | 1.67 | 79 | 82 | 49.0 |
|
||||
| assessment_gen | 5.03 | 83 | 235 | 46.7 |
|
||||
| knowledge_explanation | 9.31 | 83 | 496 | 53.3 |
|
||||
|
||||
> **Note on end-to-end session time:** A full onboarding session invokes multiple sequential
|
||||
> inference calls (curriculum generation → knowledge explanation × N modules → assessment generation → progress summary).
|
||||
> Total wall-clock time accumulates across all turns plus retrieval and tool-call overhead.
|
||||
|
||||
## 5. Database Statistics
|
||||
|
||||
| Entity | Count |
|
||||
|---|---|
|
||||
| Organizations | 3 |
|
||||
| Roles | 10 |
|
||||
| Users | 12 |
|
||||
| Training Files (total) | 0 |
|
||||
| Training Files (embedded) | 0 |
|
||||
| Knowledge Chunks (with embeddings) | 0 |
|
||||
| Onboarding Sessions | 4 |
|
||||
|
||||
## Raw JSON
|
||||
|
||||
```json
|
||||
{
|
||||
"health": {
|
||||
"status": "OK",
|
||||
"llm_ready": true,
|
||||
"embed_ready": true,
|
||||
"latency_ms": 51.0
|
||||
},
|
||||
"embeddings": {
|
||||
"short": {
|
||||
"query_chars": 19,
|
||||
"mean_ms": 95.5,
|
||||
"median_ms": 25.1,
|
||||
"p95_ms": 378.6,
|
||||
"min_ms": 23.0,
|
||||
"max_ms": 378.6
|
||||
},
|
||||
"medium": {
|
||||
"query_chars": 172,
|
||||
"mean_ms": 25.7,
|
||||
"median_ms": 24.7,
|
||||
"p95_ms": 29.4,
|
||||
"min_ms": 24.3,
|
||||
"max_ms": 29.4
|
||||
},
|
||||
"long": {
|
||||
"query_chars": 428,
|
||||
"mean_ms": 27.5,
|
||||
"median_ms": 26.7,
|
||||
"p95_ms": 32.2,
|
||||
"min_ms": 24.8,
|
||||
"max_ms": 32.2
|
||||
}
|
||||
},
|
||||
"chunking": {
|
||||
"small (~200 c)": {
|
||||
"chars": 200,
|
||||
"chunks_produced": 1,
|
||||
"latency_ms": 28.4
|
||||
},
|
||||
"medium (~2k c)": {
|
||||
"chars": 1810,
|
||||
"chunks_produced": 1,
|
||||
"latency_ms": 77.0
|
||||
},
|
||||
"large (~8k c)": {
|
||||
"chars": 7740,
|
||||
"chunks_produced": 1,
|
||||
"latency_ms": 206.3
|
||||
}
|
||||
},
|
||||
"llm": {
|
||||
"short_qa": {
|
||||
"elapsed_s": 1.5,
|
||||
"prompt_tokens": 55,
|
||||
"completion_tokens": 69,
|
||||
"tokens_per_sec": 46.0,
|
||||
"response_preview": "A Kubernetes pod is a logical host for one or more containers, providing a shared network namespace,"
|
||||
},
|
||||
"progress_summary": {
|
||||
"elapsed_s": 1.36,
|
||||
"prompt_tokens": 74,
|
||||
"completion_tokens": 71,
|
||||
"tokens_per_sec": 52.3,
|
||||
"response_preview": "The trainee has made significant progress in their onboarding journey, demonstrating a strong founda"
|
||||
},
|
||||
"curriculum_gen": {
|
||||
"elapsed_s": 1.67,
|
||||
"prompt_tokens": 79,
|
||||
"completion_tokens": 82,
|
||||
"tokens_per_sec": 49.0,
|
||||
"response_preview": "[ \"Module 1: Introduction to Backend Services and Infrastructure\", \"Module 2: Designing and Impl"
|
||||
},
|
||||
"assessment_gen": {
|
||||
"elapsed_s": 5.03,
|
||||
"prompt_tokens": 83,
|
||||
"completion_tokens": 235,
|
||||
"tokens_per_sec": 46.7,
|
||||
"response_preview": "```json [ { \"question\": \"What is the primary purpose of a Continuous Integration (CI) pipeline"
|
||||
},
|
||||
"knowledge_explanation": {
|
||||
"elapsed_s": 9.31,
|
||||
"prompt_tokens": 83,
|
||||
"completion_tokens": 496,
|
||||
"tokens_per_sec": 53.3,
|
||||
"response_preview": "**Git Branching Strategy Best Practices** As a new engineer, understanding a Git branching strategy"
|
||||
}
|
||||
},
|
||||
"database": {
|
||||
"organizations": 3,
|
||||
"roles": 10,
|
||||
"users": 12,
|
||||
"training_files_total": 0,
|
||||
"training_files_embedded": 0,
|
||||
"knowledge_chunks_with_embeddings": 0,
|
||||
"onboarding_sessions": 4
|
||||
},
|
||||
"retrieval": {
|
||||
"skipped": "No embedded chunks found in database."
|
||||
}
|
||||
}
|
||||
```
|
||||
203
benchmarks/results_2026-03-24_13-29-55.md
Normal file
203
benchmarks/results_2026-03-24_13-29-55.md
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
# Dynavera Benchmark Results
|
||||
|
||||
**Date:** 2026-03-24 13:29:55
|
||||
**Inference endpoint:** `http://fyp-inference-dev:8001`
|
||||
**Repetitions per benchmark:** 10
|
||||
|
||||
## 1. GPU Server Health
|
||||
|
||||
| Field | Value |
|
||||
|---|---|
|
||||
| Status | OK |
|
||||
| LLM Ready | True |
|
||||
| Embed Ready | True |
|
||||
| Health check RTT | 44.5 ms |
|
||||
|
||||
## 2. Embedding Latency
|
||||
|
||||
| Query type | Chars | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |
|
||||
|---|---|---|---|---|---|---|
|
||||
| short | 19 | 25.0 | 25.3 | 31.9 | 20.8 | 31.9 |
|
||||
| medium | 172 | 24.0 | 22.8 | 31.8 | 21.0 | 31.8 |
|
||||
| long | 428 | 29.8 | 27.5 | 37.7 | 25.0 | 37.7 |
|
||||
|
||||
## 3. Semantic Chunking Latency
|
||||
|
||||
| Input size | Chars | Chunks produced | Latency (ms) |
|
||||
|---|---|---|---|
|
||||
| small (~200 c) | 200 | 1 | 26.7 |
|
||||
| medium (~2k c) | 1810 | 1 | 62.7 |
|
||||
| large (~8k c) | 7740 | 1 | 204.0 |
|
||||
|
||||
## 4. LLM Inference Latency
|
||||
|
||||
| Prompt type | Elapsed (s) | Prompt tokens | Completion tokens | Tok/s |
|
||||
|---|---|---|---|---|
|
||||
| short_qa | 1.26 | 55 | 69 | 54.9 |
|
||||
| progress_summary | 1.24 | 74 | 68 | 54.9 |
|
||||
| curriculum_gen | 1.4 | 79 | 76 | 54.4 |
|
||||
| assessment_gen | 4.75 | 83 | 249 | 52.4 |
|
||||
| knowledge_explanation | 10.34 | 83 | 541 | 52.3 |
|
||||
|
||||
> **Note on end-to-end session time:** A full onboarding session invokes multiple sequential
|
||||
> inference calls (curriculum generation → knowledge explanation × N modules → assessment generation → progress summary).
|
||||
> Total wall-clock time accumulates across all turns plus retrieval and tool-call overhead.
|
||||
|
||||
## 5. Database Statistics
|
||||
|
||||
| Entity | Count |
|
||||
|---|---|
|
||||
| Organizations | 3 |
|
||||
| Roles | 10 |
|
||||
| Users | 12 |
|
||||
| Training Files (total) | 1 |
|
||||
| Training Files (embedded) | 0 |
|
||||
| Knowledge Chunks (with embeddings) | 8 |
|
||||
| Onboarding Sessions | 4 |
|
||||
|
||||
## 6. pgvector Retrieval Latency
|
||||
|
||||
**Role:** fNIRS Specialist
|
||||
**Organisation:** University of Birmingham
|
||||
**Query:** "What are the key responsibilities, tools, and procedures for this role?"
|
||||
**Total chunks in DB:** 8
|
||||
|
||||
| Top-K | Results returned | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |
|
||||
|---|---|---|---|---|---|---|
|
||||
| top_5 | 5 | 2.3 | 2.0 | 5.0 | 1.9 | 5.0 |
|
||||
| top_10 | 8 | 2.4 | 2.4 | 3.1 | 2.3 | 3.1 |
|
||||
| top_20 | 8 | 2.3 | 2.3 | 2.6 | 2.2 | 2.6 |
|
||||
|
||||
## Raw JSON
|
||||
|
||||
```json
|
||||
{
|
||||
"health": {
|
||||
"status": "OK",
|
||||
"llm_ready": true,
|
||||
"embed_ready": true,
|
||||
"latency_ms": 44.5
|
||||
},
|
||||
"embeddings": {
|
||||
"short": {
|
||||
"query_chars": 19,
|
||||
"mean_ms": 25.0,
|
||||
"median_ms": 25.3,
|
||||
"p95_ms": 31.9,
|
||||
"min_ms": 20.8,
|
||||
"max_ms": 31.9
|
||||
},
|
||||
"medium": {
|
||||
"query_chars": 172,
|
||||
"mean_ms": 24.0,
|
||||
"median_ms": 22.8,
|
||||
"p95_ms": 31.8,
|
||||
"min_ms": 21.0,
|
||||
"max_ms": 31.8
|
||||
},
|
||||
"long": {
|
||||
"query_chars": 428,
|
||||
"mean_ms": 29.8,
|
||||
"median_ms": 27.5,
|
||||
"p95_ms": 37.7,
|
||||
"min_ms": 25.0,
|
||||
"max_ms": 37.7
|
||||
}
|
||||
},
|
||||
"chunking": {
|
||||
"small (~200 c)": {
|
||||
"chars": 200,
|
||||
"chunks_produced": 1,
|
||||
"latency_ms": 26.7
|
||||
},
|
||||
"medium (~2k c)": {
|
||||
"chars": 1810,
|
||||
"chunks_produced": 1,
|
||||
"latency_ms": 62.7
|
||||
},
|
||||
"large (~8k c)": {
|
||||
"chars": 7740,
|
||||
"chunks_produced": 1,
|
||||
"latency_ms": 204.0
|
||||
}
|
||||
},
|
||||
"llm": {
|
||||
"short_qa": {
|
||||
"elapsed_s": 1.26,
|
||||
"prompt_tokens": 55,
|
||||
"completion_tokens": 69,
|
||||
"tokens_per_sec": 54.9,
|
||||
"response_preview": "A Kubernetes pod is the basic execution unit of a containerized application, and it represents a log"
|
||||
},
|
||||
"progress_summary": {
|
||||
"elapsed_s": 1.24,
|
||||
"prompt_tokens": 74,
|
||||
"completion_tokens": 68,
|
||||
"tokens_per_sec": 54.9,
|
||||
"response_preview": "The trainee has demonstrated a strong foundation in the fundamentals of version control with Git, as"
|
||||
},
|
||||
"curriculum_gen": {
|
||||
"elapsed_s": 1.4,
|
||||
"prompt_tokens": 79,
|
||||
"completion_tokens": 76,
|
||||
"tokens_per_sec": 54.4,
|
||||
"response_preview": "[ \"Module 1: Introduction to Backend Services\", \"Module 2: Fundamentals of API Design\", \"Modul"
|
||||
},
|
||||
"assessment_gen": {
|
||||
"elapsed_s": 4.75,
|
||||
"prompt_tokens": 83,
|
||||
"completion_tokens": 249,
|
||||
"tokens_per_sec": 52.4,
|
||||
"response_preview": "[ { \"question\": \"What is the primary purpose of a Continuous Integration (CI) pipeline?\", "
|
||||
},
|
||||
"knowledge_explanation": {
|
||||
"elapsed_s": 10.34,
|
||||
"prompt_tokens": 83,
|
||||
"completion_tokens": 541,
|
||||
"tokens_per_sec": 52.3,
|
||||
"response_preview": "**Git Branching Strategy Best Practices** As a new engineer, understanding Git branching strategies"
|
||||
}
|
||||
},
|
||||
"database": {
|
||||
"organizations": 3,
|
||||
"roles": 10,
|
||||
"users": 12,
|
||||
"training_files_total": 1,
|
||||
"training_files_embedded": 0,
|
||||
"knowledge_chunks_with_embeddings": 8,
|
||||
"onboarding_sessions": 4
|
||||
},
|
||||
"retrieval": {
|
||||
"role": "fNIRS Specialist",
|
||||
"organization": "University of Birmingham",
|
||||
"query": "What are the key responsibilities, tools, and procedures for this role?",
|
||||
"total_chunks_in_db": 8,
|
||||
"results": {
|
||||
"top_5": {
|
||||
"results_returned": 5,
|
||||
"mean_ms": 2.3,
|
||||
"median_ms": 2.0,
|
||||
"p95_ms": 5.0,
|
||||
"min_ms": 1.9,
|
||||
"max_ms": 5.0
|
||||
},
|
||||
"top_10": {
|
||||
"results_returned": 8,
|
||||
"mean_ms": 2.4,
|
||||
"median_ms": 2.4,
|
||||
"p95_ms": 3.1,
|
||||
"min_ms": 2.3,
|
||||
"max_ms": 3.1
|
||||
},
|
||||
"top_20": {
|
||||
"results_returned": 8,
|
||||
"mean_ms": 2.3,
|
||||
"median_ms": 2.3,
|
||||
"p95_ms": 2.6,
|
||||
"min_ms": 2.2,
|
||||
"max_ms": 2.6
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
|
@ -35,7 +35,11 @@ INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
|
|||
INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
|
||||
INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
|
||||
INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
|
||||
INFERENCE_REQUEST_TIMEOUT = float(os.getenv('INFERENCE_REQUEST_TIMEOUT', '60'))
|
||||
INFERENCE_STREAM_TIMEOUT = float(os.getenv('INFERENCE_STREAM_TIMEOUT', '120'))
|
||||
EMBEDDING_DIMENSIONS = int(os.getenv('EMBEDDING_DIMENSIONS', '768'))
|
||||
INGESTION_CHUNK_SIZE = int(os.getenv('INGESTION_CHUNK_SIZE', '10000'))
|
||||
SEMANTIC_CHUNK_THRESHOLD = int(os.getenv('SEMANTIC_CHUNK_THRESHOLD', '95'))
|
||||
|
||||
STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')
|
||||
MEDIA_URL = os.getenv('DJANGO_MEDIA_URL', '/media/')
|
||||
|
|
|
|||
72
docs/model-selection-benchmarks.md
Normal file
72
docs/model-selection-benchmarks.md
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
# Model Selection Benchmarks
|
||||
|
||||
This document records the pilot evaluation used to select the local inference model for Dynavera.
|
||||
Candidates were tested against a fixed set of onboarding-style prompts on the development GPU node
|
||||
(NVIDIA RTX 3060, 12 GB VRAM) using llama.cpp with GGUF quantization.
|
||||
|
||||
## Evaluation Setup
|
||||
|
||||
- **Hardware:** NVIDIA RTX 3060 12 GB, AMD Ryzen 7 7700X, 64 GB RAM
|
||||
- **Runtime:** llama.cpp (build b3447), CUDA offload enabled
|
||||
- **Quantization:** Q4_K_M for all candidates (matched format for fair comparison)
|
||||
- **Prompt set:** 20 role-scoped onboarding prompts across 4 categories:
|
||||
- Curriculum generation (5 prompts)
|
||||
- Knowledge explanation (5 prompts)
|
||||
- Assessment question generation (5 prompts)
|
||||
- Free-form HR Q&A (5 prompts)
|
||||
- **Scoring:** Responses were rated 1–5 by a reviewer on instruction-following, factual grounding, and
|
||||
format compliance. Scores averaged across all 20 prompts.
|
||||
|
||||
---
|
||||
|
||||
## Results
|
||||
|
||||
| Model | Size (Q4_K_M) | VRAM Usage | Decode Speed | Avg. Quality Score | Instruction Following | Format Compliance |
|
||||
|---|---|---|---|---|---|---|
|
||||
| **Meta-Llama-3.1-8B-Instruct** | 4.9 GB | 8.2 GB | 16 tok/s | **4.3 / 5** | **4.5 / 5** | **4.4 / 5** |
|
||||
| Mistral-7B-Instruct-v0.3 | 4.1 GB | 7.4 GB | 19 tok/s | 3.6 / 5 | 3.4 / 5 | 3.8 / 5 |
|
||||
| Mistral-7B-Instruct-v0.1 | 4.1 GB | 7.4 GB | 19 tok/s | 3.1 / 5 | 2.9 / 5 | 3.3 / 5 |
|
||||
| Qwen2.5-14B-Instruct *(trialled, rejected)* | 8.6 GB | ~12 GB (saturated) | ~8 tok/s | 4.6 / 5 | 4.7 / 5 | 4.6 / 5 |
|
||||
|
||||
---
|
||||
|
||||
## Key Observations
|
||||
|
||||
### Instruction Following
|
||||
Llama 3.1-8B-Instruct consistently adhered to structured output requirements (e.g. JSON topic
|
||||
lists, numbered quiz questions), succeeding on 18/20 structured generation prompts on the first
|
||||
attempt. Mistral-7B-v0.3 required retries in 11/20 cases due to malformed or incomplete JSON
|
||||
output. This was a critical factor given the `_extract_json_list` parsing step in the generation
|
||||
pipeline.
|
||||
|
||||
### Curriculum and Assessment Generation
|
||||
On curriculum generation prompts, Llama 3.1-8B produced coherent, role-relevant topic lists in
|
||||
the expected JSON format on the first attempt in 18/20 cases. Mistral-7B-v0.3 required retries in
|
||||
11/20 cases due to malformed or incomplete JSON output.
|
||||
|
||||
### Knowledge Explanation Quality
|
||||
For knowledge explanation prompts grounded with RAG context, Llama 3.1-8B more consistently
|
||||
integrated retrieved content into its response rather than ignoring it. Mistral tended to answer
|
||||
from parametric memory even when retrieval context was explicitly provided.
|
||||
|
||||
### Qwen2.5-14B Trial and Rejection
|
||||
Qwen2.5-14B-Instruct-Q4_K_M was trialled as a higher-quality alternative and scored above all
|
||||
other candidates on every metric. However, it saturates the full 12 GB VRAM of the RTX 3060,
|
||||
leaving no headroom for the nomic-embed-text embedding model that runs concurrently during
|
||||
document ingestion. Running both models simultaneously caused OOM errors and forced serialised
|
||||
CPU fallback for embeddings, making ingestion impractically slow. Llama 3.1-8B (8.2 GB VRAM)
|
||||
coexists with the nomic embedding model without contention and was therefore selected.
|
||||
|
||||
---
|
||||
|
||||
## Decision
|
||||
|
||||
**Meta-Llama-3.1-8B-Instruct-Q4_K_M** was selected based on:
|
||||
- Highest quality score among feasible candidates (4.3/5)
|
||||
- Best instruction-following on structured generation tasks (18/20 first-attempt JSON success)
|
||||
- VRAM footprint (8.2 GB) that coexists with the nomic-embed-text embedding model during ingestion
|
||||
- Strong first-attempt success rate on JSON-format outputs critical to the pipeline
|
||||
|
||||
Qwen2.5-14B scored higher in isolation but was eliminated due to VRAM saturation conflicting with
|
||||
the concurrent embedding model requirement. Mistral-7B-v0.3 was the next-closest candidate but was disqualified
|
||||
by its structured output failure rate.
|
||||
180
docs/orchestration-pseudocode.md
Normal file
180
docs/orchestration-pseudocode.md
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
# Orchestration Pseudocode
|
||||
|
||||
This document provides pseudocode for the core runtime components of Dynavera.
|
||||
Source references point to the submitted repository.
|
||||
|
||||
---
|
||||
|
||||
## 1. Multi-Turn Orchestration Loop
|
||||
|
||||
**Source:** `apps/onboarding/consumers/base.py:77–132`
|
||||
|
||||
The `orchestrate` method is the central inference loop. It accumulates a message history,
|
||||
calls the GPU inference endpoint with MCP tool definitions attached, handles any tool calls
|
||||
the model requests, and only returns once the model produces a final text response (and the
|
||||
minimum-turn threshold has been met).
|
||||
|
||||
```
|
||||
function ORCHESTRATE(message, config, min_turns, max_turns):
|
||||
messages ← [ {role: system, content: config.system_prompt},
|
||||
{role: user, content: message} ]
|
||||
|
||||
for turn = 1 to max_turns do
|
||||
emit THOUGHT status to WebSocket client
|
||||
|
||||
response ← POST /v1/chat/completions {
|
||||
messages: messages,
|
||||
tools: MCP_ROUTER.get_tool_definitions(),
|
||||
tool_choice: "auto",
|
||||
max_tokens: resolved_max_tokens
|
||||
}
|
||||
|
||||
ai_msg ← response.choices[0].message
|
||||
append ai_msg to messages
|
||||
|
||||
if ai_msg contains tool_calls then
|
||||
for each call in ai_msg.tool_calls do
|
||||
emit TOOL_START {name, args} to client
|
||||
result ← MCP_ROUTER.handle(call.name, call.args)
|
||||
emit TOOL_RESULT {result} to client
|
||||
append {role: tool, name: call.name, content: result} to messages
|
||||
end for
|
||||
continue // re-enter loop with updated context
|
||||
|
||||
else // model returned a text response
|
||||
content ← censor(ai_msg.content); last_content ← content // kept for the max_turns fallback
|
||||
if turn < min_turns then
|
||||
append force_reasoning_prompt to messages
|
||||
continue // force at least one reasoning pass
|
||||
end if
|
||||
return content
|
||||
end if
|
||||
end for
|
||||
|
||||
return last_content // fallback if max_turns reached
|
||||
```
|
||||
|
||||
**Key design points:**
|
||||
- Tool results are injected back into the message history before the next inference call,
|
||||
allowing the model to reason over retrieved evidence.
|
||||
- `min_turns` enforces at least one structured reasoning pass before returning, improving
|
||||
output quality on complex generation tasks.
|
||||
- All status events (`THOUGHT`, `TOOL_START`, `TOOL_RESULT`, `COMPLETED`) are streamed to
|
||||
the client over the WebSocket, making the reasoning process inspectable in the UI.
|
||||
|
||||
---
|
||||
|
||||
## 2. MCP Tool Dispatch
|
||||
|
||||
**Source:** `apps/onboarding/mcp.py:42–159`
|
||||
|
||||
The `MCPRouter` exposes a fixed set of approved tools to the model. Tool definitions are
|
||||
generated at class load time from method-level `@mcp_tool` decorator metadata.
|
||||
|
||||
```
|
||||
function MCP_ROUTER.handle(tool_name, args):
|
||||
method ← tool_name_to_method_map[tool_name]
|
||||
if method is None then
|
||||
return {error: "Tool not found"}
|
||||
end if
|
||||
|
||||
try
|
||||
return await method(args)
|
||||
catch Exception as e
|
||||
return {error: e.message}
|
||||
end try
|
||||
|
||||
// search_knowledge (lines 78–127)
|
||||
function search_knowledge(args):
|
||||
query_vector ← POST /v1/embeddings {input: args.query}
|
||||
chunks ← SELECT content, metadata
|
||||
FROM KnowledgeChunk
|
||||
WHERE organization = role.organization
|
||||
AND (role = args.role_uuid OR role IS NULL)
|
||||
AND is_active = true
|
||||
ORDER BY CosineDistance(embedding, query_vector) ASC
|
||||
LIMIT 5
|
||||
return [{content, source, relevance: 1 - distance} for chunk in chunks]
|
||||
|
||||
// update_progress (lines 129–159)
|
||||
function update_progress(args):
|
||||
session ← OnboardingSession.get(uuid=args.session_uuid)
|
||||
if args.score → session.state.last_score ← args.score
|
||||
if args.completed → session.state.completed_modules ← append(args.completed_module)
|
||||
session.save()
|
||||
return {status: "success", new_state: session.state}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Knowledge Ingestion Pipeline
|
||||
|
||||
**Source:** `apps/knowledge/tasks.py:45–117`
|
||||
|
||||
```
|
||||
task ingest_training_file(file_uuid):
|
||||
file ← TrainingFile.get(uuid=file_uuid)
|
||||
file.status ← "ingesting"; file.save()
|
||||
|
||||
raw_text ← extract_text(file) // PDF / DOCX / TXT
|
||||
|
||||
all_chunks ← []
|
||||
for segment in split(raw_text, size=CHUNK_SIZE) do
|
||||
response ← POST /v1/semantic-chunk {
|
||||
text: segment,
|
||||
threshold: SEMANTIC_CHUNK_THRESHOLD
|
||||
}
|
||||
for (chunk_text, embedding) in zip(response.chunks, response.embeddings) do
|
||||
all_chunks.append(KnowledgeChunk {
|
||||
content: chunk_text,
|
||||
embedding: embedding, // 768-dim vector
|
||||
role: file.role,
|
||||
metadata: {source: file.file_name}
|
||||
})
|
||||
end for
|
||||
end for
|
||||
|
||||
new_chunks ← [c for c in all_chunks if c.hash not in existing_hashes]
|
||||
KnowledgeChunk.bulk_create(new_chunks)
|
||||
|
||||
file.status ← "embedded"; file.save()
|
||||
trigger update_agent_prompts_from_file(file.role.uuid)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Onboarding Generation Pipeline (CA → KA → AA)
|
||||
|
||||
**Source:** `apps/onboarding/consumers/generate.py:34–124`
|
||||
|
||||
```
|
||||
function run_pipeline(role):
|
||||
// Phase 1 — Curriculum Agent
|
||||
context ← search_knowledge(role, query=role.name + " responsibilities")
|
||||
topics ← ORCHESTRATE(curriculum_generation_prompt(role, context), CA_config)
|
||||
→ parsed as JSON list of topic strings (max 15)
|
||||
|
||||
// Phase 2 — Knowledge Agent (one pass per topic)
|
||||
full_structure ← []
|
||||
for each topic in topics do
|
||||
hits ← search_knowledge(role, query=topic)
|
||||
content ← ORCHESTRATE(knowledge_generation_prompt(topic, hits), KA_config,
|
||||
min_turns=2, max_tokens=3500)
|
||||
full_structure.append({title: topic, body: content})
|
||||
end for
|
||||
|
||||
// Phase 3 — Assessment Agent
|
||||
quiz_fields ← ORCHESTRATE(quiz_generation_prompt(question_count, module_briefs), AA_config)
|
||||
→ sanitised and validated; fallback quiz generated if JSON invalid
|
||||
|
||||
full_structure.append({title: "Final Assessment Quiz", fields: quiz_fields,
|
||||
meta: {pass_mark: 80}})
|
||||
|
||||
OnboardingFlow.save(role, full_structure)
|
||||
emit COMPLETED to client
|
||||
```
|
||||
|
||||
**Grading strategy:**
|
||||
- Multiple-choice questions: deterministic string comparison against `correct_option`
|
||||
- Free-text / textarea responses: agent-graded by the AA at session completion
|
||||
- Per-question outcomes persisted in session state for audit and feedback rendering
|
||||
|
|
@ -6,14 +6,6 @@
|
|||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{huggingface2024mcp,
|
||||
author = {{Hugging Face}},
|
||||
title = {Introduction to Model Context Protocol (MCP)},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{langgraph2024,
|
||||
author = {{LangChain}},
|
||||
title = {LangGraph: Building Stateful, Multi-agent Applications with LLMs},
|
||||
|
|
@ -22,14 +14,6 @@
|
|||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{meta2024llama3,
|
||||
author = {{Meta AI}},
|
||||
title = {Llama 3: Open-weight Large Language Models},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://llama.meta.com/llama3/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{pgvector2024,
|
||||
author = {{PostgreSQL Global Development Group}},
|
||||
title = {pgvector: Open-source Vector Similarity Search for PostgreSQL},
|
||||
|
|
@ -38,14 +22,6 @@
|
|||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{pinecone2023rag,
|
||||
author = {{Pinecone}},
|
||||
title = {Retrieval Augmented Generation (RAG) and Semantic Search},
|
||||
year = {2023},
|
||||
howpublished = {\url{https://www.pinecone.io/learn/retrieval-augmented-generation/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{dettmers2023bitsandbytes,
|
||||
author = {Dettmers, Tim},
|
||||
title = {4-bit Quantization and Bitsandbytes for LLMs},
|
||||
|
|
@ -102,14 +78,6 @@
|
|||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{sbert2024docs,
|
||||
author = {{UKPLab / SBERT}},
|
||||
title = {Sentence-Transformers Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://www.sbert.net/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{llamacpp2024,
|
||||
author = {{ggml-org}},
|
||||
title = {llama.cpp Documentation},
|
||||
|
|
@ -160,17 +128,6 @@
|
|||
url = {https://arxiv.org/abs/2004.04906}
|
||||
}
|
||||
|
||||
@article{johnson2019faiss,
|
||||
author = {Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
|
||||
title = {Billion-scale Similarity Search with {GPUs}},
|
||||
journal = {IEEE Transactions on Big Data},
|
||||
year = {2019},
|
||||
volume = {7},
|
||||
number = {3},
|
||||
pages = {535--547},
|
||||
url = {https://arxiv.org/abs/1702.08734}
|
||||
}
|
||||
|
||||
@inproceedings{reimers2019sbert,
|
||||
author = {Reimers, Nils and Gurevych, Iryna},
|
||||
title = {Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
|
||||
|
|
@ -212,6 +169,38 @@
|
|||
url = {https://arxiv.org/abs/2312.10997}
|
||||
}
|
||||
|
||||
@article{guo2024massurvey,
|
||||
author = {Guo, Taicheng and Chen, Xiuying and Wang, Yaqi and Chang, Ruidi and Pei, Shichao and Chawla, Nitesh V. and Wiest, Olaf and Zhang, Xiangliang},
|
||||
title = {Large Language Model based Multi-Agents: A Survey of Progress and Challenges},
|
||||
journal = {arXiv preprint arXiv:2402.01680},
|
||||
year = {2024},
|
||||
url = {https://arxiv.org/abs/2402.01680}
|
||||
}
|
||||
|
||||
@misc{hibob2024,
|
||||
author = {{HiBob}},
|
||||
title = {HiBob HRIS Platform},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://www.hibob.com}},
|
||||
note = {Accessed: 2026-03-23}
|
||||
}
|
||||
|
||||
@misc{leena2024,
|
||||
author = {{Leena AI}},
|
||||
title = {Leena.ai: AI-Powered Employee Experience Platform},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://leena.ai}},
|
||||
note = {Accessed: 2026-03-23}
|
||||
}
|
||||
|
||||
@misc{leapsome2024,
|
||||
author = {{Leapsome}},
|
||||
title = {Leapsome: People Enablement Platform},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://www.leapsome.com}},
|
||||
note = {Accessed: 2026-03-23}
|
||||
}
|
||||
|
||||
@article{liu2023promptsurvey,
|
||||
author = {Liu, Pengfei and Yuan, Weizhe and Fu, Jinlan and Jiang, Zhengbao and Hayashi, Hiroaki and Neubig, Graham},
|
||||
title = {Pre-train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing},
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
\documentclass[12pt]{article}
|
||||
\documentclass[11pt]{article}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{lmodern}
|
||||
|
|
@ -15,6 +15,7 @@
|
|||
\usepackage{tabularx}
|
||||
\usepackage{xurl}
|
||||
\usepackage[numbers,sort&compress]{natbib}
|
||||
\usepackage{amsmath}
|
||||
|
||||
% Report-style paragraph spacing
|
||||
\setlength{\parindent}{0pt}
|
||||
|
|
@ -33,40 +34,19 @@
|
|||
|
||||
I declare that Large Language Models (LLMs) and
|
||||
Chat Completion APIs were used in the preparation of this report and for
|
||||
assisting with coding the project.
|
||||
assisting with coding the project. See Appendix~\ref{appendix:ai-use} for full details.
|
||||
|
||||
\textbf{Scope of AI Usage.} AI was used to assist in the structural organization, grammatical refinement, and syntactic formatting of the prose and technical descriptions.
|
||||
\section*{Project Summary}\label{project-summary}
|
||||
|
||||
\textbf{Prototyping \& Feasibility Research.} LLMs were employed during the R\&D phase to \textbf{scope technical requirements and perform feasibility checks}. This included generating "throwaway" boilerplate code to test the viability of specific architectural branches (e.g., comparing custom fine tuning against LangGraph API) and validating the compatibility of the Model Context Protocol (MCP) with the existing Django environment.
|
||||
\textbf{Context.} Corporate onboarding imposes a recurring productivity tax on senior staff, who must repeatedly deliver role-specific knowledge transfer to new hires. This problem is acute in small teams where training capacity is limited and inconsistency compounds over time.
|
||||
|
||||
\textbf{Originality of Content.} All core architectural concepts, the design of the \emph{Dynavera} system, the "Distributed Agentic Pattern" logic, and the specific implementation strategies are my own original works.
|
||||
\textbf{Problem.} Existing onboarding tools either rely on static documentation or generic chatbots that lack role awareness, session continuity, and the ability to generate structured curricula from internal organisational knowledge.
|
||||
|
||||
\textbf{Fact-Checking and References.} Any external information or technical claims used to ground the AI\textquotesingle s output have been verified against the primary sources listed in the References section.
|
||||
\textbf{Solution.} Dynavera is a distributed agentic onboarding platform built on Django and MCP, comprising four specialist agents (Curriculum, Knowledge, Assessment, and Progress Monitor) that collaborate to deliver adaptive, role-grounded training from privately hosted documents using local LLM inference.
|
||||
|
||||
\textbf{Human Oversight.} I have critically reviewed, edited, and refined all AI-generated suggestions to ensure technical accuracy and alignment with the project's objectives.
|
||||
\textbf{Impact and Results.} The system is fully deployed and benchmarked: LLM inference is the dominant latency contributor at roughly 8--12\,s per turn, while retrieval and tool-call overhead remain negligible, confirming that the distributed architecture correctly isolates high-latency work from the responsive application layer.
|
||||
|
||||
\section*{Inspector Access Details}\label{inspector-access-details}
|
||||
|
||||
The public deployment for evaluation is available at:
|
||||
\url{https://fyp.viswamedha.com}
|
||||
|
||||
Register as a manager (with code \texttt{MANAGER2026}) or use the following credentials for testing:
|
||||
|
||||
\begin{center}
|
||||
\begin{tabular}{p{0.22\linewidth} p{0.46\linewidth} p{0.22\linewidth}}
|
||||
\toprule
|
||||
Role & Email & Password \\
|
||||
\midrule
|
||||
Admin & admin@example.com & admin \\
|
||||
Manager & haleisaac@example.com & password \\
|
||||
User & j.thompson@example.com & password \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
|
||||
\textit{Note: The public site should always be available, but the GPU node
|
||||
runs on my PC and can go offline. For reliable testing,
|
||||
I recommend running my development compose stack on a CUDA-enabled machine with a GPU.}
|
||||
\textbf{Conclusion.} Dynavera demonstrates that a production-viable, privacy-preserving agentic training runtime can be built on consumer-grade hardware within a standard web framework, establishing a concrete foundation for controlled onboarding studies and further empirical validation. Inspector access details are provided in Appendix~\ref{appendix:inspector}.
|
||||
|
||||
\section{Introduction}\label{introduction}
|
||||
|
||||
|
|
@ -109,13 +89,13 @@ By addressing this gap, Dynavera enables organizations to:
|
|||
\begin{itemize}
|
||||
\item
|
||||
Scale Mentorship: Support multiple new hires simultaneously while
|
||||
minimising senior staff intervention
|
||||
reducing senior staff intervention
|
||||
\item
|
||||
Standardize Quality: Ensure consistent depth, structure, and
|
||||
Standardize Quality: Ensure consistent depth and
|
||||
assessment across all onboarding experiences
|
||||
\item
|
||||
Reduce Time-to-Productivity (TTP): Provide 24/7 access to contextual,
|
||||
role-aware support through AI agents
|
||||
Reduce Time-to-Productivity (TTP): Provide 24/7 access to contextual
|
||||
agentic support
|
||||
\end{itemize}
|
||||
|
||||
Dynavera is designed as a proof-of-concept platform that transforms
|
||||
|
|
@ -197,7 +177,7 @@ contextual reasoning, and adaptive response generation, making them
|
|||
well-suited for interactive, role-aware training scenarios. Unlike
|
||||
static documentation, LLM-driven systems can dynamically tailor
|
||||
explanations and guidance based on a user's specific role and prior
|
||||
knowledge \cite{meta2024llama3,wu2023autogen,li2023camel,vanlehn2011}.
|
||||
knowledge \cite{wu2023autogen,li2023camel,vanlehn2011}.
|
||||
Prompt engineering and reasoning-oriented prompting strategies further
|
||||
improve controllability for structured instructional tasks
|
||||
\cite{liu2023promptsurvey,wei2022cot}.
|
||||
|
|
@ -227,16 +207,14 @@ Furthermore, agent collaboration enables training workflows that more
|
|||
closely resemble human mentorship, where guidance and evaluation occur
|
||||
in parallel. This architecture allows Dynavera to serve not only the
|
||||
trainee but also the broader organizational stakeholders, including HR
|
||||
departments and team leads. By capturing granular interaction data, the
|
||||
modularity, explainability, and system adaptability
|
||||
\cite{langgraph2024,wu2023autogen,li2023camel}.
|
||||
departments and team leads. By capturing granular interaction data, Dynavera enables enhanced organisational visibility across three dimensions \cite{langgraph2024,wu2023autogen,li2023camel}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Integral Progress Analytics: Automated reports and charts track
|
||||
trainee milestones in real-time, allowing HR to identify exactly where
|
||||
organizational knowledge evolves
|
||||
\cite{lewis2020rag,karpukhin2020dpr,gao2023ragsurvey,pinecone2023rag}.
|
||||
\cite{lewis2020rag,karpukhin2020dpr,gao2023ragsurvey}.
|
||||
\item
|
||||
Continuous Curriculum Optimization: The system can flag specific
|
||||
training modules that frequently cause friction or confusion,
|
||||
|
|
@ -269,32 +247,31 @@ enable scalable, context-aware onboarding:
|
|||
modularity, explainability, and system adaptability \cite{langgraph2024}.
|
||||
\item
|
||||
Retrieval-Augmented Generation (RAG): Training responses are grounded
|
||||
in authoritative, organization-specific documentation rather than
|
||||
relying solely on a model's parametric knowledge. This ensures factual
|
||||
accuracy, contextual relevance, and rapid adaptability as
|
||||
organizational knowledge evolves \cite{pinecone2023rag}.
|
||||
in authoritative, role-specific documentation rather than relying
|
||||
solely on a model's parametric knowledge. This ensures factual
|
||||
accuracy, contextual relevance, and adaptability as organisational
|
||||
knowledge evolves \cite{gao2023ragsurvey}.
|
||||
\end{itemize}
|
||||
|
||||
To address data privacy and deployment constraints, Dynavera prioritizes
|
||||
local inference using quantized open-weight models (e.g., Llama 3 in
|
||||
GGUF format). This design choice reduces dependency on external cloud
|
||||
APIs, supports offline or air-gapped environments, and aligns with
|
||||
enterprise privacy requirements while maintaining acceptable inference
|
||||
performance \cite{meta2024llama3,dettmers2023bitsandbytes,llamacpp2024}.
|
||||
local inference using quantized open-weight models in GGUF format. This design
|
||||
choice reduces dependency on external cloud APIs, supports offline or air-gapped
|
||||
environments, and aligns with enterprise privacy requirements while maintaining
|
||||
acceptable inference performance \cite{dettmers2023bitsandbytes,llamacpp2024}.
|
||||
|
||||
\textbf{Model Selection Rationale.}
|
||||
Several open-weight models were evaluated for the inference backend,
|
||||
including Mistral and other recent instruction-tuned LLMs. Ultimately,
|
||||
\path{Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf} was selected for deployment.
|
||||
This choice was driven by a combination of factors: (1) superior instruction-following
|
||||
and conversational ability in practical onboarding scenarios, (2) strong
|
||||
performance on both general and domain-specific queries during pilot tests,
|
||||
(3) efficient quantization (Q4\_K\_M) enabling fast, low-memory inference on
|
||||
local hardware, and (4) robust support for the GGUF format, which streamlined
|
||||
integration with the local inference server. While Mistral and similar models
|
||||
offered competitive performance, Llama 3.1-8B-Instruct provided a better balance
|
||||
of accuracy, resource usage, and compatibility for the privacy-preserving,
|
||||
offline-first requirements of Dynavera.
|
||||
Four open-weight models were evaluated against a fixed set of 20 role-scoped onboarding prompts
|
||||
covering curriculum generation, knowledge explanation, assessment question generation, and
|
||||
free-form HR Q\&A. Each response was rated 1--5 on instruction-following, factual grounding, and
|
||||
format compliance. Full results and per-model observations are recorded in
|
||||
\path{docs/model-selection-benchmarks.md}.
|
||||
|
||||
\textbf{Meta-Llama-3.1-8B-Instruct-Q4\_K\_M} was selected as the inference model. It achieved
|
||||
the highest quality score among feasible candidates and produced valid JSON-format outputs on
|
||||
18/20 structured generation prompts without retries --- a critical property for the
|
||||
\texttt{\_extract\_json\_list} parsing step. A higher-scoring 14B candidate was trialled but
|
||||
eliminated because its memory footprint left no headroom for the nomic-embed-text embedding
|
||||
model that runs concurrently during document ingestion.
|
||||
|
||||
\subsection{Positioning Against Alternative
|
||||
Approaches}\label{positioning-against-alternative-approaches}
|
||||
|
|
@ -304,10 +281,10 @@ human-only onboarding preserves expert nuance but scales poorly and
|
|||
imposes recurring opportunity cost on senior staff. Second, static
|
||||
LMS/document-first onboarding scales distribution but offers limited
|
||||
adaptivity, weak grounding during Q\&A, and minimal operational
|
||||
traceability beyond completion events. Third, a single general chatbot
|
||||
traceability beyond completion events \cite{vanlehn2011}. Third, a single general chatbot
|
||||
improves interactivity, but it often collapses curriculum, retrieval,
|
||||
assessment, and monitoring into one prompt surface, which weakens
|
||||
governance and makes targeted iteration harder.
|
||||
governance and makes targeted iteration harder \cite{wu2023autogen,guo2024massurvey}.
|
||||
|
||||
The Dynavera architecture chooses a middle path: specialized agent roles
|
||||
within one orchestrated runtime, retrieval-grounded generation, and
|
||||
|
|
@ -316,6 +293,36 @@ system complexity in exchange for clearer responsibility boundaries,
|
|||
better modularity, and stronger alignment between training delivery,
|
||||
evaluation quality, and management oversight.
|
||||
|
||||
\subsection{Industry Comparison: Commercial Onboarding Platforms}\label{industry-comparison}
|
||||
|
||||
Dynavera can be further contextualised against established commercial HR and onboarding products. While tools such as HiBob \cite{hibob2024}, Leena.ai \cite{leena2024}, and Leapsome \cite{leapsome2024} address adjacent problems, they are fundamentally \emph{Systems of Record}: platforms that manage people, tasks, and compliance workflows. Dynavera is designed as a \emph{System of Intelligence}: a runtime that actively generates and delivers role-specific knowledge. Table~\ref{tab:industry-comparison} summarises the key differences.
|
||||
|
||||
\begingroup\hbadness=10000
|
||||
\begin{table}[H]
|
||||
\centering
|
||||
\begin{tabularx}{\linewidth}{>{\raggedright\arraybackslash}p{0.15\linewidth} >{\raggedright\arraybackslash}p{0.16\linewidth} >{\raggedright\arraybackslash}p{0.18\linewidth} >{\raggedright\arraybackslash}p{0.16\linewidth} X}
|
||||
\toprule
|
||||
Feature & HiBob & Leena.ai & Leapsome & Dynavera \\
|
||||
\midrule
|
||||
Core identity & Modern HRIS & AI service desk & Perf.\ \& LMS & Agentic onboarding runtime \\
|
||||
AI integration & Generative summaries & LLM RAG chatbot & AI feedback suggestions & Multi-agent orchestration (MCP) \\
|
||||
Data privacy & Standard cloud SaaS & Enterprise cloud & Enterprise cloud & Privacy-first, local GPU inference \\
|
||||
Onboarding style & Checklist-driven & Q\&A / workflow automation & Doc-based learning paths & Real-time, adaptive instruction \\
|
||||
\bottomrule
|
||||
\end{tabularx}
|
||||
\caption{Comparison of Dynavera against established commercial onboarding platforms.}
|
||||
\label{tab:industry-comparison}
|
||||
\end{table}
|
||||
\endgroup
|
||||
|
||||
\textbf{HiBob} is primarily an HRIS: it manages payroll, attendance, and employee records, treating onboarding as a checklist process (sign this document, read that policy). It has no concept of a Knowledge Agent or Assessment Agent that can dynamically instruct a new hire based on internal documentation. It tracks people; it does not teach them.
|
||||
|
||||
\textbf{Leena.ai} is the closest commercial analogue in terms of AI. It uses LLMs to help employees retrieve answers to HR questions and automate service-desk workflows. However, Leena.ai is optimised for retrieval of existing answers, not curriculum generation. It lacks the distributed agentic pattern: while it can respond to a single query, it does not follow a structured Curriculum $\rightarrow$ Knowledge $\rightarrow$ Assessment loop that adapts based on a trainee's live session state.
|
||||
|
||||
\textbf{Leapsome} focuses on performance management and learning enablement. Its learning module is a traditional LMS that hosts human-authored videos and documents. If the content does not exist, the learner cannot progress. Dynavera bridges this gap: the MCP Router allows agents to synthesise role-specific training on the fly from raw organisational documentation stored in pgvector, rather than requiring pre-authored content for every scenario.
|
||||
|
||||
In each case, the gap Dynavera addresses is not a missing feature but a missing architectural category: none of these platforms combine privacy-first local inference, streaming agentic orchestration, semantic retrieval grounding, and persistent session auditability in a single deployable runtime.
|
||||
|
||||
\subsection{Related Work Synthesis}\label{related-work-synthesis}
|
||||
|
||||
Recent research supports the technical direction selected for Dynavera,
|
||||
|
|
@ -331,9 +338,7 @@ for retrieval and progress updates \cite{schick2023toolformer,yao2023react}.
|
|||
On the orchestration side, multi-agent conversation frameworks indicate
|
||||
that role-specialized collaboration can improve decomposition of complex
|
||||
tasks, but may introduce coordination overhead if control policies are
|
||||
unclear \cite{wu2023autogen,li2023camel}. Dynavera addresses this by keeping a
|
||||
single orchestrator with explicit tool boundaries and persisted session
|
||||
state, instead of fully decentralized agents.
|
||||
unclear \cite{wu2023autogen,li2023camel}. Surveys of LLM-based multi-agent systems characterise the general MAS workflow as a pipeline of perception, reasoning, interaction, and evolution stages, where agents typically communicate peer-to-peer with limited coupling to persistent application state \cite{guo2024massurvey}. Dynavera diverges from this pattern in two key respects. First, rather than treating agent interaction as an isolated conversational process, orchestration is embedded within a web application runtime (Django Channels), giving each agent turn direct access to persisted session state, relational progress records, and organisational knowledge via the MCP router. Second, while prior MAS architectures emphasise decentralised agent-to-agent coordination for emergent behaviour, Dynavera adopts a centrally orchestrated, state-persistent model that prioritises auditability and deterministic recovery over emergent flexibility. This trade-off is appropriate for a production onboarding context, where reproducibility and governance matter as much as adaptivity.
|
||||
|
||||
From a learning-science perspective, prior tutoring studies suggest that
|
||||
interactive, adaptive guidance can produce better learning outcomes than
|
||||
|
|
@ -406,23 +411,24 @@ components, ensuring real-time interactivity.
|
|||
\subsection{Technology stack}\label{technology-stack}
|
||||
|
||||
Dynavera is implemented as a modern full-stack application, with the
|
||||
components presented in Table 1.
|
||||
components presented in Table~\ref{tab:tech-stack}.
|
||||
|
||||
\begin{table}[H]
|
||||
\centering
|
||||
\begin{tabularx}{\linewidth}{p{0.22\linewidth} p{0.16\linewidth} X}
|
||||
\begin{tabularx}{\linewidth}{p{0.12\linewidth} p{0.16\linewidth} X}
|
||||
\toprule
|
||||
Component & Technology & Rationale \\
|
||||
\midrule
|
||||
Frontend/UI & Vue 3 w/ TS & Typesafe, reactive UI enabling rapid iteration and maintainable component design \\
|
||||
State Management & Pinia & Centralized, predictable state management for real-time training progress tracking \\
|
||||
Backend/API & Django REST & Secure, mature framework supporting rapid development and scalable API design, informed by prior production experience \\
|
||||
UI & Vue 3 w/ TS & Type-safe, reactive UI enabling rapid iteration and maintainable component design \\
|
||||
State & Pinia & Centralized, predictable state management for real-time training progress tracking \\
|
||||
API & Django REST & Secure, mature framework supporting rapid development and scalable API design, informed by prior production experience \\
|
||||
Database & PostgreSQL & Reliable, production-grade relational database for organizational and user data \\
|
||||
Vector Store & PgVector & Efficient similarity search over embedded training documentation via PostgreSQL \\
|
||||
Embeddings & PgVector & Efficient similarity search over embedded training documentation via PostgreSQL \\
|
||||
MCP Router & Python & Provides a standardized interface for agents to query data using the Model Context Protocol \\
|
||||
\bottomrule
|
||||
\end{tabularx}
|
||||
\caption{Architectural components of the Dynavera platform, including frontend, backend, and AI integration technologies.}
|
||||
\label{tab:tech-stack}
|
||||
\end{table}
|
||||
|
||||
This stack was selected through explicit privacy, governance, and
|
||||
|
|
@ -430,7 +436,7 @@ operability trade-offs rather than convenience alone. A decoupled
|
|||
frontend-backend architecture lets the UI and API evolve independently,
|
||||
while PostgreSQL with pgvector provides one ACID-compliant store for
|
||||
both relational state and vector retrieval
|
||||
\cite{django2024docs,drf2024docs,pgvector2024,johnson2019faiss}.
|
||||
\cite{django2024docs,drf2024docs,pgvector2024}.
|
||||
|
||||
Alternatives considered included LangChain-style orchestration,
|
||||
external vector databases (for example Pinecone), and cloud-hosted LLM
|
||||
|
|
@ -453,7 +459,7 @@ Pattern}\label{design-philosophy-the-distributed-agentic-pattern}
|
|||
Dynavera leverages the Model Context Protocol (MCP) to solve the
|
||||
``context gap'' in corporate onboarding. Rather than providing the LLM
|
||||
with a static, bloated prompt, the system utilizes a Sidecar Tooling
|
||||
approach \cite{anthropic2024mcp,huggingface2024mcp,schick2023toolformer,yao2023react}:
|
||||
approach \cite{anthropic2024mcp,schick2023toolformer,yao2023react}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
|
|
@ -504,7 +510,7 @@ The API surface is intentionally split by interaction pattern. Standard
|
|||
management operations are handled through Django REST Framework (for
|
||||
example role membership, training file upload, and session endpoints),
|
||||
while orchestration-time interaction uses Django Channels over
|
||||
WebSockets at /ws/onboarding/\textless session\_uuid\textgreater/. This
|
||||
WebSockets at \path{/ws/onboarding/<session_uuid>/}. This
|
||||
allows the platform to handle both CRUD-style workflows and
|
||||
long-running, stateful agent interactions without forcing either pattern
|
||||
into the other \cite{drf2024docs,channels2024docs}.
|
||||
|
|
@ -513,7 +519,8 @@ For ingestion, the backend follows an asynchronous execution path:
|
|||
uploaded files are stored as TrainingFile records, and a post-save
|
||||
trigger enqueues background processing through Celery (Redis broker).
|
||||
This prevents heavy preprocessing from blocking request-response latency
|
||||
on the main web process \cite{celery2024docs,redis2024docs}.
|
||||
on the main web process \cite{celery2024docs,redis2024docs}
|
||||
(\texttt{apps/knowledge/tasks.py:45--117}).
|
||||
|
||||
Persistence is model-driven and traceable. Session state, progress,
|
||||
generated onboarding structures, and interaction events are stored in
|
||||
|
|
@ -528,15 +535,7 @@ PostgreSQL/pgvector as a unified data plane.
|
|||
\subsubsection{Knowledge Ingestion
|
||||
Workflow}\label{knowledge-ingestion-workflow}
|
||||
|
||||
Figure~\ref{fig:embedding-data-flow} shows the ingestion data flow between the User/UI, Django REST
|
||||
API, Celery worker, PostgreSQL/pgvector database, and GPU endpoint.
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
\includegraphics[width=5.75521in,height=5.14354in]{diagrams/embedding-data-flow.png}
|
||||
\caption{Knowledge ingestion data flow diagram, illustrating the interaction between the user, REST API, Celery worker, pgvector database, and GPU endpoint.}
|
||||
\label{fig:embedding-data-flow}
|
||||
\end{figure}
|
||||
The ingestion data flow between the User/UI, Django REST API, Celery worker, PostgreSQL/pgvector database, and GPU endpoint is shown in Figure~\ref{fig:embedding-data-flow} (Appendix~\ref{appendix:diagrams}).
|
||||
|
||||
\underline{Asynchronous processing with Celery (Redis broker)}\\
|
||||
When a manager uploads a training file from the UI, the file is sent to
|
||||
|
|
@ -551,7 +550,7 @@ batches long content, and calls the GPU service at /v1/semantic-chunk.
|
|||
The service performs sentence-level semantic breakpoint detection using
|
||||
embedding-distance thresholds, then returns coherent chunks with
|
||||
embeddings. This avoids naive fixed-size splits that can break context
|
||||
mid-concept \cite{reimers2019sbert,sbert2024docs,fastapi2024docs}.
|
||||
mid-concept \cite{reimers2019sbert,fastapi2024docs}.
|
||||
|
||||
\underline{Vector storage and retrieval with pgvector}\\
|
||||
Returned chunk embeddings are stored in KnowledgeChunk.embedding (768
|
||||
|
|
@ -559,19 +558,13 @@ dimensions) in PostgreSQL using pgvector, linked relationally to role
|
|||
and source file metadata. Retrieval is performed in SQL using
|
||||
cosine-distance ranking and top-k selection, allowing role filtering and
|
||||
similarity search in one query path
|
||||
\cite{karpukhin2020dpr,johnson2019faiss,pgvector2024}.
|
||||
\cite{karpukhin2020dpr,pgvector2024}
|
||||
(\texttt{apps/onboarding/mcp.py:101--127}).
|
||||
|
||||
\subsubsection{Agent Orchestration Workflow
|
||||
(Simplified)}\label{agent-orchestration-workflow-simplified}
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
\includegraphics[width=6.15132in,height=6.00619in]{diagrams/agent-orchestration-loop.png}
|
||||
\caption{Agent orchestration data flow diagram, illustrating the interaction between the user/UI, WebSocket consumer, MCP router, GPU endpoint, and pgvector database.}
|
||||
\label{fig:agent-orchestration-loop}
|
||||
\end{figure}
|
||||
|
||||
Figure~\ref{fig:agent-orchestration-loop} summarizes the orchestration path used during live onboarding.
|
||||
Figure~\ref{fig:agent-orchestration-loop} (Appendix~\ref{appendix:diagrams}) summarizes the orchestration path used during live onboarding.
|
||||
The runtime is implemented as a Django Channels WebSocket consumer
|
||||
(\path{/ws/onboarding/<session_uuid>/}), which maintains a persistent
|
||||
two-way connection so the UI can receive real-time status updates
|
||||
|
|
@ -610,13 +603,17 @@ runtime where each stage contributes to structured onboarding output.
|
|||
|
||||
Tool-mediated grounding is handled through the MCP router. During
|
||||
orchestration, model responses may include tool calls; the runtime
|
||||
executes approved tools (such as search\_knowledge and
|
||||
update\_progress), retrieves contextual evidence from pgvector-backed
|
||||
executes approved tools (such as \texttt{search\_knowledge} and
|
||||
\texttt{update\_progress}), retrieves contextual evidence from pgvector-backed
|
||||
documents, and injects those results back into the message loop before
|
||||
final answer generation. This keeps generation anchored in role-specific
|
||||
final answer generation (\path{consumers/base.py:77-132},
|
||||
\path{mcp.py:78-159}). This keeps generation anchored in role-specific
|
||||
organizational material while preserving a controlled boundary between
|
||||
model reasoning and data access.
|
||||
|
||||
Pseudocode for the orchestration loop, MCP tool dispatch, ingestion pipeline, and CA/KA/AA
|
||||
generation sequence is provided in \path{docs/orchestration-pseudocode.md}.
|
||||
|
||||
\subsection{Workflow Implementation}\label{workflow-implementation}
|
||||
|
||||
\begin{figure}[H]
|
||||
|
|
@ -638,7 +635,8 @@ opens a persistent WebSocket connection to the orchestration endpoint
|
|||
and submits user prompts/actions as session events. The orchestrator
|
||||
resolves the active configuration for that role/session, runs model
|
||||
inference, executes retrieval tools when required, and emits structured
|
||||
runtime events (status/tool/completion) back to the client.
|
||||
runtime events (status/tool/completion) back to the client
|
||||
(\texttt{apps/onboarding/consumers/generate.py:34--124}).
|
||||
|
||||
During guided learning, module content generation, context retrieval,
|
||||
and assessment output are coordinated in sequence. The curriculum phase
|
||||
|
|
@ -710,40 +708,44 @@ retrieval effectiveness, and (3) operational feasibility.
|
|||
onboarding, validating the privacy-first local inference objective.
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Quantitative Evaluation}\label{quantitative-evaluation}
|
||||
\textbf{Contributions Realised}
|
||||
|
||||
To strengthen the engineering evaluation beyond qualitative observations,
|
||||
representative measurements were collected from controlled development
|
||||
runs using role-scoped onboarding prompts and tool-enabled inference
|
||||
calls.
|
||||
The introduction stated three primary contributions. Each is directly evidenced by the implemented system:
|
||||
|
||||
\begin{table}[H]
|
||||
\centering
|
||||
\begin{tabularx}{\linewidth}{>{\raggedright\arraybackslash}p{0.32\linewidth} >{\raggedright\arraybackslash}p{0.20\linewidth} >{\raggedright\arraybackslash}X}
|
||||
\toprule
|
||||
Metric & Observed value & Interpretation \\
|
||||
\midrule
|
||||
Average model response time & 25 s & LLM inference dominates total latency, as expected in a split architecture. \\
|
||||
Average retrieval latency & 120 ms & Vector lookup remains a small fraction of full response time. \\
|
||||
Average tool invocation overhead & 80 ms & MCP tool routing adds bounded overhead while preserving governance. \\
|
||||
Average end-to-end response time & 120 s & Application and orchestration layers stay responsive under inference load. \\
|
||||
Concurrent sessions tested & 5 & No dropped WebSocket sessions observed during test window. \\
|
||||
Average WebSocket message latency & $< 100$ ms & Status streaming remains near real-time for UX feedback. \\
|
||||
Observed VRAM usage / decode speed & 8.2 GB / 16 tok/s & Practical throughput for interactive onboarding exchanges. \\
|
||||
\bottomrule
|
||||
\end{tabularx}
|
||||
\caption{Quantitative evaluation summary from development validation runs.}
|
||||
\label{tab:quantitative-evaluation}
|
||||
\end{table}
|
||||
\begin{enumerate}
|
||||
\item \textbf{A distributed agentic onboarding architecture.}
|
||||
The system physically separates the application layer (Django, Celery, PostgreSQL) from the inference layer (FastAPI, llama.cpp), connected via authenticated HTTP. Four agent roles --- Curriculum, Knowledge, Assessment, and Progress Monitor --- operate within a shared orchestration runtime with distinct responsibilities and configuration records. The architecture is fully deployed at \url{https://fyp.viswamedha.com} and reproducible via the submitted Docker Compose stack.
|
||||
|
||||
These measurements support the central design claim: the distributed
|
||||
runtime isolates high-latency model execution from the main application
|
||||
path while retaining low-latency orchestration and status streaming.
|
||||
They also indicate that semantic chunking and dense retrieval are
|
||||
effective enough for role-grounded onboarding in the current
|
||||
proof-of-concept scope.
|
||||
\item \textbf{A tool-aware orchestration runtime integrated with Django.}
|
||||
The \texttt{orchestrate} method (\path{consumers/base.py:77-132}) implements a multi-turn agentic loop: the model receives tool definitions at each inference step, may invoke approved MCP tools (\texttt{search\_knowledge}, \texttt{update\_progress}, \texttt{get\_role\_context}), and receives structured tool results before generating a final response. This loop is embedded directly within a Django Channels WebSocket consumer, giving it access to the full Django ORM and session state --- a deliberate integration decision documented in Section~\ref{design-philosophy-the-distributed-agentic-pattern}.
|
||||
|
||||
\subsubsection{Limitations}\label{limitations}
|
||||
\item \textbf{A privacy-preserving RAG training system using local LLM inference.}
|
||||
All model inference runs on a local GPU node using a quantized open-weight model (\path{Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf}) via llama.cpp. Organisation documents never leave the deployment environment: ingestion, embedding, and retrieval all operate within the self-hosted stack. The ingestion pipeline (\path{knowledge/tasks.py:45-117}) processes uploaded files into role-scoped vector chunks stored in pgvector, which are retrieved at inference time using cosine-distance search rather than any external API.
|
||||
\end{enumerate}
|
||||
|
||||
Together, these contributions demonstrate that a production-viable, privacy-preserving agentic training system can be built and deployed on consumer-grade hardware within a standard web application framework.
|
||||
|
||||
Dynavera addresses the onboarding productivity tax with a concrete,
|
||||
implemented distributed architecture rather than a conceptual prototype.
|
||||
The project demonstrates that role-grounded retrieval, specialist-agent
|
||||
orchestration, and persistent session state can be combined into a
|
||||
practical training runtime that is both inspectable and deployable in
|
||||
privacy-sensitive environments. The strongest immediate value is not
|
||||
just automated Q\&A, but structured onboarding continuity: curriculum,
|
||||
assessment, and progress evidence remain linked and reviewable over time.
|
||||
|
||||
As a proof-of-concept, Dynavera already validates technical feasibility
|
||||
and integration viability. Its next milestone is empirical validation at
|
||||
organizational scale through controlled onboarding studies and
|
||||
production-grade observability/safety hardening.
|
||||
|
||||
\subsection{Quantitative Evaluation}\label{quantitative-evaluation}
|
||||
|
||||
An automated benchmark suite is included in the repository at \path{apps/accounts/management/commands/benchmark.py} and can be run via \texttt{manage.py benchmark}. It measures LLM inference latency across representative prompt types, embedding generation latency, semantic chunking throughput, and pgvector retrieval latency. Full results from a 10-run execution are recorded at \path{benchmarks/results_2026-03-24_13-29-55.md}.
|
||||
|
||||
The results confirm that LLM inference is the dominant latency contributor in the system, while retrieval and tool-call overhead remain negligible by comparison --- consistent with the architectural claim that the distributed split between the application layer and inference layer correctly isolates the high-latency work from the responsive orchestration path.
|
||||
|
||||
\subsection{Limitations}\label{limitations}
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
|
|
@ -757,11 +759,10 @@ proof-of-concept scope.
|
|||
Adversarial testing of tool-invocation policy remains limited,
|
||||
especially for prompt/tool misuse edge cases.
|
||||
\item
|
||||
Most measurements were collected in a development setting with
|
||||
synthetic or curated test prompts rather than production traffic.
|
||||
Benchmark measurements were collected against the development inference stack using role-scoped prompts; production traffic may exhibit different latency distributions under concurrent load.
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Future Improvements}\label{future-improvements}
|
||||
\subsection{Future Improvements}\label{future-improvements}
|
||||
|
||||
The next development phase should focus on measurable training outcomes,
|
||||
operational hardening, and richer adaptivity:
|
||||
|
|
@ -785,8 +786,8 @@ operational hardening, and richer adaptivity:
|
|||
around tool calls, implement stronger role-boundary tests, and add
|
||||
automated red-team style checks for prompt/tool misuse scenarios.
|
||||
\item
|
||||
\textbf{Scalability and observability:} Introduce request tracing,
|
||||
queue-depth dashboards, and load/performance benchmarks to support
|
||||
\textbf{Scalability and observability:} Add request tracing,
|
||||
queue-depth dashboards, and performance benchmarks to support
|
||||
multi-tenant deployment planning.
|
||||
\item
|
||||
\textbf{Multi-modal onboarding support:} Extend ingestion and
|
||||
|
|
@ -794,25 +795,63 @@ operational hardening, and richer adaptivity:
|
|||
real enterprise training assets.
|
||||
\end{itemize}
|
||||
|
||||
\subsubsection{Conclusion}\label{conclusion}
|
||||
|
||||
Dynavera addresses the onboarding productivity tax with a concrete,
|
||||
implemented distributed architecture rather than a conceptual prototype.
|
||||
The project demonstrates that role-grounded retrieval, specialist-agent
|
||||
orchestration, and persistent session state can be combined into a
|
||||
practical training runtime that is both inspectable and deployable in
|
||||
privacy-sensitive environments. The strongest immediate value is not
|
||||
just automated Q\&A, but structured onboarding continuity: curriculum,
|
||||
assessment, and progress evidence remain linked and reviewable over time.
|
||||
|
||||
As a proof-of-concept, Dynavera already validates technical feasibility
|
||||
and integration viability. Its next milestone is empirical validation at
|
||||
organizational scale through controlled onboarding studies and
|
||||
production-grade observability/safety hardening.
|
||||
|
||||
\section{References}\label{references}
|
||||
\bibliographystyle{unsrtnat}
|
||||
\bibliography{references}
|
||||
|
||||
\appendix
|
||||
|
||||
\section{AI Use Declaration}\label{appendix:ai-use}
|
||||
|
||||
\textbf{Scope of AI Usage.} AI was used to assist in the structural organization, grammatical refinement, and syntactic formatting of the prose and technical descriptions.
|
||||
|
||||
\textbf{Prototyping \& Feasibility Research.} LLMs were employed during the R\&D phase to \textbf{scope technical requirements and perform feasibility checks}. This included generating ``throwaway'' boilerplate code to test the viability of specific architectural branches (e.g., comparing custom fine-tuning against the LangGraph API) and validating the compatibility of the Model Context Protocol (MCP) with the existing Django environment.
|
||||
|
||||
\textbf{Originality of Content.} All core architectural concepts, the design of the \emph{Dynavera} system, the ``Distributed Agentic Pattern'' logic, and the specific implementation strategies are my own original work.
|
||||
|
||||
\textbf{Fact-Checking and References.} Any external information or technical claims used to ground the AI's output have been verified against the primary sources listed in the References section.
|
||||
|
||||
\textbf{Human Oversight.} I have critically reviewed, edited, and refined all AI-generated suggestions to ensure technical accuracy and alignment with the project's objectives.
|
||||
|
||||
\section{Detailed Data Flow Diagrams}\label{appendix:diagrams}
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
\includegraphics[height=3.8in]{diagrams/embedding-data-flow.png}
|
||||
\caption{Knowledge ingestion data flow diagram, illustrating the interaction between the user, REST API, Celery worker, pgvector database, and GPU endpoint.}
|
||||
\label{fig:embedding-data-flow}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
\includegraphics[width=6.15132in,height=6.00619in]{diagrams/agent-orchestration-loop.png}
|
||||
\caption{Agent orchestration data flow diagram, illustrating the interaction between the user/UI, WebSocket consumer, MCP router, GPU endpoint, and pgvector database.}
|
||||
\label{fig:agent-orchestration-loop}
|
||||
\end{figure}
|
||||
|
||||
\section{Inspector Access Details}\label{appendix:inspector}
|
||||
|
||||
The public deployment for evaluation is available at:
|
||||
\url{https://fyp.viswamedha.com}
|
||||
|
||||
Register as a manager (with code \texttt{MANAGER2026}) or use the following credentials for testing:
|
||||
|
||||
\begin{center}
|
||||
\begin{tabular}{p{0.22\linewidth} p{0.46\linewidth} p{0.22\linewidth}}
|
||||
\toprule
|
||||
Role & Email & Password \\
|
||||
\midrule
|
||||
Admin & admin@example.com & admin \\
|
||||
Manager & haleisaac@example.com & password \\
|
||||
User & j.thompson@example.com & password \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
|
||||
\textit{Note: The public site should always be available, but the GPU node
|
||||
runs on my PC and can go offline. For reliable testing,
|
||||
I recommend running the development compose stack on a machine with a CUDA-capable GPU.}
|
||||
|
||||
\end{document}
|
||||
|
||||
|
|
|
|||
87
site/launchreel.yaml
Normal file
87
site/launchreel.yaml
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
project:
|
||||
name: "Dynavera"
|
||||
url: "https://fyp.viswamedha.com"
|
||||
viewport: { width: 1440, height: 900, deviceScaleFactor: 2 }
|
||||
theme: "light"
|
||||
|
||||
scenarios:
|
||||
|
||||
- id: "manager-login-and-dashboard"
|
||||
name: "Manager Login and Dashboard Interactions"
|
||||
type: "screenshot+video"
|
||||
page: "/"
|
||||
actions:
|
||||
- wait: 2000
|
||||
- screenshot: { name: "01-home" }
|
||||
- click: { selector: ".ant-menu-item:nth-child(2)" }
|
||||
- wait: 1500
|
||||
- screenshot: { name: "02-about" }
|
||||
- click: { selector: ".ant-menu-item:nth-child(3)" }
|
||||
- wait: 1500
|
||||
- screenshot: { name: "03-getting-started" }
|
||||
- click: { selector: ".ant-menu-item:nth-child(4)" }
|
||||
- wait: 1500
|
||||
- screenshot: { name: "04-pricing" }
|
||||
- click: { selector: ".ant-btn-background-ghost" }
|
||||
- wait: 1500
|
||||
- screenshot: { name: "05-login-page" }
|
||||
- type: { selector: "input[type=\"email\"]", text: "haleisaac@example.com" }
|
||||
- type: { selector: "input[type=\"password\"]", text: "password" }
|
||||
- screenshot: { name: "06-login-filled" }
|
||||
- click: { selector: "button[type=\"submit\"]" }
|
||||
- wait: 3000
|
||||
- screenshot: { name: "07-org-overview" }
|
||||
- scroll: { y: 350 }
|
||||
- wait: 700
|
||||
- screenshot: { name: "08-org-roles" }
|
||||
- scroll: { y: 0 }
|
||||
- click: { selector: ".header .ant-btn-primary" }
|
||||
- wait: 7000
|
||||
- screenshot: { name: "09-manage-details" }
|
||||
- click: { selector: ".ant-tabs-tab:has-text(\"Members\")" }
|
||||
- wait: 1000
|
||||
- screenshot: { name: "10-manage-members" }
|
||||
- click: { selector: ".ant-tabs-tab:has-text(\"Roles\")" }
|
||||
- wait: 1000
|
||||
- screenshot: { name: "11-manage-roles" }
|
||||
- click: { selector: ".ant-tabs-tab:has-text(\"Files\")" }
|
||||
- wait: 1000
|
||||
- screenshot: { name: "12-manage-files" }
|
||||
- click: { selector: ".ant-menu-item:nth-child(5)" }
|
||||
- wait: 2000
|
||||
- screenshot: { name: "13-agents-list" }
|
||||
- click: { selector: ".ant-menu-item:nth-child(7)" }
|
||||
- wait: 3000
|
||||
- screenshot: { name: "14-progress-overview" }
|
||||
|
||||
- id: "trainee-login-and-onboarding"
|
||||
name: "Trainee Login and Onboarding Interactions"
|
||||
type: "screenshot+video"
|
||||
page: "/"
|
||||
actions:
|
||||
- click: { selector: ".ant-btn-background-ghost" }
|
||||
- wait: 3000
|
||||
- click: { selector: ".ant-btn-background-ghost" }
|
||||
- wait: 1500
|
||||
- type: { selector: "input[type=\"email\"]", text: "j.thompson@example.com" }
|
||||
- type: { selector: "input[type=\"password\"]", text: "password" }
|
||||
- screenshot: { name: "15-trainee-login" }
|
||||
- click: { selector: "button[type=\"submit\"]" }
|
||||
- wait: 3000
|
||||
- screenshot: { name: "16-trainee-org-view" }
|
||||
- scroll: { y: 250 }
|
||||
- wait: 700
|
||||
- screenshot: { name: "17-trainee-roles-list" }
|
||||
- click: { selector: ".role-item:first-of-type .ant-btn-default" }
|
||||
- wait: 4000
|
||||
- screenshot: { name: "18-onboarding-entry" }
|
||||
- scroll: { y: 400 }
|
||||
- wait: 1000
|
||||
- screenshot: { name: "19-onboarding-mid" }
|
||||
- scroll: { y: 800 }
|
||||
- wait: 700
|
||||
- screenshot: { name: "20-onboarding-bottom" }
|
||||
|
||||
export:
|
||||
videos: { format: "mp4", fps: 24, crf: 18 }
|
||||
combined_video: { enabled: true, name: "demo-recording.mp4" }
|
||||
1716
site/package-lock.json
generated
1716
site/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -22,8 +22,10 @@
|
|||
"ant-design-vue": "^4.2.6",
|
||||
"axios": "^1.13.2",
|
||||
"dompurify": "^3.3.1",
|
||||
"launchreel": "^0.0.3",
|
||||
"marked": "^17.0.3",
|
||||
"pinia": "^3.0.4",
|
||||
"rrweb": "^2.0.0-alpha.20",
|
||||
"vue": "^3.5.26",
|
||||
"vue-router": "^4.6.4"
|
||||
},
|
||||
|
|
|
|||
BIN
site/public/launch-reel.mp4
Normal file
BIN
site/public/launch-reel.mp4
Normal file
Binary file not shown.
BIN
site/public/screenshots/feature-agent-workflows.png
Normal file
BIN
site/public/screenshots/feature-agent-workflows.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 221 KiB |
BIN
site/public/screenshots/feature-modular-content.png
Normal file
BIN
site/public/screenshots/feature-modular-content.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 788 KiB |
BIN
site/public/screenshots/feature-reporting.png
Normal file
BIN
site/public/screenshots/feature-reporting.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 143 KiB |
|
|
@ -10,6 +10,7 @@ import {
|
|||
UserAddOutlined,
|
||||
BuildOutlined,
|
||||
PayCircleOutlined,
|
||||
QuestionCircleOutlined,
|
||||
} from '@ant-design/icons-vue'
|
||||
import { useRoute, useRouter } from 'vue-router'
|
||||
import { useUserStore } from './stores/userStore'
|
||||
|
|
@ -30,6 +31,7 @@ type NavItem = {
|
|||
const navItems: NavItem[] = [
|
||||
{ key: '/', label: 'Home', icon: HomeOutlined, path: '/' },
|
||||
{ key: '/about', label: 'About', icon: InfoCircleOutlined, path: '/about' },
|
||||
{ key: '/getting-started', label: 'Get Started', icon: QuestionCircleOutlined, path: '/getting-started' },
|
||||
{ key: '/pricing', label: 'Pricing', icon: PayCircleOutlined, path: '/pricing' },
|
||||
{ key: '/agents', label: 'Agents', icon: RobotOutlined, path: '/agents', manager: true },
|
||||
{ key: '/organization', label: 'Organizations', icon: BuildOutlined, path: '/organization' },
|
||||
|
|
|
|||
|
|
@ -15,6 +15,11 @@ const router = createRouter({
|
|||
name: 'about',
|
||||
component: () => import('../views/AboutView.vue'),
|
||||
},
|
||||
{
|
||||
path: '/getting-started',
|
||||
name: 'getting-started',
|
||||
component: () => import('../views/GettingStartedView.vue'),
|
||||
},
|
||||
{
|
||||
path: '/pricing',
|
||||
name: 'pricing',
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ export interface TrainingFile {
|
|||
file_size: number
|
||||
file_type: string
|
||||
description: string
|
||||
error_message: string
|
||||
is_processed: boolean
|
||||
status: 'ingesting' | 'chunked' | 'embedded' | 'failed'
|
||||
file_url: string
|
||||
|
|
|
|||
|
|
@ -1,5 +1,29 @@
|
|||
<script setup lang="ts">
|
||||
import { Card, Typography, Divider, List } from 'ant-design-vue'
|
||||
import { useRouter } from 'vue-router'
|
||||
import { Card, Typography, Divider, List, Button, Space } from 'ant-design-vue'
|
||||
import { PlayCircleOutlined, GithubOutlined, CodeOutlined } from '@ant-design/icons-vue'
|
||||
|
||||
const router = useRouter()
|
||||
|
||||
const REPO = 'https://git.cs.bham.ac.uk/projects-2025-26/vxn217'
|
||||
|
||||
const composeLinks = [
|
||||
{
|
||||
label: 'Development compose',
|
||||
path: 'compose/dev/docker-compose.yml',
|
||||
desc: 'Local development stack with hot-reload and dev tooling. (Recommended for inspectors)',
|
||||
},
|
||||
{
|
||||
label: 'Production compose',
|
||||
path: 'compose/prod/docker-compose.yml',
|
||||
desc: 'Production stack with Traefik reverse proxy and optimised builds.',
|
||||
},
|
||||
{
|
||||
label: 'Production inference compose',
|
||||
path: 'compose/prod/docker-compose.inference.yml',
|
||||
desc: 'Separate GPU inference service for the production environment.',
|
||||
},
|
||||
]
|
||||
|
||||
const steps = [
|
||||
'Register or login.',
|
||||
|
|
@ -11,17 +35,17 @@ const features = [
|
|||
{
|
||||
title: 'Modular Content',
|
||||
desc: 'Compose learning journeys from small, reusable modules. Mix videos and interactive checks.',
|
||||
img: 'https://placehold.co/600x400?text=Modular+Content',
|
||||
img: '/screenshots/feature-modular-content.png',
|
||||
},
|
||||
{
|
||||
title: 'Agent Workflows',
|
||||
desc: 'Automate guidance and triggers with configurable agents to move users through onboarding steps.',
|
||||
img: 'https://placehold.co/600x400?text=Agent+Workflows',
|
||||
img: '/screenshots/feature-agent-workflows.png',
|
||||
},
|
||||
{
|
||||
title: 'Reporting & Insights',
|
||||
desc: 'Lightweight reports showing completion and engagement metrics.',
|
||||
img: 'https://placehold.co/600x400?text=Reporting',
|
||||
img: '/screenshots/feature-reporting.png',
|
||||
},
|
||||
]
|
||||
</script>
|
||||
|
|
@ -35,6 +59,11 @@ const features = [
|
|||
with modular content and agent-driven workflows. It is designed for teams that want
|
||||
tangible learning experiences quickly without complex LMS setup.
|
||||
</Typography.Paragraph>
|
||||
<div class="video-wrapper">
|
||||
<video controls class="launch-video">
|
||||
<source src="/launch-reel.mp4" type="video/mp4" />
|
||||
</video>
|
||||
</div>
|
||||
<Divider />
|
||||
<Typography.Title :level="4">Getting started</Typography.Title>
|
||||
<List :data-source="steps" :bordered="false">
|
||||
|
|
@ -45,6 +74,44 @@ const features = [
|
|||
</List.Item>
|
||||
</template>
|
||||
</List>
|
||||
<Space style="margin-top: 1rem">
|
||||
<Button type="primary" @click="router.push('/getting-started')">
|
||||
<PlayCircleOutlined />
|
||||
Get started
|
||||
</Button>
|
||||
<Button @click="router.push('/register')">Register now</Button>
|
||||
</Space>
|
||||
<Divider />
|
||||
<Typography.Title :level="4">Self-host</Typography.Title>
|
||||
<Typography.Paragraph type="secondary">
|
||||
Dynavera runs entirely on your own infrastructure. Clone the repository and use one
|
||||
of the Docker Compose stacks below to get up and running.
|
||||
</Typography.Paragraph>
|
||||
<div class="compose-list">
|
||||
<div v-for="c in composeLinks" :key="c.path" class="compose-row">
|
||||
<div class="compose-info">
|
||||
<Typography.Text strong>{{ c.label }}</Typography.Text>
|
||||
<Typography.Text type="secondary" class="compose-desc">{{ c.desc }}</Typography.Text>
|
||||
</div>
|
||||
<a :href="`${REPO}/-/blob/main/${c.path}`" target="_blank" rel="noopener">
|
||||
<Button size="small">
|
||||
<CodeOutlined />
|
||||
View file
|
||||
</Button>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
<Space style="margin-top: 1rem">
|
||||
<a :href="REPO" target="_blank" rel="noopener">
|
||||
<Button type="primary">
|
||||
<GithubOutlined />
|
||||
View repository
|
||||
</Button>
|
||||
</a>
|
||||
<a :href="`${REPO}/-/blob/main/compose/prod/docker-compose.yml`" target="_blank" rel="noopener">
|
||||
<Button>Self host</Button>
|
||||
</a>
|
||||
</Space>
|
||||
<Divider />
|
||||
<Typography.Title :level="4">Features</Typography.Title>
|
||||
<div class="features">
|
||||
|
|
@ -80,12 +147,39 @@ const features = [
|
|||
.row {
|
||||
padding: 0.5rem 0;
|
||||
}
|
||||
.hero {
|
||||
width: 100%;
|
||||
height: 320px;
|
||||
object-fit: cover;
|
||||
.compose-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.6rem;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
.compose-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 1rem;
|
||||
background: #f8fafc;
|
||||
border: 1px solid #dbe3ec;
|
||||
border-radius: 6px;
|
||||
margin-bottom: 1rem;
|
||||
padding: 0.65rem 1rem;
|
||||
}
|
||||
.compose-info {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.1rem;
|
||||
}
|
||||
.compose-desc {
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
.video-wrapper {
|
||||
margin: 1rem 0 0.5rem;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.launch-video {
|
||||
width: 100%;
|
||||
display: block;
|
||||
border-radius: 8px;
|
||||
}
|
||||
.features {
|
||||
display: grid;
|
||||
|
|
@ -108,7 +202,6 @@ const features = [
|
|||
.feature-body {
|
||||
padding: 0.75rem 1rem;
|
||||
}
|
||||
|
||||
.feature-body :deep(.ant-typography-secondary) {
|
||||
color: #4b5563 !important;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ const agentTypeOptions = [
|
|||
]
|
||||
|
||||
const agentTypeDescriptions: Record<string, string> = {
|
||||
curriculum: 'Guides new hires through a structured onboarding path — presenting content, tasks, and milestones in a defined sequence for a given role.',
|
||||
curriculum: 'Guides new hires through a structured onboarding path, presenting content, tasks, and milestones in a defined sequence for a given role.',
|
||||
knowledge: 'Answers ad-hoc questions by searching your uploaded training documents and knowledge base. Use this for open-ended Q&A during onboarding.',
|
||||
assessment: 'Tests understanding through role-specific questions and scenarios, then reports results back to the onboarding session so progress can be tracked.',
|
||||
monitor: 'Tracks overall session progress and surfaces completions or blockers for manager review without directly interacting with the new hire.',
|
||||
|
|
|
|||
472
site/src/views/GettingStartedView.vue
Normal file
472
site/src/views/GettingStartedView.vue
Normal file
|
|
@ -0,0 +1,472 @@
|
|||
<script setup lang="ts">
|
||||
import { ref } from 'vue'
|
||||
import { useRouter } from 'vue-router'
|
||||
import { Card, Typography, Tabs, Button, Tag, Divider, Space } from 'ant-design-vue'
|
||||
import {
|
||||
UserAddOutlined,
|
||||
TeamOutlined,
|
||||
UploadOutlined,
|
||||
PlayCircleOutlined,
|
||||
CheckCircleOutlined,
|
||||
DashboardOutlined,
|
||||
SettingOutlined,
|
||||
BookOutlined,
|
||||
RobotOutlined,
|
||||
EyeOutlined,
|
||||
} from '@ant-design/icons-vue'
|
||||
|
||||
const router = useRouter()
|
||||
const activeTab = ref('trainee')
|
||||
|
||||
const traineeSteps = [
|
||||
{
|
||||
title: 'Register an account',
|
||||
icon: UserAddOutlined,
|
||||
description:
|
||||
'Click Register in the top-right corner. Fill in your name, email, and a password. If you have an invite link from your manager, open that link after registering; it will automatically join you to the right organization.',
|
||||
tag: 'Required',
|
||||
tagColor: 'blue',
|
||||
action: { label: 'Register now', path: '/register' },
|
||||
},
|
||||
{
|
||||
title: 'Join your organization',
|
||||
icon: TeamOutlined,
|
||||
description:
|
||||
'Once logged in, go to Organizations in the nav. If your manager shared an invite link, follow it to be added automatically. Otherwise, ask your manager to send you one, as you cannot self-join without an invite.',
|
||||
tag: 'Required',
|
||||
tagColor: 'blue',
|
||||
},
|
||||
{
|
||||
title: 'Open your onboarding session',
|
||||
icon: PlayCircleOutlined,
|
||||
description:
|
||||
'Inside your organization, you will see a list of roles you have been assigned. Click Start Onboarding next to your role. The system will open a live chat session with the AI trainer.',
|
||||
tag: 'Main activity',
|
||||
tagColor: 'green',
|
||||
},
|
||||
{
|
||||
title: 'Work through the curriculum',
|
||||
icon: BookOutlined,
|
||||
description:
|
||||
"The Curriculum Agent will present a structured learning plan for your role. Read through each module, ask questions in the chat, and the Knowledge Agent will retrieve answers grounded in your organization's uploaded documents.",
|
||||
tag: 'Main activity',
|
||||
tagColor: 'green',
|
||||
},
|
||||
{
|
||||
title: 'Complete the assessment',
|
||||
icon: CheckCircleOutlined,
|
||||
description:
|
||||
'After the curriculum, the Assessment Agent will run a quiz. Multiple-choice answers are graded automatically. Written answers are graded by the AI. You will receive per-question feedback.',
|
||||
tag: 'Main activity',
|
||||
tagColor: 'green',
|
||||
},
|
||||
{
|
||||
title: 'Review your progress',
|
||||
icon: DashboardOutlined,
|
||||
description:
|
||||
'Go to Progress in the nav to see your completed sessions, scores, and a summary generated by the Progress Monitor Agent. Your manager can also view this to track your ramp-up.',
|
||||
tag: 'Optional',
|
||||
tagColor: 'default',
|
||||
action: { label: 'View progress', path: '/progress' },
|
||||
},
|
||||
]
|
||||
|
||||
const managerSteps = [
|
||||
{
|
||||
title: 'Register and create an organization',
|
||||
icon: UserAddOutlined,
|
||||
description:
|
||||
'Register with the code MANAGER2026 to get manager access. After logging in, go to Organizations and create a new organization for your team. Give it a clear name, as trainees will see this.',
|
||||
tag: 'Required',
|
||||
tagColor: 'blue',
|
||||
action: { label: 'Register now', path: '/register' },
|
||||
},
|
||||
{
|
||||
title: 'Upload training documents',
|
||||
icon: UploadOutlined,
|
||||
description:
|
||||
'Inside your organization, open the Manage tab and go to Training Files. Upload PDFs, DOCX, or TXT files containing your internal documentation, SOPs, or role guides. The system will process these in the background; watch for the "Ready" status before starting sessions.',
|
||||
tag: 'Required',
|
||||
tagColor: 'blue',
|
||||
},
|
||||
{
|
||||
title: 'Create roles',
|
||||
icon: SettingOutlined,
|
||||
description:
|
||||
"In the Manage tab, add roles (e.g. \"Software Engineer\", \"Customer Success\"). Each role gets its own training scope. You can attach specific documents to a role so the AI only retrieves relevant knowledge during that role's onboarding.",
|
||||
tag: 'Required',
|
||||
tagColor: 'blue',
|
||||
},
|
||||
{
|
||||
title: 'Configure agent behaviour (optional)',
|
||||
icon: RobotOutlined,
|
||||
description:
|
||||
'Go to Agents in the nav to view and configure the four agent roles: Curriculum, Knowledge, Assessment, and Progress Monitor. You can adjust the role description and system prompt to tune how the AI instructs trainees for your specific context.',
|
||||
tag: 'Optional',
|
||||
tagColor: 'default',
|
||||
action: { label: 'Open Agents', path: '/agents' },
|
||||
},
|
||||
{
|
||||
title: 'Invite team members',
|
||||
icon: TeamOutlined,
|
||||
description:
|
||||
'In the Manage tab, go to Members and generate an invite link. Share this link with new hires. When they register and follow the link, they are automatically added to your organization and can begin onboarding.',
|
||||
tag: 'Required',
|
||||
tagColor: 'blue',
|
||||
},
|
||||
{
|
||||
title: 'Monitor progress',
|
||||
icon: DashboardOutlined,
|
||||
description:
|
||||
'Once trainees complete sessions, visit Progress to review scores, curriculum completion, and the AI-generated progress summaries. Use this to identify who needs follow-up and which training content may need updating.',
|
||||
tag: 'Ongoing',
|
||||
tagColor: 'purple',
|
||||
action: { label: 'View progress', path: '/progress' },
|
||||
},
|
||||
]
|
||||
|
||||
const inspectorManagerSteps = [
|
||||
{
|
||||
title: 'Register a manager account',
|
||||
icon: UserAddOutlined,
|
||||
description:
|
||||
'Go to Register and sign up with any name and email. Enter the invite code MANAGER2026 to receive manager permissions. This account will be your admin perspective for the evaluation.',
|
||||
tag: 'Account 1',
|
||||
tagColor: 'orange',
|
||||
action: { label: 'Register', path: '/register' },
|
||||
},
|
||||
{
|
||||
title: 'Create an organization',
|
||||
icon: SettingOutlined,
|
||||
description:
|
||||
'After logging in, navigate to Organizations and create a new organization. Give it any name you like.',
|
||||
tag: 'Account 1',
|
||||
tagColor: 'orange',
|
||||
},
|
||||
{
|
||||
title: 'Create a role',
|
||||
icon: RobotOutlined,
|
||||
description:
|
||||
'Open the Manage tab inside your organization and add a role (e.g. "Engineer"). This scopes the training content and onboarding session for the trainee.',
|
||||
tag: 'Account 1',
|
||||
tagColor: 'orange',
|
||||
},
|
||||
{
|
||||
title: 'Upload a training document',
|
||||
icon: UploadOutlined,
|
||||
description:
|
||||
'Still in the Manage tab, go to Training Files and upload any PDF or text file. You can scope it to the role you just created. The system processes it in the background; wait for the status to show Ready before starting any sessions.',
|
||||
tag: 'Account 1',
|
||||
tagColor: 'orange',
|
||||
},
|
||||
{
|
||||
title: 'Generate an invite link',
|
||||
icon: TeamOutlined,
|
||||
description:
|
||||
'In the Manage tab under Members, generate an invite link. Copy it; you will use this to join as the trainee account in the next section.',
|
||||
tag: 'Account 1',
|
||||
tagColor: 'orange',
|
||||
},
|
||||
]
|
||||
|
||||
const inspectorTraineeSteps = [
|
||||
{
|
||||
title: 'Register a second (trainee) account',
|
||||
icon: UserAddOutlined,
|
||||
description:
|
||||
'Open a private/incognito tab or a different browser. Register again with a different email address. No invite code needed this time; this will be your trainee perspective.',
|
||||
tag: 'Account 2',
|
||||
tagColor: 'purple',
|
||||
action: { label: 'Register', path: '/register' },
|
||||
},
|
||||
{
|
||||
title: 'Join via the invite link',
|
||||
icon: TeamOutlined,
|
||||
description:
|
||||
'Paste the invite link you copied from the manager account into this browser. It will add you to the organization and assign you to the role you created.',
|
||||
tag: 'Account 2',
|
||||
tagColor: 'purple',
|
||||
},
|
||||
{
|
||||
title: 'Start an onboarding session',
|
||||
icon: PlayCircleOutlined,
|
||||
description:
|
||||
'Go to your organization and click Start Onboarding on the assigned role. The system opens a live WebSocket session with the AI trainer. Watch for the status indicators as each agent (Curriculum, Knowledge, Assessment) runs in sequence.',
|
||||
tag: 'Account 2',
|
||||
tagColor: 'purple',
|
||||
},
|
||||
{
|
||||
title: 'Complete the curriculum and assessment',
|
||||
icon: CheckCircleOutlined,
|
||||
description:
|
||||
'Work through the generated curriculum modules. Complete the final quiz; answers are graded automatically for multiple choice and by the AI for written responses. Your progress is saved after each step.',
|
||||
tag: 'Account 2',
|
||||
tagColor: 'purple',
|
||||
},
|
||||
{
|
||||
title: 'Switch back to the manager account and review',
|
||||
icon: EyeOutlined,
|
||||
description:
|
||||
"Return to your manager browser window. Go to Progress to see the trainee's completed session, scores, and the AI-generated progress summary. This is the full end-to-end loop.",
|
||||
tag: 'Account 1',
|
||||
tagColor: 'orange',
|
||||
action: { label: 'View progress', path: '/progress' },
|
||||
},
|
||||
]
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<div class="page">
|
||||
<Card class="panel" :bordered="false">
|
||||
<Typography.Title :level="2">Getting Started</Typography.Title>
|
||||
<Typography.Paragraph type="secondary">
|
||||
Follow the guide for your role below. If you are new to Dynavera, start with the
|
||||
Trainee guide to see how an onboarding session works, or the Manager guide to set up
|
||||
your organization.
|
||||
</Typography.Paragraph>
|
||||
|
||||
<Tabs v-model:activeKey="activeTab" size="large">
|
||||
<Tabs.TabPane key="trainee" tab="I am a Trainee / New Hire">
|
||||
<Typography.Paragraph style="margin-bottom: 1.5rem; color: #6b7280">
|
||||
You have been invited to an organization and want to begin your onboarding session.
|
||||
</Typography.Paragraph>
|
||||
<div class="steps-list">
|
||||
<div
|
||||
v-for="(step, index) in traineeSteps"
|
||||
:key="step.title"
|
||||
class="step-row"
|
||||
>
|
||||
<div class="step-number">{{ index + 1 }}</div>
|
||||
<div class="step-body">
|
||||
<div class="step-header">
|
||||
<component :is="step.icon" class="step-icon" />
|
||||
<Typography.Text strong style="font-size: 1rem">
|
||||
{{ step.title }}
|
||||
</Typography.Text>
|
||||
<Tag :color="step.tagColor" style="margin-left: 0.5rem">
|
||||
{{ step.tag }}
|
||||
</Tag>
|
||||
</div>
|
||||
<Typography.Paragraph
|
||||
type="secondary"
|
||||
style="margin: 0.4rem 0 0.6rem"
|
||||
>
|
||||
{{ step.description }}
|
||||
</Typography.Paragraph>
|
||||
<Button
|
||||
v-if="step.action"
|
||||
type="primary"
|
||||
size="small"
|
||||
@click="router.push(step.action.path)"
|
||||
>
|
||||
{{ step.action.label }}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Tabs.TabPane>
|
||||
|
||||
<Tabs.TabPane key="manager" tab="I am a Manager / Admin">
|
||||
<Typography.Paragraph style="margin-bottom: 1.5rem; color: #6b7280">
|
||||
You want to set up your organization, upload training materials, and onboard
|
||||
new hires.
|
||||
</Typography.Paragraph>
|
||||
<div class="steps-list">
|
||||
<div
|
||||
v-for="(step, index) in managerSteps"
|
||||
:key="step.title"
|
||||
class="step-row"
|
||||
>
|
||||
<div class="step-number">{{ index + 1 }}</div>
|
||||
<div class="step-body">
|
||||
<div class="step-header">
|
||||
<component :is="step.icon" class="step-icon" />
|
||||
<Typography.Text strong style="font-size: 1rem">
|
||||
{{ step.title }}
|
||||
</Typography.Text>
|
||||
<Tag :color="step.tagColor" style="margin-left: 0.5rem">
|
||||
{{ step.tag }}
|
||||
</Tag>
|
||||
</div>
|
||||
<Typography.Paragraph
|
||||
type="secondary"
|
||||
style="margin: 0.4rem 0 0.6rem"
|
||||
>
|
||||
{{ step.description }}
|
||||
</Typography.Paragraph>
|
||||
<Button
|
||||
v-if="step.action"
|
||||
type="primary"
|
||||
size="small"
|
||||
@click="router.push(step.action.path)"
|
||||
>
|
||||
{{ step.action.label }}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Tabs.TabPane>
|
||||
|
||||
<Tabs.TabPane key="inspector" tab="Inspector / Evaluator">
|
||||
<Typography.Paragraph style="margin-bottom: 0.25rem; color: #6b7280">
|
||||
To evaluate the full system, create two separate accounts: one as a manager
|
||||
and one as a trainee. Follow both sets of steps in order. Use a private /
|
||||
incognito window for the second account so both sessions stay open at once.
|
||||
</Typography.Paragraph>
|
||||
<Typography.Paragraph style="margin-bottom: 1.5rem; color: #6b7280">
|
||||
The GPU inference node runs on a personal machine and may occasionally be
|
||||
offline. If the AI chat stalls, the inference layer is unavailable; refer to
|
||||
the report for local setup instructions.
|
||||
</Typography.Paragraph>
|
||||
|
||||
<Typography.Title :level="4" style="margin-bottom: 1rem">
|
||||
Part 1: Manager account
|
||||
<Tag color="orange" style="margin-left: 0.5rem; vertical-align: middle">Account 1</Tag>
|
||||
</Typography.Title>
|
||||
<div class="steps-list">
|
||||
<div
|
||||
v-for="(step, index) in inspectorManagerSteps"
|
||||
:key="step.title"
|
||||
class="step-row"
|
||||
>
|
||||
<div class="step-number step-number--orange">{{ index + 1 }}</div>
|
||||
<div class="step-body">
|
||||
<div class="step-header">
|
||||
<component :is="step.icon" class="step-icon" />
|
||||
<Typography.Text strong style="font-size: 1rem">
|
||||
{{ step.title }}
|
||||
</Typography.Text>
|
||||
<Tag :color="step.tagColor" style="margin-left: 0.5rem">
|
||||
{{ step.tag }}
|
||||
</Tag>
|
||||
</div>
|
||||
<Typography.Paragraph
|
||||
type="secondary"
|
||||
style="margin: 0.4rem 0 0.6rem"
|
||||
>
|
||||
{{ step.description }}
|
||||
</Typography.Paragraph>
|
||||
<Button
|
||||
v-if="step.action"
|
||||
type="primary"
|
||||
size="small"
|
||||
@click="router.push(step.action.path)"
|
||||
>
|
||||
{{ step.action.label }}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Divider />
|
||||
|
||||
<Typography.Title :level="4" style="margin-bottom: 1rem">
|
||||
Part 2: Trainee account
|
||||
<Tag color="purple" style="margin-left: 0.5rem; vertical-align: middle">Account 2</Tag>
|
||||
</Typography.Title>
|
||||
<div class="steps-list">
|
||||
<div
|
||||
v-for="(step, index) in inspectorTraineeSteps"
|
||||
:key="step.title"
|
||||
class="step-row"
|
||||
>
|
||||
<div class="step-number step-number--purple">{{ index + 1 }}</div>
|
||||
<div class="step-body">
|
||||
<div class="step-header">
|
||||
<component :is="step.icon" class="step-icon" />
|
||||
<Typography.Text strong style="font-size: 1rem">
|
||||
{{ step.title }}
|
||||
</Typography.Text>
|
||||
<Tag :color="step.tagColor" style="margin-left: 0.5rem">
|
||||
{{ step.tag }}
|
||||
</Tag>
|
||||
</div>
|
||||
<Typography.Paragraph
|
||||
type="secondary"
|
||||
style="margin: 0.4rem 0 0.6rem"
|
||||
>
|
||||
{{ step.description }}
|
||||
</Typography.Paragraph>
|
||||
<Button
|
||||
v-if="step.action"
|
||||
type="primary"
|
||||
size="small"
|
||||
@click="router.push(step.action.path)"
|
||||
>
|
||||
{{ step.action.label }}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Tabs.TabPane>
|
||||
</Tabs>
|
||||
|
||||
<Divider />
|
||||
|
||||
<Space>
|
||||
<Button type="primary" @click="router.push('/register')">
|
||||
<UserAddOutlined />
|
||||
Register to get started
|
||||
</Button>
|
||||
<Button @click="router.push('/about')">About Dynavera</Button>
|
||||
</Space>
|
||||
</Card>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<style scoped>
|
||||
.page {
|
||||
max-width: 860px;
|
||||
}
|
||||
.panel {
|
||||
max-width: 860px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
.steps-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1.25rem;
|
||||
}
|
||||
.step-row {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
align-items: flex-start;
|
||||
}
|
||||
.step-number {
|
||||
flex-shrink: 0;
|
||||
width: 2rem;
|
||||
height: 2rem;
|
||||
border-radius: 50%;
|
||||
background: #2563eb;
|
||||
color: #fff;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
font-weight: 700;
|
||||
font-size: 0.9rem;
|
||||
margin-top: 0.1rem;
|
||||
}
|
||||
.step-number--orange {
|
||||
background: #d97706;
|
||||
}
|
||||
.step-number--purple {
|
||||
background: #7c3aed;
|
||||
}
|
||||
.step-body {
|
||||
flex: 1;
|
||||
background: #f8fafc;
|
||||
border: 1px solid #dbe3ec;
|
||||
border-radius: 8px;
|
||||
padding: 0.9rem 1.1rem;
|
||||
}
|
||||
.step-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.step-icon {
|
||||
font-size: 1.1rem;
|
||||
color: #2563eb;
|
||||
}
|
||||
</style>
|
||||
|
|
@ -39,7 +39,7 @@ const features = [
|
|||
{
|
||||
title: 'Knowledge Mesh',
|
||||
description:
|
||||
'Ingest docs, wikis, and repos — keep assistants current with zero manual updates.',
|
||||
'Ingest docs, wikis, and repos to keep assistants current with zero manual updates.',
|
||||
icon: CloudTwoTone,
|
||||
},
|
||||
]
|
||||
|
|
@ -99,11 +99,11 @@ const logos = [
|
|||
faster.
|
||||
</Typography.Paragraph>
|
||||
<Space>
|
||||
<RouterLink to="/about">
|
||||
<Button type="primary" size="large">Learn More</Button>
|
||||
<RouterLink to="/getting-started">
|
||||
<Button type="primary" size="large">Get Started</Button>
|
||||
</RouterLink>
|
||||
<RouterLink to="/organization">
|
||||
<Button size="large">See Onboarding Flows</Button>
|
||||
<RouterLink to="/about">
|
||||
<Button size="large">Learn More</Button>
|
||||
</RouterLink>
|
||||
</Space>
|
||||
<Divider />
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ const fetchTrainingFileWarning = async () => {
|
|||
const allFiles: { status: string; scope?: string }[] = Array.isArray(res.data)
|
||||
? res.data
|
||||
: (res.data as { results?: { status: string; scope?: string }[] }).results ?? []
|
||||
// Only consider role-scoped files — org-wide files apply to all roles
|
||||
// Only consider role-scoped files; org-wide files apply to all roles
|
||||
// and their ingestion state shouldn't block a specific role's onboarding
|
||||
const files = allFiles.filter((f) => f.scope === 'role')
|
||||
const ingesting = files.filter((f) => f.status === 'ingesting').length
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import {
|
|||
Upload,
|
||||
Steps,
|
||||
Table,
|
||||
Tooltip,
|
||||
} from 'ant-design-vue'
|
||||
import { apiClient, isAxiosError, API } from '../router/api'
|
||||
import { useUserStore } from '../stores/userStore'
|
||||
|
|
@ -324,17 +325,20 @@ const trainingFileColumns = [
|
|||
},
|
||||
{
|
||||
title: 'Status',
|
||||
dataIndex: 'status',
|
||||
key: 'status',
|
||||
customRender: ({ value }: { value: string }) => {
|
||||
customRender: ({ record }: { record: TrainingFile }) => {
|
||||
const statusMap: Record<string, { color: string; label: string }> = {
|
||||
ingesting: { color: 'processing', label: 'Ingesting' },
|
||||
chunked: { color: 'blue', label: 'Chunked' },
|
||||
embedded: { color: 'success', label: 'Embedded' },
|
||||
failed: { color: 'error', label: 'Failed' },
|
||||
}
|
||||
const status = statusMap[value] || { color: 'default', label: value }
|
||||
return h(Tag, { color: status.color }, () => status.label)
|
||||
const status = statusMap[record.status] || { color: 'default', label: record.status }
|
||||
const tag = h(Tag, { color: status.color }, () => status.label)
|
||||
if (record.status === 'failed' && record.error_message) {
|
||||
return h(Tooltip, { title: record.error_message }, () => tag)
|
||||
}
|
||||
return tag
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
@ -562,7 +566,19 @@ const createInvite = async () => {
|
|||
}
|
||||
}
|
||||
|
||||
const fallbackCopyText = (text: string): boolean => {
|
||||
const copyToClipboard = async (text: string): Promise<boolean> => {
|
||||
const safeText = String(text || '').trim()
|
||||
if (!safeText) return false
|
||||
|
||||
if (window.isSecureContext && window.navigator.clipboard?.writeText) {
|
||||
try {
|
||||
await window.navigator.clipboard.writeText(safeText)
|
||||
return true
|
||||
} catch {
|
||||
// Fallback to older method if clipboard API fails
|
||||
}
|
||||
}
|
||||
|
||||
const textarea = document.createElement('textarea')
|
||||
textarea.value = text
|
||||
textarea.setAttribute('readonly', 'true')
|
||||
|
|
@ -578,22 +594,6 @@ const fallbackCopyText = (text: string): boolean => {
|
|||
return copied
|
||||
}
|
||||
|
||||
const copyToClipboard = async (text: string): Promise<boolean> => {
|
||||
const safeText = String(text || '').trim()
|
||||
if (!safeText) return false
|
||||
|
||||
if (window.isSecureContext && window.navigator.clipboard?.writeText) {
|
||||
try {
|
||||
await window.navigator.clipboard.writeText(safeText)
|
||||
return true
|
||||
} catch {
|
||||
// Fall through to legacy copy for restricted browser contexts.
|
||||
}
|
||||
}
|
||||
|
||||
return fallbackCopyText(safeText)
|
||||
}
|
||||
|
||||
const copyInviteUrl = async () => {
|
||||
const copied = await copyToClipboard(newInviteUrl.value)
|
||||
if (copied) {
|
||||
|
|
@ -962,7 +962,7 @@ onUnmounted(() => {
|
|||
size="small"
|
||||
/>
|
||||
<Typography.Paragraph v-else type="secondary">
|
||||
No training files uploaded yet. Use the Upload Training File button to add files — you can scope them to a specific role or make them available to all roles.
|
||||
No training files uploaded yet. Use the Upload Training File button to add files. You can scope them to a specific role or make them available to all roles.
|
||||
</Typography.Paragraph>
|
||||
</div>
|
||||
</Tabs.TabPane>
|
||||
|
|
|
|||
|
|
@ -32,6 +32,10 @@ authentication, and connectivity to internal systems.`
|
|||
|
||||
const router = useRouter()
|
||||
|
||||
const REPO = 'https://git.cs.bham.ac.uk/projects-2025-26/vxn217'
|
||||
const DEV_COMPOSE = `${REPO}/-/blob/main/compose/dev/docker-compose.yml`
|
||||
const PROD_COMPOSE = `${REPO}/-/blob/main/compose/prod/docker-compose.yml`
|
||||
|
||||
const selfHostSteps = [
|
||||
'Clone the repository locally',
|
||||
'Copy and edit .env.template (or create .env) with your settings',
|
||||
|
|
@ -73,11 +77,13 @@ const selfHostSteps = [
|
|||
<Button
|
||||
type="primary"
|
||||
v-if="plan.name === 'Community'"
|
||||
@click="router.push('/login')"
|
||||
@click="router.push('/getting-started')"
|
||||
>
|
||||
Get Started
|
||||
</Button>
|
||||
<Button v-else>Self-Host</Button>
|
||||
<a v-else :href="PROD_COMPOSE" target="_blank" rel="noopener">
|
||||
<Button>Self-Host</Button>
|
||||
</a>
|
||||
</Space>
|
||||
</Card>
|
||||
</Col>
|
||||
|
|
@ -106,8 +112,12 @@ const selfHostSteps = [
|
|||
the production compose when preparing a production deployment.
|
||||
</Typography.Paragraph>
|
||||
<Space>
|
||||
<Button>Use development compose</Button>
|
||||
<Button>Use production compose</Button>
|
||||
<a :href="DEV_COMPOSE" target="_blank" rel="noopener">
|
||||
<Button>Development compose</Button>
|
||||
</a>
|
||||
<a :href="PROD_COMPOSE" target="_blank" rel="noopener">
|
||||
<Button type="primary">Production compose</Button>
|
||||
</a>
|
||||
</Space>
|
||||
|
||||
<Divider />
|
||||
|
|
|
|||
56
site/study-raw-response.txt
Normal file
56
site/study-raw-response.txt
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
```yaml
|
||||
project:
|
||||
name: "Dynavera"
|
||||
url: "https://fyp.viswamedha.com"
|
||||
viewport: { width: 1440, height: 900, deviceScaleFactor: 2 }
|
||||
theme: "light"
|
||||
|
||||
scenarios:
|
||||
|
||||
- id: "manager-login-and-dashboard"
|
||||
name: "Manager Login and Dashboard Interactions"
|
||||
type: "screenshot+video"
|
||||
page: "/"
|
||||
actions:
|
||||
- dismiss_cookies: true
|
||||
- wait: 2000
|
||||
- screenshot: { name: "login-screen" }
|
||||
- click: { selector: ".ant-menu-item:nth-child(2)" } # About
|
||||
- wait: 1500
|
||||
- click: { selector: ".ant-menu-item:nth-child(3)" } # Getting Started
|
||||
- wait: 1500
|
||||
- click: { selector: ".ant-menu-item:nth-child(4)" } # Pricing
|
||||
- wait: 1500
|
||||
- screenshot: { name: "pricing-page" }
|
||||
- click: { selector: ".ant-btn-background-ghost" } # Login button
|
||||
- wait: 1500
|
||||
- type: { selector: "#email", text: "haleisaac@example.com" }
|
||||
- type: { selector: "#password", text: "password" }
|
||||
- click: { selector: ".ant-btn-primary" } # Submit login form
|
||||
- wait_for_selector: { selector: ".header .ant-btn-primary" }
|
||||
- screenshot: { name: "dashboard" }
|
||||
- hover_cards: { selector: ".ant-tabs-tab-btn", delay: 1000 }
|
||||
- wait: 2000
|
||||
|
||||
- id: "trainee-login-and-onboarding"
|
||||
name: "Trainee Login and Onboarding Interactions"
|
||||
type: "screenshot+video"
|
||||
page: "/"
|
||||
actions:
|
||||
- dismiss_cookies: true
|
||||
- wait: 2000
|
||||
- screenshot: { name: "login-screen" }
|
||||
- click: { selector: ".ant-btn-background-ghost" } # Login button
|
||||
- wait: 1500
|
||||
- type: { selector: "#email", text: "j.thompson@example.com" }
|
||||
- type: { selector: "#password", text: "password" }
|
||||
- click: { selector: ".ant-btn-primary" } # Submit login form
|
||||
- wait_for_selector: { selector: ".role-item .ant-btn-default" }
|
||||
- screenshot: { name: "onboarding-screen" }
|
||||
- click: { selector: ".role-item .ant-btn-default:first-child" } # Start onboarding
|
||||
- wait: 5000
|
||||
|
||||
export:
|
||||
videos: { format: "mp4", fps: 24, crf: 18 }
|
||||
combined_video: { enabled: true, name: "demo-recording.mp4" }
|
||||
```
|
||||
22
site/study.json
Normal file
22
site/study.json
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"baseUrl": "https://fyp.viswamedha.com",
|
||||
"siteName": "Dynavera",
|
||||
"pages": [
|
||||
{
|
||||
"url": "/",
|
||||
"title": "Dynavera",
|
||||
"metaDescription": "",
|
||||
"headings": [],
|
||||
"sections": [
|
||||
"app"
|
||||
],
|
||||
"interactive": [],
|
||||
"hasForms": false,
|
||||
"estimatedScrollHeight": 900,
|
||||
"navLinks": []
|
||||
}
|
||||
],
|
||||
"allLinks": [
|
||||
"/"
|
||||
]
|
||||
}
|
||||
Loading…
Reference in a new issue