diff --git a/report/.gitignore b/report/.gitignore index 8914ef8..aca6754 100644 --- a/report/.gitignore +++ b/report/.gitignore @@ -4,4 +4,6 @@ *.out *.toc *.pdf -*.gz \ No newline at end of file +*.gz +*.bbl +*.blg \ No newline at end of file diff --git a/report/diagrams/home-page.png b/report/diagrams/home-page.png deleted file mode 100644 index 523f85d..0000000 Binary files a/report/diagrams/home-page.png and /dev/null differ diff --git a/report/diagrams/onboarding-content-page.png b/report/diagrams/onboarding-content-page.png deleted file mode 100644 index 180efab..0000000 Binary files a/report/diagrams/onboarding-content-page.png and /dev/null differ diff --git a/report/diagrams/onboarding-loading-page.png b/report/diagrams/onboarding-loading-page.png deleted file mode 100644 index 292c7fd..0000000 Binary files a/report/diagrams/onboarding-loading-page.png and /dev/null differ diff --git a/report/diagrams/organization-page.png b/report/diagrams/organization-page.png deleted file mode 100644 index 353cc98..0000000 Binary files a/report/diagrams/organization-page.png and /dev/null differ diff --git a/report/references.bib b/report/references.bib new file mode 100644 index 0000000..0d3f65e --- /dev/null +++ b/report/references.bib @@ -0,0 +1,143 @@ +@misc{anthropic2024mcp, + author = {{Anthropic}}, + title = {Model Context Protocol (MCP) Specification}, + year = {2024}, + howpublished = {\url{https://modelcontextprotocol.io}}, + note = {Accessed: 2026-03-09} +} + +@misc{huggingface2024mcp, + author = {{Hugging Face}}, + title = {Introduction to Model Context Protocol (MCP)}, + year = {2024}, + howpublished = {\url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}}, + note = {Accessed: 2026-03-09} +} + +@misc{langgraph2024, + author = {{LangChain}}, + title = {LangGraph: Building Stateful, Multi-agent Applications with LLMs}, + year = {2024}, + howpublished = {\url{https://docs.langchain.com/oss/python/langgraph/workflows-agents}}, + note = 
{Accessed: 2026-03-09} +} + +@misc{meta2024llama3, + author = {{Meta AI}}, + title = {Llama 3: Open-weight Large Language Models}, + year = {2024}, + howpublished = {\url{https://llama.meta.com/llama3/}}, + note = {Accessed: 2026-03-09} +} + +@misc{pgvector2024, + author = {{PostgreSQL Global Development Group}}, + title = {pgvector: Open-source Vector Similarity Search for PostgreSQL}, + year = {2024}, + howpublished = {\url{https://github.com/pgvector/pgvector}}, + note = {Accessed: 2026-03-09} +} + +@misc{pinecone2023rag, + author = {{Pinecone}}, + title = {Retrieval Augmented Generation (RAG) and Semantic Search}, + year = {2023}, + howpublished = {\url{https://www.pinecone.io/learn/retrieval-augmented-generation/}}, + note = {Accessed: 2026-03-09} +} + +@misc{dettmers2023bitsandbytes, + author = {Dettmers, Tim}, + title = {4-bit Quantization and Bitsandbytes for LLMs}, + year = {2023}, + howpublished = {\url{https://huggingface.co/blog/4bit-transformers-bitsandbytes}}, + note = {Accessed: 2026-03-09} +} + +@misc{vllm2024, + author = {{vLLM Team}}, + title = {High-Throughput Serving with PagedAttention}, + year = {2024}, + howpublished = {\url{https://vllm.ai}}, + note = {Accessed: 2026-03-09} +} + +@misc{channels2024docs, + author = {{Django Software Foundation}}, + title = {Django Channels Documentation}, + year = {2024}, + howpublished = {\url{https://channels.readthedocs.io/en/stable/}}, + note = {Accessed: 2026-03-09} +} + +@misc{django2024docs, + author = {{Django Software Foundation}}, + title = {Django Documentation}, + year = {2024}, + howpublished = {\url{https://docs.djangoproject.com/}}, + note = {Accessed: 2026-03-09} +} + +@misc{drf2024docs, + author = {{Encode OSS}}, + title = {Django REST Framework Documentation}, + year = {2024}, + howpublished = {\url{https://www.django-rest-framework.org/}}, + note = {Accessed: 2026-03-09} +} + +@misc{celery2024docs, + author = {{Celery Project}}, + title = {Celery Documentation}, + year = {2024}, + 
howpublished = {\url{https://docs.celeryq.dev/}}, + note = {Accessed: 2026-03-09} +} + +@misc{redis2024docs, + author = {{Redis Ltd.}}, + title = {Redis Documentation}, + year = {2024}, + howpublished = {\url{https://redis.io/docs/}}, + note = {Accessed: 2026-03-09} +} + +@misc{fastapi2024docs, + author = {{FastAPI}}, + title = {FastAPI Documentation}, + year = {2024}, + howpublished = {\url{https://fastapi.tiangolo.com/}}, + note = {Accessed: 2026-03-09} +} + +@misc{sbert2024docs, + author = {{UKPLab / SBERT}}, + title = {Sentence-Transformers Documentation}, + year = {2024}, + howpublished = {\url{https://www.sbert.net/}}, + note = {Accessed: 2026-03-09} +} + +@misc{llamacpp2024, + author = {{ggml-org}}, + title = {llama.cpp Documentation}, + year = {2024}, + howpublished = {\url{https://github.com/ggml-org/llama.cpp}}, + note = {Accessed: 2026-03-09} +} + +@misc{llamacpppython2024, + author = {Abetlen}, + title = {llama-cpp-python Documentation}, + year = {2024}, + howpublished = {\url{https://github.com/abetlen/llama-cpp-python}}, + note = {Accessed: 2026-03-09} +} + +@misc{pytorch2024docs, + author = {{PyTorch Team}}, + title = {PyTorch Documentation}, + year = {2024}, + howpublished = {\url{https://pytorch.org/docs/}}, + note = {Accessed: 2026-03-09} +} diff --git a/report/report.tex b/report/report.tex index 9c30252..9440705 100644 --- a/report/report.tex +++ b/report/report.tex @@ -2,7 +2,7 @@ \usepackage[utf8]{inputenc} \usepackage[T1]{fontenc} \usepackage{lmodern} -\usepackage[a4paper,margin=1in]{geometry} +\usepackage[a4paper,margin=0.75in]{geometry} \usepackage{longtable} \usepackage{booktabs} \usepackage{array} @@ -13,6 +13,7 @@ \usepackage[hidelinks]{hyperref} \usepackage{tabularx} \usepackage{xurl} +\usepackage[numbers,sort&compress]{natbib} % Report-style paragraph spacing \setlength{\parindent}{0pt} @@ -61,9 +62,9 @@ User & j.thompson@example.com & password \\ \end{tabular} \end{center} -\textit{Note: I will try to keep the public website 
available, but the GPU node -runs on my home PC and may occasionally go offline. For reliable testing, -I recommend running the system locally on a machine with a CUDA-enabled GPU.} +\textit{Note: The public site should always be available, but the GPU node +runs on my PC and can go offline. For reliable testing, +I recommend running my development compose stack on a machine with a CUDA-enabled GPU.} Manager registration code (for signup): \texttt{MANAGER2026} @@ -191,7 +192,7 @@ contextual reasoning, and adaptive response generation, making them well-suited for interactive, role-aware training scenarios. Unlike static documentation, LLM-driven systems can dynamically tailor explanations and guidance based on a user's specific role and prior -knowledge. +knowledge \cite{meta2024llama3,langgraph2024}. Rather than relying on a monolithic chatbot, Dynavera employs a collection of specialized, collaborating agents. This modular approach @@ -254,13 +255,13 @@ enable scalable, context-aware onboarding: objectives that exceed the capability of a single monolithic model. Within Dynavera, this enables separation of instructional delivery, contextual reasoning, knowledge retrieval, and evaluation, improving - modularity, explainability, and system adaptability. + modularity, explainability, and system adaptability \cite{langgraph2024}. \item Retrieval-Augmented Generation (RAG): Training responses are grounded in authoritative, organization-specific documentation rather than relying solely on a model's parametric knowledge. This ensures factual accuracy, contextual relevance, and rapid adaptability as - organizational knowledge evolves. + organizational knowledge evolves \cite{pinecone2023rag}. \end{itemize} To address data privacy and deployment constraints, Dynavera prioritizes @@ -268,7 +269,7 @@ local inference using quantized open-weight models (e.g., Llama 3 in GGUF format). 
This design choice reduces dependency on external cloud APIs, supports offline or air-gapped environments, and aligns with enterprise privacy requirements while maintaining acceptable inference -performance. +performance \cite{meta2024llama3,dettmers2023bitsandbytes,llamacpp2024}. \subsection{Positioning Against Alternative Approaches}\label{positioning-against-alternative-approaches} @@ -371,11 +372,13 @@ MCP Router & Python & Provides a standardized interface for agents to query data This stack was selected to balance modularity, rapid iteration, and production readiness. A decoupled frontend-backend architecture lets the UI and API evolve independently, while PostgreSQL -with pgvector provides one ACID-compliant store for both relational state and vector retrieval. +with pgvector provides one ACID-compliant store for both relational state and vector retrieval +\cite{django2024docs,drf2024docs,pgvector2024}. To preserve performance and control, orchestration is implemented in native Python rather than heavier framework abstractions such as LangChain. This keeps agent state handling explicit, reduces latency in the WebSocket loop, -and supports local execution, data ownership, and architectural transparency during early-stage development. +and supports local execution, data ownership, and architectural transparency during early-stage development +\cite{langgraph2024,channels2024docs}. \subsection{Design Philosophy: The Distributed Agentic Pattern}\label{design-philosophy-the-distributed-agentic-pattern} @@ -383,7 +386,7 @@ Pattern}\label{design-philosophy-the-distributed-agentic-pattern} Dynavera leverages the Model Context Protocol (MCP) to solve the "context gap" in corporate onboarding. 
Rather than providing the LLM with a static, bloated prompt, the system utilizes a Sidecar Tooling -approach: +approach \cite{anthropic2024mcp,huggingface2024mcp}: \begin{itemize} \item @@ -437,13 +440,13 @@ while orchestration-time interaction uses Django Channels over WebSockets at /ws/onboarding/\textless session\_uuid\textgreater/. This allows the platform to handle both CRUD-style workflows and long-running, stateful agent interactions without forcing either pattern -into the other. +into the other \cite{drf2024docs,channels2024docs}. For ingestion, the backend follows an asynchronous execution path: uploaded files are stored as TrainingFile records, and a post-save trigger enqueues background processing through Celery (Redis broker). This prevents heavy preprocessing from blocking request-response latency -on the main web process. +on the main web process \cite{celery2024docs,redis2024docs}. Persistence is model-driven and traceable. Session state, progress, generated onboarding structures, and interaction events are stored in @@ -480,14 +483,14 @@ batches long content, and calls the GPU service at /v1/semantic-chunk. The service performs sentence-level semantic breakpoint detection using embedding-distance thresholds, then returns coherent chunks with embeddings. This avoids naive fixed-size splits that can break context -mid-concept. +mid-concept \cite{sbert2024docs,fastapi2024docs}. \underline{Vector storage and retrieval with pgvector}\\ Returned chunk embeddings are stored in RoleRagDocument.embedding (768 dimensions) in PostgreSQL using pgvector, linked relationally to role and source file metadata. Retrieval is performed in SQL using cosine-distance ranking and top-k selection, allowing role filtering and -similarity search in one query path. +similarity search in one query path \cite{pgvector2024}. \subsubsection{Agent Orchestration Workflow (Simplified)}\label{agent-orchestration-workflow-simplified} @@ -645,95 +648,9 @@ practical manner. 
While this project serves as a proof-of-concept, the modular nature of the specialist agents provides a clear path for future expansion into more nuanced, multi-modal onboarding scenarios. -\begin{figure*}[b] -\centering -\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/home-page.png} -\caption{Home page of Dynavera.} -\end{figure*} - -\begin{figure*}[b] -\centering -\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/organization-page.png} -\caption{Organization management view.} -\end{figure*} - -\begin{figure*}[b] -\centering -\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/onboarding-loading-page.png} -\caption{Onboarding generation/loading state.} -\end{figure*} - -\begin{figure*}[b] -\centering -\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/onboarding-content-page.png} -\caption{Onboarding content delivery view.} -\end{figure*} - \section{References}\label{references} - -\begin{itemize} -\item - Anthropic (2024). Model Context Protocol (MCP) Specification. - Available at: \url{https://modelcontextprotocol.io} (Accessed: 9 March - 2026). -\item - Hugging Face (2024). Introduction to Model Context Protocol (MCP). - Available at: - \url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts} - (Accessed: 9 March 2026). -\item - LangChain (2024). LangGraph: Building Stateful, Multi-agent - Applications with LLMs. Available at: - \url{https://docs.langchain.com/oss/python/langgraph/workflows-agents} - (Accessed: 9 March 2026). -\item - Meta AI (2024). Llama 3: Open-weight Large Language Models. Available - at: \url{https://llama.meta.com/llama3/} (Accessed: 9 March 2026). -\item - PostgreSQL Global Development Group (2024). pgvector: Open-source - vector similarity search for PostgreSQL. Available at: - \url{https://github.com/pgvector/pgvector} (Accessed: 9 March 2026). -\item - Pinecone (2023). Retrieval Augmented Generation (RAG) and Semantic - Search. 
Available at: - \url{https://www.pinecone.io/learn/retrieval-augmented-generation/} - (Accessed: 9 March 2026). -\item - Dettmers, T. (2023). 4-bit Quantization and Bitsandbytes for LLMs. - Available at: - \url{https://huggingface.co/blog/4bit-transformers-bitsandbytes} (Accessed: - 9 March 2026). -\item - vLLM Team (2024). High-Throughput Serving with PagedAttention. - Available at: \url{https://vllm.ai} (Accessed: 9 March 2026). -\item - Django Software Foundation (2024). Django Channels: Real-time - WebSockets for Python. Available at: - \url{https://channels.readthedocs.io/en/stable/} (Accessed: 9 March 2026). -\item - Django Software Foundation (2024). Django Documentation. - Available at: \url{https://docs.djangoproject.com/} (Accessed: 9 March 2026). -\item - Encode OSS (2024). Django REST framework Documentation. - Available at: \url{https://www.django-rest-framework.org/} (Accessed: 9 March 2026). -\item - Celery Project (2024). Celery Documentation. Available at: \url{https://docs.celeryq.dev/} (Accessed: 9 March 2026). -\item - Redis Ltd. (2024). Redis Documentation. Available at: \url{https://redis.io/docs/} (Accessed: 9 March 2026). -\item - FastAPI (2024). FastAPI Documentation. Available at: \url{https://fastapi.tiangolo.com/} (Accessed: 9 March 2026). -\item - UKPLab / SBERT (2024). Sentence-Transformers Documentation. - Available at: \url{https://www.sbert.net/} (Accessed: 9 March 2026). -\item - Abetlen (2024). llama-cpp-python Documentation. - Available at: \url{https://github.com/abetlen/llama-cpp-python} (Accessed: 9 March 2026). -\item - ggml-org (2024). llama.cpp Documentation. - Available at: \url{https://github.com/ggml-org/llama.cpp} (Accessed: 9 March 2026). -\item - PyTorch Team (2024). PyTorch Documentation. Available at: \url{https://pytorch.org/docs/} (Accessed: 9 March 2026). -\end{itemize} +\renewcommand{\bibsection}{}\bibliographystyle{unsrtnat} +\bibliography{references} \end{document}