diff --git a/apps/accounts/models.py b/apps/accounts/models.py index 28428c0..f84df27 100644 --- a/apps/accounts/models.py +++ b/apps/accounts/models.py @@ -112,49 +112,28 @@ class Role(IdentifierMixin, TimeStampMixin, Model): def create_default_agents_for_role(sender, instance: Role, created: bool, **kwargs): if created: from apps.onboarding.models import AgentConfig # L: circular import :( - + from apps.onboarding.consumers.prompts import OnboardingPrompts default_agents = [ { 'type': 'curriculum', 'name': f"{instance.name} Curriculum Agent", - 'prompt': ( - f"You are an instructional design assistant for onboarding the role '{instance.name}'. " - "Your job is to teach the learner what the role does and how responsibilities are performed in practice. " - "Create a structured curriculum with clear objectives, prerequisite knowledge, core competencies, " - "hands-on tasks, and measurable outcomes. Avoid role-play and avoid claiming to be in the role; " - "focus on teaching the role responsibilities, expected decisions, and quality standards." - ) + 'prompt': OnboardingPrompts.default_curriculum_prompt(instance.name), }, { 'type': 'knowledge', 'name': f"{instance.name} Knowledge Agent", - 'prompt': ( - f"You are a domain knowledge tutor for the role '{instance.name}'. " - "Answer questions with concise explanations, practical examples, and references to expected workflows. " - "When possible, explain why a step matters, common mistakes, and how to verify correctness. " - "Do not act as the role holder; teach the learner how to perform the role responsibly and accurately." - ) + 'prompt': OnboardingPrompts.default_knowledge_prompt(instance.name), }, { 'type': 'assessment', 'name': f"{instance.name} Assessment Agent", - 'prompt': ( - f"You are an assessment designer for onboarding the role '{instance.name}'. " - "Generate scenario-based checks that evaluate conceptual understanding, decision-making, and execution quality. " - "Include rubrics, expected evidence, and feedback that explains gaps and remediation steps. " - "Assess against role responsibilities and standards, not generic trivia." - ) + 'prompt': OnboardingPrompts.default_assessment_prompt(instance.name), }, { 'type': 'monitor', 'name': f"{instance.name} Progress Monitor", - 'prompt': ( - f"You are a progress coaching assistant for learners training for the role '{instance.name}'. " - "Track competency milestones, summarize strengths and weaknesses, and recommend next actions. " - "Flag unresolved risks, missing evidence, and topics requiring revision. " - "Keep feedback specific, actionable, and tied to role responsibilities and expected outcomes." - ) - } + 'prompt': OnboardingPrompts.default_monitor_prompt(instance.name), + }, ] with transaction.atomic(): diff --git a/apps/knowledge/tasks.py b/apps/knowledge/tasks.py index 7865d01..e4faa02 100644 --- a/apps/knowledge/tasks.py +++ b/apps/knowledge/tasks.py @@ -1,4 +1,5 @@ import hashlib +import logging from celery import shared_task from django.conf import settings @@ -9,6 +10,8 @@ from pypdf import PdfReader from apps.knowledge.models import RoleRagDocument, TrainingFile +logger = logging.getLogger(__name__) + def _decode_text_bytes(raw_bytes: bytes) -> str: try: @@ -41,6 +44,10 @@ def _get_text_chunks(text: str, size: int = 10000): @shared_task(name="apps.knowledge.tasks.ingest_training_file_task", bind=True, soft_time_limit=900, time_limit=1200) def ingest_training_file_task(self, file_uuid): + """ + Ingests a training file by extracting text, chunking it, generating embeddings via an external service, + and saving RoleRagDocument entries. Updates the file status accordingly and triggers prompt refinement. + """ try: file_obj = TrainingFile.objects.get(uuid=file_uuid) except TrainingFile.DoesNotExist: @@ -99,6 +106,10 @@ def ingest_training_file_task(self, file_uuid): file_obj.status = 'embedded' file_obj.is_processed = True file_obj.save() + + if file_obj.role_id: + update_agent_prompts_from_file_task.delay(str(file_obj.role.uuid)) + return f"Processed {chunk_counter} chunks via batching." except Exception as e: @@ -106,3 +117,69 @@ def ingest_training_file_task(self, file_uuid): file_obj.description = str(e) file_obj.save() raise e + + +@shared_task(name="apps.knowledge.tasks.update_agent_prompts_from_file_task", bind=True, soft_time_limit=120, time_limit=180) +def update_agent_prompts_from_file_task(self, role_uuid: str): + """ + After a training file is ingested (or deleted), refine the curriculum AgentConfig + system prompt using document content. Resets to the canonical base prompt when no + files remain. + """ + from apps.accounts.models import Role + from apps.onboarding.consumers.prompts import OnboardingPrompts + from apps.onboarding.models import AgentConfig + + try: + role = Role.objects.get(uuid=role_uuid) + except Role.DoesNotExist: + logger.warning("update_agent_prompts_from_file_task: role %s not found", role_uuid) + return + + curriculum_config = AgentConfig.objects.filter(role=role, agent_type='curriculum').first() + if not curriculum_config: + logger.warning("update_agent_prompts_from_file_task: no curriculum config for role %s", role_uuid) + return + + chunk_texts = list( + RoleRagDocument.objects.filter(role=role, is_active=True) + .order_by('training_file_id', 'chunk_index') + .values_list('content', flat=True)[:30] + ) + + # No files left... so we should reset + if not chunk_texts: + curriculum_config.system_prompt = OnboardingPrompts.default_curriculum_prompt(role.name) + curriculum_config.save(update_fields=['system_prompt', 'updated_at']) + logger.info("update_agent_prompts_from_file_task: reset to base prompt for role %s", role_uuid) + return + + combined_text = '\n\n'.join(chunk_texts)[:6000] + base_prompt = OnboardingPrompts.default_curriculum_prompt(role.name) + + try: + with Client(timeout=Timeout(60.0)) as client: + response = client.post( + settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, + json={ + "model": "meta-llama-3.1-8b-instruct", + "messages": [ + { + "role": "user", + "content": OnboardingPrompts.refine_curriculum_prompt( + role.name, base_prompt, combined_text + ), + }, + ], + "max_tokens": 600, + }, + ) + response.raise_for_status() + refined_prompt = response.json()["choices"][0]["message"]["content"].strip() + except Exception as e: + logger.exception("update_agent_prompts_from_file_task: LLM call failed for role %s: %s", role_uuid, e) + return + + curriculum_config.system_prompt = refined_prompt + curriculum_config.save(update_fields=['system_prompt', 'updated_at']) + logger.info("update_agent_prompts_from_file_task: refined curriculum prompt for role %s", role_uuid) diff --git a/apps/knowledge/viewsets.py b/apps/knowledge/viewsets.py index f4219f9..945ad11 100644 --- a/apps/knowledge/viewsets.py +++ b/apps/knowledge/viewsets.py @@ -8,6 +8,7 @@ from apps.accounts.models import Organization, Role from apps.accounts.permissions import can_manage_organization from apps.knowledge.models import RoleRagDocument, TrainingFile from apps.knowledge.serializers import RoleRagDocumentSerializer, TrainingFileSerializer +from apps.knowledge.tasks import update_agent_prompts_from_file_task class TrainingFileViewSet(ModelViewSet): queryset = TrainingFile.objects.all() @@ -89,7 +90,11 @@ class TrainingFileViewSet(ModelViewSet): if not (is_uploader or is_org_owner or is_org_manager): raise PermissionDenied('Permission denied') - return super().destroy(request, *args, **kwargs) + role_uuid = str(instance.role.uuid) if instance.role_id else None + response = super().destroy(request, *args, **kwargs) + if role_uuid: + update_agent_prompts_from_file_task.delay(role_uuid) + return response class RoleRagDocumentViewSet(ReadOnlyModelViewSet): queryset = RoleRagDocument.objects.all() diff --git a/apps/onboarding/consumers/prompts.py b/apps/onboarding/consumers/prompts.py index 3c2ad2b..ce84fc5 100644 --- a/apps/onboarding/consumers/prompts.py +++ b/apps/onboarding/consumers/prompts.py @@ -70,6 +70,57 @@ class OnboardingPrompts: f"Progress context JSON:\n{json.dumps(progress_context)}" ) + ### Default agent system prompts (canonical source of truth) ### + + @staticmethod + def default_curriculum_prompt(role_name: str) -> str: + return ( + f"You are an instructional design assistant for onboarding the role '{role_name}'. " + "Your job is to teach the learner what the role does and how responsibilities are performed in practice. " + "Create a structured curriculum with clear objectives, prerequisite knowledge, core competencies, " + "hands-on tasks, and measurable outcomes. Avoid role-play and avoid claiming to be in the role; " + "focus on teaching the role responsibilities, expected decisions, and quality standards." + ) + + @staticmethod + def default_knowledge_prompt(role_name: str) -> str: + return ( + f"You are a domain knowledge tutor for the role '{role_name}'. " + "Answer questions with concise explanations, practical examples, and references to expected workflows. " + "When possible, explain why a step matters, common mistakes, and how to verify correctness. " + "Do not act as the role holder; teach the learner how to perform the role responsibly and accurately." + ) + + @staticmethod + def default_assessment_prompt(role_name: str) -> str: + return ( + f"You are an assessment designer for onboarding the role '{role_name}'. " + "Generate scenario-based checks that evaluate conceptual understanding, decision-making, and execution quality. " + "Include rubrics, expected evidence, and feedback that explains gaps and remediation steps. " + "Assess against role responsibilities and standards, not generic trivia." + ) + + @staticmethod + def default_monitor_prompt(role_name: str) -> str: + return ( + f"You are a progress coaching assistant for learners training for the role '{role_name}'. " + "Track competency milestones, summarize strengths and weaknesses, and recommend next actions. " + "Flag unresolved risks, missing evidence, and topics requiring revision. " + "Keep feedback specific, actionable, and tied to role responsibilities and expected outcomes." + ) + + @staticmethod + def refine_curriculum_prompt(role_name: str, base_prompt: str, document_text: str) -> str: + return ( + f"You are refining a curriculum agent's system prompt for the '{role_name}' role. " + "Training documents have been uploaded. Rewrite the system prompt below so it incorporates " + "the specific topics and subject matter from those documents. " + "Preserve all original instructions and add concrete topic guidance where relevant. " + "Return ONLY the refined system prompt text — no commentary, no labels.\n\n" + f"Original system prompt:\n{base_prompt}\n\n" + f"Training document content:\n{document_text}" + ) + FALLBACK_SYSTEM_PROMPT = 'You are a helpful onboarding assistant.' KA_HELP_FALLBACK = ( diff --git a/site/src/views/OnboardingView.vue b/site/src/views/OnboardingView.vue index d6a9226..a23ddfd 100644 --- a/site/src/views/OnboardingView.vue +++ b/site/src/views/OnboardingView.vue @@ -113,7 +113,11 @@ const currentPageBody = computed(() => { const renderedBody = computed(() => { if (!currentPageBody.value) return '' - const body = currentPageBody.value.replace(/^#{1,6}\s+.+\n?/, '') + const lines = currentPageBody.value.split('\n') + const firstLineText = lines[0].replace(/^#{1,6}\s*/, '').trim() + const pageTitle = (currentPage.value?.title ?? '').trim() + const startsWithTitle = pageTitle && firstLineText.toLowerCase() === pageTitle.toLowerCase() + const body = startsWithTitle ? lines.slice(1).join('\n').trimStart() : currentPageBody.value return DOMPurify.sanitize(marked.parse(body) as string) }) diff --git a/site/src/views/OrganizationManage.vue b/site/src/views/OrganizationManage.vue index e46f61b..f7fe8d9 100644 --- a/site/src/views/OrganizationManage.vue +++ b/site/src/views/OrganizationManage.vue @@ -969,6 +969,7 @@ onMounted(async () => { " :multiple="false" :auto-upload="false" + :file-list="[]" >