import html
import json
import logging
import re
from typing import Any

from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
from django.core.exceptions import ValidationError as DjangoValidationError
from django.db import transaction
from django.utils import timezone
from rest_framework import status
from rest_framework.decorators import action
from rest_framework.exceptions import PermissionDenied
from rest_framework.response import Response
from rest_framework.viewsets import ModelViewSet

from apps.mlstore import services as ml_services
from apps.mlstore.models import AgentEvent, AgentRun

from .models import OnboardingFlow, OnboardingPage, OnboardingField, OnboardingSession
from .serializers import (
    OnboardingFlowSerializer,
    OnboardingFlowDetailSerializer,
    OnboardingPageSerializer,
    OnboardingFieldSerializer,
    OnboardingSessionSerializer,
    OnboardingSubmissionSerializer,
    OnboardingFeedbackSerializer,
)
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
def _extract_json(text: str) -> dict[str, Any]:
if not text:
return {}
try:
return json.loads(text)
except Exception:
pass
# Prefer fenced json blocks
fenced = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, re.IGNORECASE)
if fenced:
try:
return json.loads(fenced.group(1))
except Exception:
return {}
# Fallback: find first balanced JSON object
start = text.find('{')
if start == -1:
return {}
depth = 0
for idx in range(start, len(text)):
char = text[idx]
if char == '{':
depth += 1
elif char == '}':
depth -= 1
if depth == 0:
candidate = text[start:idx + 1]
try:
return json.loads(candidate)
except Exception:
return {}
return {}
def _strip_html(text: str) -> str:
if not text:
return ""
cleaned = re.sub(r"<[^>]+>", " ", text)
cleaned = html.unescape(cleaned)
return re.sub(r"\s+", " ", cleaned).strip()
def _send_agent_progress_event(agent_run: AgentRun, content: dict):
    """Record a 'progress' event for ``agent_run`` and broadcast it.

    An ``AgentEvent`` row is created first, then the same content is sent to
    the channel-layer group named after the run's agent uuid. This is
    best-effort: any exception is logged as a warning and not re-raised.
    """
    try:
        AgentEvent.objects.create(
            execution=agent_run,
            event_type='progress',
            content=content,
        )
        group = f"mlstore_agent_{agent_run.agent.uuid}"
        # group_send is a coroutine function; wrap it so it can be called
        # from this synchronous code path.
        send_to_group = async_to_sync(get_channel_layer().group_send)
        message = {
            "type": "mlstore_event",
            "event_type": "progress",
            "content": content,
            "timestamp": timezone.now().isoformat(),
        }
        send_to_group(group, message)
    except Exception as exc:
        logger.warning("Failed to send progress event: %s", exc)
# Viewset for onboarding flows: the standard ModelViewSet endpoints plus the
# ``pages``, ``generate`` and ``publish`` detail actions.
# NOTE: the class and its @action methods are documented with comments rather
# than docstrings because DRF uses those docstrings as API descriptions.
class OnboardingFlowViewSet(ModelViewSet):
    queryset = OnboardingFlow.objects.select_related('role', 'agent').all()
    serializer_class = OnboardingFlowSerializer
    lookup_field = 'uuid'

    # Field types the generation prompt allows; anything else is stored as 'text'.
    _GENERATED_FIELD_TYPES = frozenset({"text", "textarea", "number", "boolean", "date"})

    def get_queryset(self):
        """Return flows, optionally narrowed by the ``role`` and ``status`` query params."""
        qs = super().get_queryset()
        role_uuid = self.request.query_params.get('role')
        status_filter = self.request.query_params.get('status')
        if role_uuid:
            try:
                qs = qs.filter(role__uuid=role_uuid)
            except (DjangoValidationError, ValueError):
                # NOTE(review): assumes ``uuid`` is a UUIDField, whose lookups
                # reject malformed values with ValidationError. A malformed
                # filter value matches nothing instead of becoming a 500.
                return qs.none()
        if status_filter:
            qs = qs.filter(status=status_filter)
        return qs

    def get_serializer_class(self):
        """Use the detail serializer for ``retrieve`` and ``pages``; the default otherwise."""
        if self.action in ('retrieve', 'pages'):
            return OnboardingFlowDetailSerializer
        return super().get_serializer_class()

    @staticmethod
    def _manager_required_response(request):
        """Return a 403 response unless the requester is an authenticated manager.

        Returns ``None`` when the request may proceed.
        """
        user = request.user
        if not user.is_authenticated or not getattr(user, 'is_manager', False):
            return Response({"error": "permission_denied"}, status=status.HTTP_403_FORBIDDEN)
        return None

    @staticmethod
    def _coerce_text(value) -> str:
        """Turn a model-produced value into a string (lists are joined line by line)."""
        if value is None:
            return ''
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return "\n".join(str(item) for item in value)
        return str(value)

    @staticmethod
    def _build_generation_prompt(flow, instructions, rag_context) -> str:
        """Build the content-generation prompt for ``flow``."""
        return (
            "You are creating onboarding content as JSON. "
            "Return ONLY valid JSON (no prose, no markdown, no code fences).\n"
            "Do not include explanations or examples.\n"
            "Do not include HTML tags. Use plain text only.\n"
            "Each page body must be 3-6 paragraphs, at least 320 words total, and include 1 short list of 3-5 bullets.\n"
            "Before writing the body, create a brief outline of the key points to cover and include it in meta.outline.\n"
            "The outline should be a short list of 3-6 bullets, not chain-of-thought.\n"
            "Do NOT ask about the learner's personal experience. Ask about what someone in the role may encounter.\n"
            "Do NOT use any select or multiselect fields. Use only text, textarea, number, boolean, or date.\n"
            "Use the provided context for accurate, role-specific content.\n"
            "If context is insufficient, make reasonable assumptions without inventing tools or policies.\n"
            "JSON shape:\n"
            "{\n"
            " \"title\": string,\n"
            " \"description\": string,\n"
            " \"pages\": [\n"
            " {\n"
            " \"title\": string,\n"
            " \"body\": string,\n"
            " \"meta\": { \"outline\": [string] },\n"
            " \"fields\": [\n"
            " {\n"
            " \"key\": string,\n"
            " \"label\": string,\n"
            " \"type\": one of [text, textarea, number, boolean, date],\n"
            " \"required\": boolean,\n"
            " \"help_text\": string,\n"
            " \"placeholder\": string,\n"
            " \"options\": []\n"
            " }\n"
            " ]\n"
            " }\n"
            " ]\n"
            "}\n"
            f"Role: {flow.role.name}\n"
            f"Role description: {flow.role.description}\n"
            f"Flow title: {flow.title}\n"
            f"Flow description: {flow.description}\n"
            f"Extra instructions: {instructions}\n"
            f"Context:\n{rag_context}\n"
        )

    def _replace_flow_content(self, flow, payload):
        """Replace the flow's pages and fields with the generated ``payload``.

        Runs in one transaction so a failure keeps the previous pages. The
        payload comes from a language model, so entries with the wrong shape
        (non-list ``pages``/``fields``, non-dict items) are skipped rather
        than allowed to raise.
        """
        with transaction.atomic():
            flow.title = payload.get('title') or flow.title
            # Keep existing description on regenerate unless explicitly empty
            if not flow.description:
                flow.description = payload.get('description') or flow.description
            # Regenerated content always goes back to draft.
            if flow.status != 'draft':
                flow.status = 'draft'
            flow.save(update_fields=['title', 'description', 'status'])

            OnboardingPage.objects.filter(flow=flow).delete()

            raw_pages = payload.get('pages')
            pages = [p for p in raw_pages if isinstance(p, dict)] if isinstance(raw_pages, list) else []
            for page_index, page in enumerate(pages):
                body_text = _strip_html(self._coerce_text(page.get('body') or ''))
                page_obj = OnboardingPage.objects.create(
                    flow=flow,
                    order=page_index,
                    title=page.get('title') or f"Page {page_index + 1}",
                    body=body_text,
                    meta=page.get('meta') or {},
                )

                raw_fields = page.get('fields')
                fields = [f for f in raw_fields if isinstance(f, dict)] if isinstance(raw_fields, list) else []
                for field_index, field in enumerate(fields):
                    field_type = field.get('type') or 'text'
                    # The isinstance check also protects the set lookup from
                    # unhashable values such as a list.
                    if not isinstance(field_type, str) or field_type not in self._GENERATED_FIELD_TYPES:
                        field_type = 'text'
                    OnboardingField.objects.create(
                        page=page_obj,
                        order=field_index,
                        key=field.get('key') or f"field_{field_index + 1}",
                        label=field.get('label') or f"Field {field_index + 1}",
                        field_type=field_type,
                        required=bool(field.get('required')),
                        help_text=field.get('help_text') or '',
                        placeholder=field.get('placeholder') or '',
                        # Select-style fields are not generated, so options stay empty.
                        options=[],
                        default_value=field.get('default_value'),
                        validation=field.get('validation') or {},
                    )

    # GET <flow>/pages/ -- the flow rendered with the detail serializer.
    @action(detail=True, methods=['get'])
    def pages(self, request, pk=None, uuid=None):
        flow = self.get_object()
        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)

    # POST <flow>/generate/ -- manager only. Asks the flow agent's model for
    # onboarding content and replaces the flow's pages/fields with the result.
    # Responses: 403 permission_denied, 400 flow_agent_model_required,
    # 500 generation_failed, 400 invalid_generation_output, else the flow detail.
    @action(detail=True, methods=['post'])
    def generate(self, request, pk=None, uuid=None):
        flow = self.get_object()
        denied = self._manager_required_response(request)
        if denied is not None:
            return denied
        if not flow.agent or not flow.agent.model or not flow.agent.model.path:
            return Response(
                {"error": "flow_agent_model_required"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        instructions = request.data.get('instructions') or ''

        # Context retrieval is best-effort: generation continues without it.
        rag_context = ""
        try:
            rag_context = ml_services.get_context_for_query(
                query=f"Create onboarding content for role {flow.role.name}",
                role_uuid=str(flow.role.uuid),
                top_k=6,
                similarity_threshold=0.35,
            )
        except Exception as e:
            logger.warning("Onboarding generation RAG lookup failed: %s", e)

        prompt = self._build_generation_prompt(flow, instructions, rag_context)
        try:
            result = ml_services.infer_with_model(flow.agent.model.path, prompt, {
                "max_tokens": 1800,
                "temperature": 0.2,
            })
        except Exception as e:
            logger.error("Onboarding generate inference failed: %s", e, exc_info=True)
            return Response({"error": "generation_failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        response_text = ''
        if isinstance(result, dict):
            response_text = result.get('response') or result.get('result') or ''
        payload = _extract_json(str(response_text))
        # Reject anything that is not a JSON object carrying a 'pages' key;
        # the isinstance check keeps non-object JSON from raising below.
        if not isinstance(payload, dict) or 'pages' not in payload:
            return Response(
                {"error": "invalid_generation_output", "raw": response_text},
                status=status.HTTP_400_BAD_REQUEST,
            )

        self._replace_flow_content(flow, payload)

        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)

    # POST <flow>/publish/ -- manager only. Sets status to 'published' (no
    # write when it already is) and returns the flow detail.
    @action(detail=True, methods=['post'])
    def publish(self, request, pk=None, uuid=None):
        flow = self.get_object()
        denied = self._manager_required_response(request)
        if denied is not None:
            return denied
        if flow.status != 'published':
            flow.status = 'published'
            flow.save(update_fields=['status'])
        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)
# ModelViewSet for onboarding pages, addressed by ``uuid``. The queryset joins
# the parent flow and prefetches each page's fields.
class OnboardingPageViewSet(ModelViewSet):
    lookup_field = 'uuid'
    serializer_class = OnboardingPageSerializer
    queryset = (
        OnboardingPage.objects
        .select_related('flow')
        .prefetch_related('fields')
        .all()
    )
# ModelViewSet for onboarding fields, addressed by ``uuid``. The queryset joins
# the parent page.
class OnboardingFieldViewSet(ModelViewSet):
    lookup_field = 'uuid'
    serializer_class = OnboardingFieldSerializer
    queryset = (
        OnboardingField.objects
        .select_related('page')
        .all()
    )
# Viewset for onboarding sessions: the standard ModelViewSet endpoints plus
# ``get_or_create`` (collection) and ``submit`` / ``feedback`` (detail).
# NOTE: the class and its @action methods are documented with comments rather
# than docstrings because DRF uses those docstrings as API descriptions.
class OnboardingSessionViewSet(ModelViewSet):
    queryset = OnboardingSession.objects.select_related('flow', 'user', 'agent_run', 'flow__agent').all()
    serializer_class = OnboardingSessionSerializer
    lookup_field = 'uuid'

    def get_queryset(self):
        """Return the sessions the requester may see.

        Managers see every session, other authenticated users only their
        own, and unauthenticated requesters see none.
        """
        qs = super().get_queryset()
        user = self.request.user
        if not user or not user.is_authenticated:
            # Never expose other people's sessions to anonymous requests,
            # whatever the configured permission classes allow.
            return qs.none()
        if not getattr(user, 'is_manager', False):
            qs = qs.filter(user=user)
        return qs

    @staticmethod
    def _start_agent_run(flow, user):
        """Create the AgentRun that tracks a session, or return ``None`` if the flow has no agent."""
        if not flow or not flow.agent:
            return None
        return AgentRun.objects.create(
            agent=flow.agent,
            user=user,
            input_data={
                "type": "onboarding_session",
                "flow_uuid": str(flow.uuid),
                "role_uuid": str(flow.role.uuid),
            },
        )

    def perform_create(self, serializer):
        """Save a new session for the requesting user, with an AgentRun when the flow has an agent.

        Raises:
            PermissionDenied: if the requester is not authenticated.
        """
        if not self.request.user or not self.request.user.is_authenticated:
            raise PermissionDenied("Authentication required")
        flow = serializer.validated_data.get('flow')
        agent_run = self._start_agent_run(flow, self.request.user)
        serializer.save(user=self.request.user, agent_run=agent_run)

    # POST get_or_create/ -- returns the requester's most recently updated
    # session for the given flow whose status is not 'completed', creating one
    # when there is none. Responses: 400 flow_required, 404 flow_not_found,
    # else the serialized session.
    @action(detail=False, methods=['post'])
    def get_or_create(self, request):
        if not request.user or not request.user.is_authenticated:
            raise PermissionDenied("Authentication required")
        flow_uuid = request.data.get('flow')
        if not flow_uuid:
            return Response({"error": "flow_required"}, status=status.HTTP_400_BAD_REQUEST)
        try:
            flow = OnboardingFlow.objects.get(uuid=flow_uuid)
        except (OnboardingFlow.DoesNotExist, DjangoValidationError, ValueError, TypeError):
            # NOTE(review): assumes ``uuid`` is a UUIDField, whose lookups
            # reject malformed values with ValidationError. A malformed
            # identifier is reported as "not found" instead of a 500.
            return Response({"error": "flow_not_found"}, status=status.HTTP_404_NOT_FOUND)

        session = (
            OnboardingSession.objects
            .filter(flow=flow, user=request.user)
            .exclude(status='completed')
            .order_by('-updated_at')
            .first()
        )
        if not session:
            session = OnboardingSession.objects.create(
                flow=flow,
                user=request.user,
                agent_run=self._start_agent_run(flow, request.user),
            )
        return Response(OnboardingSessionSerializer(session, context={'request': request}).data)

    # POST <session>/submit/ -- stores the responses for one page under the
    # page uuid, marks the session completed on request or on the last page,
    # and mirrors progress to the session's AgentRun when there is one.
    # Responses: 404 page_not_found, else the serialized session.
    @action(detail=True, methods=['post'])
    def submit(self, request, pk=None, uuid=None):
        session = self.get_object()
        serializer = OnboardingSubmissionSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        page_uuid = serializer.validated_data['page_uuid']
        responses = serializer.validated_data['responses']
        mark_complete = serializer.validated_data.get('mark_complete')

        try:
            page = OnboardingPage.objects.get(flow=session.flow, uuid=page_uuid)
        except OnboardingPage.DoesNotExist:
            return Response({"error": "page_not_found"}, status=status.HTTP_404_NOT_FOUND)

        # Work on a copy so the stored mapping is replaced rather than mutated.
        responses_payload = dict(session.responses or {})
        responses_payload[str(page.uuid)] = responses
        session.responses = responses_payload
        session.current_page_order = page.order
        # The comparison treats page orders as zero-based.
        if mark_complete or page.order >= session.flow.pages.count() - 1:
            session.status = 'completed'
            session.completed_at = timezone.now()
        # 'updated_at' is listed because Django only refreshes an auto_now
        # field when it is named in update_fields, and get_or_create orders
        # by it. NOTE(review): assumes updated_at is auto_now -- confirm.
        session.save(update_fields=[
            'responses', 'current_page_order', 'status', 'completed_at', 'updated_at',
        ])

        if session.agent_run:
            progress_payload = {
                "flow_uuid": str(session.flow.uuid),
                "session_uuid": str(session.uuid),
                "page_uuid": str(page.uuid),
                "page_order": page.order,
                "status": session.status,
                "responses": responses,
            }
            _send_agent_progress_event(session.agent_run, progress_payload)
            session.agent_run.output_data = {
                **(session.agent_run.output_data or {}),
                "onboarding": session.responses,
            }
            session.agent_run.save(update_fields=['output_data'])

        return Response(OnboardingSessionSerializer(session, context={'request': request}).data)

    # POST <session>/feedback/ -- asks the flow agent's model for feedback on
    # one page's responses and stores it under responses["__feedback__"].
    # Responses: 404 page_not_found, 400 flow_agent_model_required,
    # 500 feedback_failed, else {"feedback": ..., "session": ...}.
    @action(detail=True, methods=['post'])
    def feedback(self, request, pk=None, uuid=None):
        session = self.get_object()
        serializer = OnboardingFeedbackSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        page_uuid = serializer.validated_data['page_uuid']
        responses = serializer.validated_data['responses']
        question = serializer.validated_data.get('question') or ''

        try:
            page = OnboardingPage.objects.get(flow=session.flow, uuid=page_uuid)
        except OnboardingPage.DoesNotExist:
            return Response({"error": "page_not_found"}, status=status.HTTP_404_NOT_FOUND)

        if not session.flow.agent or not session.flow.agent.model or not session.flow.agent.model.path:
            return Response({"error": "flow_agent_model_required"}, status=status.HTTP_400_BAD_REQUEST)

        prompt = (
            "You are an onboarding assessor. Provide concise feedback addressed directly to the learner using second-person \"You\" statements.\n"
            "Return ONLY valid JSON (no prose, no markdown, no code fences).\n"
            "JSON shape:\n"
            "{\n"
            " \"summary\": string\n"
            "}\n\n"
            f"Page title: {page.title}\n"
            f"Page body: {page.body}\n"
            f"Responses: {json.dumps(responses)}\n"
        )
        if question:
            prompt += f"Learner question: {question}\n"

        try:
            result = ml_services.infer_with_model(session.flow.agent.model.path, prompt, {
                "max_tokens": 900,
                "temperature": 0.2,
            })
        except Exception as e:
            logger.error("Onboarding feedback inference failed: %s", e, exc_info=True)
            return Response({"error": "feedback_failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        feedback_text = ''
        if isinstance(result, dict):
            feedback_text = result.get('response') or result.get('result') or ''
        feedback_text = str(feedback_text).strip()

        feedback_payload = _extract_json(feedback_text)
        # Fall back to the raw text when no JSON object was produced, so the
        # stored feedback always has the {"summary": ...} object shape.
        if not isinstance(feedback_payload, dict) or not feedback_payload:
            feedback_payload = {
                "summary": feedback_text or "Feedback generated.",
            }

        # Feedback is kept inside the responses mapping under a reserved key,
        # indexed by page uuid. Copies avoid mutating the stored mapping.
        responses_payload = dict(session.responses or {})
        feedback_store = dict(responses_payload.get("__feedback__") or {})
        feedback_store[str(page.uuid)] = {
            "feedback": feedback_payload,
            "question": question,
            "updated_at": timezone.now().isoformat(),
        }
        responses_payload["__feedback__"] = feedback_store
        session.responses = responses_payload
        # See submit(): 'updated_at' must be named for an auto_now refresh.
        session.save(update_fields=['responses', 'updated_at'])

        return Response({
            "feedback": feedback_payload,
            "session": OnboardingSessionSerializer(session, context={'request': request}).data,
        })