452 lines
18 KiB
Python
452 lines
18 KiB
Python
|
|
import json
|
||
|
|
import logging
|
||
|
|
import re
|
||
|
|
import html
|
||
|
|
from typing import Any
|
||
|
|
from django.db import transaction
|
||
|
|
from django.utils import timezone
|
||
|
|
from rest_framework import status
|
||
|
|
from rest_framework.exceptions import PermissionDenied
|
||
|
|
from rest_framework.decorators import action
|
||
|
|
from rest_framework.response import Response
|
||
|
|
from rest_framework.viewsets import ModelViewSet
|
||
|
|
from asgiref.sync import async_to_sync
|
||
|
|
from channels.layers import get_channel_layer
|
||
|
|
|
||
|
|
from apps.mlstore.models import AgentEvent, AgentRun
|
||
|
|
from apps.mlstore import services as ml_services
|
||
|
|
from .models import OnboardingFlow, OnboardingPage, OnboardingField, OnboardingSession
|
||
|
|
from .serializers import (
|
||
|
|
OnboardingFlowSerializer,
|
||
|
|
OnboardingFlowDetailSerializer,
|
||
|
|
OnboardingPageSerializer,
|
||
|
|
OnboardingFieldSerializer,
|
||
|
|
OnboardingSessionSerializer,
|
||
|
|
OnboardingSubmissionSerializer,
|
||
|
|
OnboardingFeedbackSerializer,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
def _extract_json(text: str) -> dict[str, Any]:
|
||
|
|
if not text:
|
||
|
|
return {}
|
||
|
|
|
||
|
|
try:
|
||
|
|
return json.loads(text)
|
||
|
|
except Exception:
|
||
|
|
pass
|
||
|
|
|
||
|
|
# Prefer fenced json blocks
|
||
|
|
fenced = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, re.IGNORECASE)
|
||
|
|
if fenced:
|
||
|
|
try:
|
||
|
|
return json.loads(fenced.group(1))
|
||
|
|
except Exception:
|
||
|
|
return {}
|
||
|
|
|
||
|
|
# Fallback: find first balanced JSON object
|
||
|
|
start = text.find('{')
|
||
|
|
if start == -1:
|
||
|
|
return {}
|
||
|
|
|
||
|
|
depth = 0
|
||
|
|
for idx in range(start, len(text)):
|
||
|
|
char = text[idx]
|
||
|
|
if char == '{':
|
||
|
|
depth += 1
|
||
|
|
elif char == '}':
|
||
|
|
depth -= 1
|
||
|
|
if depth == 0:
|
||
|
|
candidate = text[start:idx + 1]
|
||
|
|
try:
|
||
|
|
return json.loads(candidate)
|
||
|
|
except Exception:
|
||
|
|
return {}
|
||
|
|
|
||
|
|
return {}
|
||
|
|
|
||
|
|
|
||
|
|
def _strip_html(text: str) -> str:
|
||
|
|
if not text:
|
||
|
|
return ""
|
||
|
|
cleaned = re.sub(r"<[^>]+>", " ", text)
|
||
|
|
cleaned = html.unescape(cleaned)
|
||
|
|
return re.sub(r"\s+", " ", cleaned).strip()
|
||
|
|
|
||
|
|
|
||
|
|
def _send_agent_progress_event(agent_run: AgentRun, content: dict):
    """Record a ``progress`` event for ``agent_run`` and broadcast it.

    An ``AgentEvent`` row is created first, then the same content is sent to
    the channel-layer group ``mlstore_agent_<agent uuid>``. This is
    best-effort: any exception raised along the way is logged as a warning
    and swallowed so the caller is never interrupted.
    """
    try:
        AgentEvent.objects.create(execution=agent_run, event_type='progress', content=content)

        group = f"mlstore_agent_{agent_run.agent.uuid}"
        # group_send is a coroutine function; wrap it so it can be called synchronously.
        group_send = async_to_sync(get_channel_layer().group_send)
        message = {
            "type": "mlstore_event",
            "event_type": "progress",
            "content": content,
            "timestamp": timezone.now().isoformat(),
        }
        group_send(group, message)
    except Exception as e:
        logger.warning("Failed to send progress event: %s", e)
|
||
|
|
|
||
|
|
|
||
|
|
class OnboardingFlowViewSet(ModelViewSet):
    """CRUD endpoints for onboarding flows, addressed by ``uuid``.

    Extra detail actions:

    * ``pages`` (GET): the flow rendered with the detail serializer.
    * ``generate`` (POST): regenerate the flow's pages and fields from the
      flow agent's model output (managers only).
    * ``publish`` (POST): set the flow status to ``published`` (managers only).
    """

    queryset = OnboardingFlow.objects.select_related('role', 'agent').all()
    serializer_class = OnboardingFlowSerializer
    lookup_field = 'uuid'

    # Field types the generator may emit; anything else is stored as 'text'.
    _ALLOWED_FIELD_TYPES = frozenset({"text", "textarea", "number", "boolean", "date"})

    def get_queryset(self):
        """Return flows, optionally filtered by the ``role`` and ``status`` query params."""
        qs = super().get_queryset()
        role_uuid = self.request.query_params.get('role')
        status_filter = self.request.query_params.get('status')
        if role_uuid:
            qs = qs.filter(role__uuid=role_uuid)
        if status_filter:
            qs = qs.filter(status=status_filter)
        return qs

    def get_serializer_class(self):
        """Use the detail serializer for ``retrieve`` and ``pages``; the default otherwise."""
        if self.action in ('retrieve', 'pages'):
            return OnboardingFlowDetailSerializer
        return super().get_serializer_class()

    @staticmethod
    def _is_manager(user) -> bool:
        """Return True when ``user`` is authenticated and has a truthy ``is_manager`` attribute."""
        return bool(user.is_authenticated and getattr(user, 'is_manager', False))

    @staticmethod
    def _build_generation_prompt(flow, instructions, rag_context) -> str:
        """Assemble the generation prompt: fixed rules, the JSON shape, then flow-specific context."""
        # NOTE(review): the nesting indentation inside the JSON-shape lines appears
        # collapsed to a single space in this copy of the file — confirm against the
        # canonical source before relying on the exact prompt text.
        return (
            "You are creating onboarding content as JSON. "
            "Return ONLY valid JSON (no prose, no markdown, no code fences).\n"
            "Do not include explanations or examples.\n"
            "Do not include HTML tags. Use plain text only.\n"
            "Each page body must be 3-6 paragraphs, at least 320 words total, and include 1 short list of 3-5 bullets.\n"
            "Before writing the body, create a brief outline of the key points to cover and include it in meta.outline.\n"
            "The outline should be a short list of 3-6 bullets, not chain-of-thought.\n"
            "Do NOT ask about the learner's personal experience. Ask about what someone in the role may encounter.\n"
            "Do NOT use any select or multiselect fields. Use only text, textarea, number, boolean, or date.\n"
            "Use the provided context for accurate, role-specific content.\n"
            "If context is insufficient, make reasonable assumptions without inventing tools or policies.\n"
            "JSON shape:\n"
            "{\n"
            " \"title\": string,\n"
            " \"description\": string,\n"
            " \"pages\": [\n"
            " {\n"
            " \"title\": string,\n"
            " \"body\": string,\n"
            " \"meta\": { \"outline\": [string] },\n"
            " \"fields\": [\n"
            " {\n"
            " \"key\": string,\n"
            " \"label\": string,\n"
            " \"type\": one of [text, textarea, number, boolean, date],\n"
            " \"required\": boolean,\n"
            " \"help_text\": string,\n"
            " \"placeholder\": string,\n"
            " \"options\": []\n"
            " }\n"
            " ]\n"
            " }\n"
            " ]\n"
            "}\n"
            f"Role: {flow.role.name}\n"
            f"Role description: {flow.role.description}\n"
            f"Flow title: {flow.title}\n"
            f"Flow description: {flow.description}\n"
            f"Extra instructions: {instructions}\n"
            f"Context:\n{rag_context}\n"
        )

    @classmethod
    def _create_fields(cls, page_obj, raw_fields) -> None:
        """Create one ``OnboardingField`` per well-formed (dict) entry of ``raw_fields``.

        Entries that are not dicts are skipped, and a ``raw_fields`` value
        that is not a list is treated as "no fields", because the data comes
        from model output and its structure is not guaranteed.
        """
        candidates = raw_fields if isinstance(raw_fields, list) else []
        well_formed = (entry for entry in candidates if isinstance(entry, dict))
        for field_index, field in enumerate(well_formed):
            field_type = field.get('type') or 'text'
            # The isinstance guard avoids a TypeError on unhashable values (e.g. a list).
            if not isinstance(field_type, str) or field_type not in cls._ALLOWED_FIELD_TYPES:
                field_type = 'text'
            OnboardingField.objects.create(
                page=page_obj,
                order=field_index,
                key=field.get('key') or f"field_{field_index + 1}",
                label=field.get('label') or f"Field {field_index + 1}",
                field_type=field_type,
                required=bool(field.get('required')),
                help_text=field.get('help_text') or '',
                placeholder=field.get('placeholder') or '',
                # Options are always stored empty: select-style fields are not generated.
                options=[],
                default_value=field.get('default_value'),
                validation=field.get('validation') or {},
            )

    @action(detail=True, methods=['get'])
    def pages(self, request, pk=None, uuid=None):
        """Return the flow serialized with ``OnboardingFlowDetailSerializer``."""
        flow = self.get_object()
        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)

    @action(detail=True, methods=['post'])
    def generate(self, request, pk=None, uuid=None):
        """Regenerate the flow's pages and fields from model output.

        Responses:
            403 ``permission_denied``: caller is not an authenticated manager.
            400 ``flow_agent_model_required``: the flow has no agent model path.
            500 ``generation_failed``: the inference call raised.
            400 ``invalid_generation_output``: the output holds no JSON object
                with a non-empty list of page objects. Existing pages are left
                untouched in this case.
            200: the updated flow (detail serializer). The flow is reset to
                ``draft`` and its previous pages are replaced.
        """
        flow = self.get_object()
        if not self._is_manager(request.user):
            return Response({"error": "permission_denied"}, status=status.HTTP_403_FORBIDDEN)
        if not flow.agent or not flow.agent.model or not flow.agent.model.path:
            return Response(
                {"error": "flow_agent_model_required"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        instructions = request.data.get('instructions') or ''
        rag_context = ""
        try:
            rag_context = ml_services.get_context_for_query(
                query=f"Create onboarding content for role {flow.role.name}",
                role_uuid=str(flow.role.uuid),
                top_k=6,
                similarity_threshold=0.35,
            )
        except Exception as e:
            # Context retrieval is optional; generation proceeds without it.
            logger.warning("Onboarding generation RAG lookup failed: %s", e)

        prompt = self._build_generation_prompt(flow, instructions, rag_context)

        try:
            result = ml_services.infer_with_model(flow.agent.model.path, prompt, {
                "max_tokens": 1800,
                "temperature": 0.2,
            })
        except Exception as e:
            logger.error("Onboarding generate inference failed: %s", e, exc_info=True)
            return Response({"error": "generation_failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        response_text = ''
        if isinstance(result, dict):
            response_text = result.get('response') or result.get('result') or ''
        payload = _extract_json(str(response_text))

        # Validate the structure before touching the database: model output is
        # untrusted, and an empty or malformed page list must not wipe the
        # flow's existing pages.
        raw_pages = payload.get('pages') if isinstance(payload, dict) else None
        pages = [page for page in raw_pages if isinstance(page, dict)] if isinstance(raw_pages, list) else []
        if not pages:
            return Response({"error": "invalid_generation_output", "raw": response_text}, status=status.HTTP_400_BAD_REQUEST)

        with transaction.atomic():
            flow.title = payload.get('title') or flow.title
            # Keep existing description on regenerate unless explicitly empty
            if not flow.description:
                flow.description = payload.get('description') or flow.description
            if flow.status != 'draft':
                flow.status = 'draft'
            flow.save(update_fields=['title', 'description', 'status'])

            OnboardingPage.objects.filter(flow=flow).delete()

            for page_index, page in enumerate(pages):
                body_text = _strip_html(page.get('body') or '')
                page_obj = OnboardingPage.objects.create(
                    flow=flow,
                    order=page_index,
                    title=page.get('title') or f"Page {page_index + 1}",
                    body=body_text,
                    meta=page.get('meta') or {},
                )
                self._create_fields(page_obj, page.get('fields'))

        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)

    @action(detail=True, methods=['post'])
    def publish(self, request, pk=None, uuid=None):
        """Set the flow status to ``published`` (managers only) and return the flow detail."""
        flow = self.get_object()
        if not self._is_manager(request.user):
            return Response({"error": "permission_denied"}, status=status.HTTP_403_FORBIDDEN)
        if flow.status != 'published':
            flow.status = 'published'
            flow.save(update_fields=['status'])
        serializer = OnboardingFlowDetailSerializer(flow, context={'request': request})
        return Response(serializer.data)
|
||
|
|
|
||
|
|
|
||
|
|
class OnboardingPageViewSet(ModelViewSet):
    """Standard model endpoints for onboarding pages, addressed by ``uuid``."""

    lookup_field = 'uuid'
    serializer_class = OnboardingPageSerializer
    # Load the parent flow and the page's fields together with each page.
    queryset = (
        OnboardingPage.objects
        .select_related('flow')
        .prefetch_related('fields')
        .all()
    )
|
||
|
|
|
||
|
|
|
||
|
|
class OnboardingFieldViewSet(ModelViewSet):
    """Standard model endpoints for onboarding fields, addressed by ``uuid``."""

    lookup_field = 'uuid'
    serializer_class = OnboardingFieldSerializer
    # Load the owning page together with each field.
    queryset = (
        OnboardingField.objects
        .select_related('page')
        .all()
    )
|
||
|
|
|
||
|
|
|
||
|
|
class OnboardingSessionViewSet(ModelViewSet):
    """Endpoints for a user's onboarding sessions, addressed by ``uuid``.

    Extra actions:

    * ``get_or_create`` (POST, list route): return the caller's latest
      non-completed session for a flow, creating one if none exists.
    * ``submit`` (POST): store the responses for one page and update progress.
    * ``feedback`` (POST): ask the flow agent's model for feedback on one
      page's responses and store it on the session.
    """

    queryset = OnboardingSession.objects.select_related('flow', 'user', 'agent_run', 'flow__agent').all()
    serializer_class = OnboardingSessionSerializer
    lookup_field = 'uuid'

    def get_queryset(self):
        """Scope sessions to the caller.

        Managers (truthy ``is_manager``) see every session, other
        authenticated users see only their own, and unauthenticated requests
        see nothing.
        """
        qs = super().get_queryset()
        user = self.request.user
        if not user or not user.is_authenticated:
            # Never expose other users' sessions to anonymous requests.
            return qs.none()
        if not getattr(user, 'is_manager', False):
            qs = qs.filter(user=user)
        return qs

    @staticmethod
    def _start_agent_run(flow, user):
        """Create the ``AgentRun`` that tracks a session, or return None when the flow has no agent."""
        if not flow or not flow.agent:
            return None
        return AgentRun.objects.create(
            agent=flow.agent,
            user=user,
            input_data={
                "type": "onboarding_session",
                "flow_uuid": str(flow.uuid),
                "role_uuid": str(flow.role.uuid),
            },
        )

    def perform_create(self, serializer):
        """Save a new session for the requesting user, attaching an agent run when the flow has an agent.

        Raises:
            PermissionDenied: the request is not authenticated.
        """
        if not self.request.user or not self.request.user.is_authenticated:
            raise PermissionDenied("Authentication required")
        flow = serializer.validated_data.get('flow')
        agent_run = self._start_agent_run(flow, self.request.user)
        serializer.save(user=self.request.user, agent_run=agent_run)

    @action(detail=False, methods=['post'])
    def get_or_create(self, request):
        """Return the caller's most recently updated non-completed session for a flow, creating one if needed.

        Request data:
            flow: identifier (``uuid``) of the onboarding flow.

        Responses:
            400 ``flow_required``: no ``flow`` value supplied.
            404 ``flow_not_found``: no flow matches, or the value is malformed.
            200: the serialized session.

        Raises:
            PermissionDenied: the request is not authenticated.
        """
        # Local import: keeps this method self-contained without touching file-level imports.
        from django.core.exceptions import ValidationError

        if not request.user or not request.user.is_authenticated:
            raise PermissionDenied("Authentication required")

        flow_uuid = request.data.get('flow')
        if not flow_uuid:
            return Response({"error": "flow_required"}, status=status.HTTP_400_BAD_REQUEST)

        try:
            flow = OnboardingFlow.objects.get(uuid=flow_uuid)
        except (OnboardingFlow.DoesNotExist, ValidationError, ValueError, TypeError):
            # A malformed identifier is reported like a missing flow rather
            # than surfacing as an unhandled server error.
            return Response({"error": "flow_not_found"}, status=status.HTTP_404_NOT_FOUND)

        session = (
            OnboardingSession.objects
            .filter(flow=flow, user=request.user)
            .exclude(status='completed')
            .order_by('-updated_at')
            .first()
        )

        if not session:
            # Create the run and the session together so a failed session
            # insert does not leave an orphaned agent run behind.
            with transaction.atomic():
                agent_run = self._start_agent_run(flow, request.user)
                session = OnboardingSession.objects.create(
                    flow=flow,
                    user=request.user,
                    agent_run=agent_run,
                )

        return Response(OnboardingSessionSerializer(session, context={'request': request}).data)

    @action(detail=True, methods=['post'])
    def submit(self, request, pk=None, uuid=None):
        """Store the responses for one page of the session and update its progress.

        The session is marked ``completed`` when ``mark_complete`` is truthy
        or when the submitted page's ``order`` is at or beyond the last page
        index of the flow. When the session has an agent run, a progress
        event is sent and the run's ``output_data['onboarding']`` is set to
        the session's responses.

        Responses:
            404 ``page_not_found``: the page does not belong to the session's flow.
            200: the serialized session.
        """
        session = self.get_object()
        serializer = OnboardingSubmissionSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        page_uuid = serializer.validated_data['page_uuid']
        responses = serializer.validated_data['responses']
        mark_complete = serializer.validated_data.get('mark_complete')

        try:
            page = OnboardingPage.objects.get(flow=session.flow, uuid=page_uuid)
        except OnboardingPage.DoesNotExist:
            return Response({"error": "page_not_found"}, status=status.HTTP_404_NOT_FOUND)

        # Copy before mutating so the stored value is replaced as a whole.
        responses_payload = dict(session.responses or {})
        responses_payload[str(page.uuid)] = responses
        session.responses = responses_payload
        session.current_page_order = page.order

        if mark_complete or page.order >= session.flow.pages.count() - 1:
            session.status = 'completed'
            session.completed_at = timezone.now()
        session.save(update_fields=['responses', 'current_page_order', 'status', 'completed_at'])

        if session.agent_run:
            progress_payload = {
                "flow_uuid": str(session.flow.uuid),
                "session_uuid": str(session.uuid),
                "page_uuid": str(page.uuid),
                "page_order": page.order,
                "status": session.status,
                "responses": responses,
            }
            _send_agent_progress_event(session.agent_run, progress_payload)
            session.agent_run.output_data = {
                **(session.agent_run.output_data or {}),
                "onboarding": session.responses,
            }
            session.agent_run.save(update_fields=['output_data'])

        return Response(OnboardingSessionSerializer(session, context={'request': request}).data)

    @action(detail=True, methods=['post'])
    def feedback(self, request, pk=None, uuid=None):
        """Generate model feedback for one page's responses and store it on the session.

        The feedback is saved under ``responses['__feedback__'][<page uuid>]``
        together with the optional learner question and a timestamp. If the
        model output contains no JSON object, the raw text (or a generic
        message when it is empty) is used as the ``summary``.

        Responses:
            404 ``page_not_found``: the page does not belong to the session's flow.
            400 ``flow_agent_model_required``: the flow has no agent model path.
            500 ``feedback_failed``: the inference call raised.
            200: ``{"feedback": ..., "session": ...}``.
        """
        session = self.get_object()
        serializer = OnboardingFeedbackSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        page_uuid = serializer.validated_data['page_uuid']
        responses = serializer.validated_data['responses']
        question = serializer.validated_data.get('question') or ''

        try:
            page = OnboardingPage.objects.get(flow=session.flow, uuid=page_uuid)
        except OnboardingPage.DoesNotExist:
            return Response({"error": "page_not_found"}, status=status.HTTP_404_NOT_FOUND)

        if not session.flow.agent or not session.flow.agent.model or not session.flow.agent.model.path:
            return Response({"error": "flow_agent_model_required"}, status=status.HTTP_400_BAD_REQUEST)

        prompt = (
            "You are an onboarding assessor. Provide concise feedback addressed directly to the learner using second-person \"You\" statements.\n"
            "Return ONLY valid JSON (no prose, no markdown, no code fences).\n"
            "JSON shape:\n"
            "{\n"
            " \"summary\": string\n"
            "}\n\n"
            f"Page title: {page.title}\n"
            f"Page body: {page.body}\n"
            f"Responses: {json.dumps(responses)}\n"
        )
        if question:
            prompt += f"Learner question: {question}\n"

        try:
            result = ml_services.infer_with_model(session.flow.agent.model.path, prompt, {
                "max_tokens": 900,
                "temperature": 0.2,
            })
        except Exception as e:
            logger.error("Onboarding feedback inference failed: %s", e, exc_info=True)
            return Response({"error": "feedback_failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

        feedback_text = ''
        if isinstance(result, dict):
            feedback_text = result.get('response') or result.get('result') or ''
        feedback_text = str(feedback_text).strip()

        feedback_payload = _extract_json(feedback_text)
        if not feedback_payload:
            feedback_payload = {
                "summary": feedback_text or "Feedback generated.",
            }

        # Copy both levels before mutating so the stored value is replaced as a whole.
        responses_payload = dict(session.responses or {})
        feedback_store = dict(responses_payload.get("__feedback__") or {})
        feedback_store[str(page.uuid)] = {
            "feedback": feedback_payload,
            "question": question,
            "updated_at": timezone.now().isoformat(),
        }
        responses_payload["__feedback__"] = feedback_store
        session.responses = responses_payload
        session.save(update_fields=['responses'])

        return Response({
            "feedback": feedback_payload,
            "session": OnboardingSessionSerializer(session, context={'request': request}).data,
        })
|