diff --git a/apps/knowledge/tasks.py b/apps/knowledge/tasks.py
index 9b65119..08fe55b 100644
--- a/apps/knowledge/tasks.py
+++ b/apps/knowledge/tasks.py
@@ -64,16 +64,13 @@ def ingest_training_file_task(self, file_uuid):
         all_documents = []
         chunk_counter = 0
                                                            
-        timeout = Timeout(60.0) 
-
-        with Client(timeout=timeout, auth=settings.INFERENCE_AUTH) as client:
-                                                              
-            for text_segment in _get_text_chunks(raw_text):
+        with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
+            for text_segment in _get_text_chunks(raw_text, size=settings.INGESTION_CHUNK_SIZE):
                 response = client.post(
                     settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT,
                     json={
                         "text": text_segment,
-                        "threshold": 95,
+                        "threshold": settings.SEMANTIC_CHUNK_THRESHOLD,
                     },
                 )
                 response.raise_for_status()
@@ -180,14 +177,13 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
     ]
 
     try:
-        with Client(timeout=Timeout(60.0), auth=settings.INFERENCE_AUTH) as client:
+        with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client:
             for agent_type, user_prompt in refine_calls:
                 if agent_type not in configs:
                     continue
                 response = client.post(
                     settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                     json={
-                        "model": "meta-llama-3.1-8b-instruct",
                         "messages": [{"role": "user", "content": user_prompt}],
                         "max_tokens": 600,
                     },
diff --git a/apps/onboarding/consumers/base.py b/apps/onboarding/consumers/base.py
index 221307c..86f5be6 100644
--- a/apps/onboarding/consumers/base.py
+++ b/apps/onboarding/consumers/base.py
@@ -78,7 +78,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
 
     ### MCP Handling ###
     async def orchestrate(self, message: str, config: AgentConfig, minimum_turns: int = 2, maximum_turns: int = 5, 
-                          max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: int = 60.0) -> str:
+                          max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: float = settings.INFERENCE_REQUEST_TIMEOUT) -> str:
         """ 
         Orchestrates a multi-turn conversation with the agent, including tool calls and reasoning steps. 
         """
@@ -153,7 +153,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
             payload["stop"] = stop
         try:
             chunks: list[str] = []
-            async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
+            async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                 async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
                     response.raise_for_status()
                     async for line in response.aiter_lines():
diff --git a/apps/onboarding/consumers/knowledge.py b/apps/onboarding/consumers/knowledge.py
index 7d43401..086d3b7 100644
--- a/apps/onboarding/consumers/knowledge.py
+++ b/apps/onboarding/consumers/knowledge.py
@@ -111,7 +111,7 @@ class OnboardingKnowledgeConsumer(BaseOnboardingConsumer):
             payload['stop'] = stop
         try:
             chunks: list[str] = []
-            async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
+            async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                 async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
                     response.raise_for_status()
                     async for line in response.aiter_lines():
diff --git a/apps/onboarding/mcp.py b/apps/onboarding/mcp.py
index e9ef1ec..4536598 100644
--- a/apps/onboarding/mcp.py
+++ b/apps/onboarding/mcp.py
@@ -65,7 +65,7 @@ class MCPRouter:
 
     async def _get_embedding(self, text):
         logger.info('MCP embedding request started')
-        async with httpx.AsyncClient(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
+        async with httpx.AsyncClient(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
             response = await client.post(
                 settings.INFERENCE_EMBEDDINGS_ENDPOINT,
                 json={'input': text},
diff --git a/apps/onboarding/viewsets.py b/apps/onboarding/viewsets.py
index fae9463..d644b7d 100644
--- a/apps/onboarding/viewsets.py
+++ b/apps/onboarding/viewsets.py
@@ -545,7 +545,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
             prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses)
 
             try:
-                with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
+                with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                     response = client.post(
                         settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                         json={
@@ -754,7 +754,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
         context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title
         prompt = f"Context:\n{context}\n\nQuestion: {message}"
         try:
-            with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
+            with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                 response = client.post(
                     settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                     json={
@@ -784,7 +784,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
             f"Return only the revised page body."
         )
         try:
-            with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
+            with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client:
                 response = client.post(
                     settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                     json={
diff --git a/config/settings.py b/config/settings.py
index 38278fc..ad286f0 100644
--- a/config/settings.py
+++ b/config/settings.py
@@ -35,7 +35,11 @@ INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
 INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
 INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"
 INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600'))
+INFERENCE_REQUEST_TIMEOUT = float(os.getenv('INFERENCE_REQUEST_TIMEOUT', '60'))
+INFERENCE_STREAM_TIMEOUT = float(os.getenv('INFERENCE_STREAM_TIMEOUT', '120'))
 EMBEDDING_DIMENSIONS = int(os.getenv('EMBEDDING_DIMENSIONS', '768'))
+INGESTION_CHUNK_SIZE = int(os.getenv('INGESTION_CHUNK_SIZE', '10000'))
+SEMANTIC_CHUNK_THRESHOLD = int(os.getenv('SEMANTIC_CHUNK_THRESHOLD', '95'))
 
 STATIC_URL  = os.getenv('DJANGO_STATIC_URL',  '/static/')
 MEDIA_URL   = os.getenv('DJANGO_MEDIA_URL',  '/media/')