Code examples
Complete working examples for common use cases.
Switching from Anthropic or Bedrock?
Native provider SDKs use different tool-use payloads. The provider migration guide shows flip-able OpenAI, Anthropic Messages, and Bedrock Converse examples.
Basic tool-calling agent
A minimal agent loop with one example tool (add as many as you like). Orqen runs invisibly — just change the base_url.
from openai import OpenAI
import json

# Point the standard OpenAI SDK at Orqen's OpenAI-compatible endpoint.
# Only base_url (and the Orqen API key) differ from a stock OpenAI setup.
client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)
# Tool definitions in the standard OpenAI function-calling schema.
# Pass the full list on every request; Orqen prunes it per query.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a city. Use when user asks about weather, temperature, or forecast.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    },
    # ... add more tools. Orqen will prune to the relevant ones automatically.
]
def run_agent(user_message: str) -> str:
    """Run one agent conversation until the model stops requesting tools.

    Sends up to 10 rounds of chat-completion requests. Each round either
    yields a final text answer (returned as-is) or a batch of tool calls,
    which are executed locally and appended to the transcript as "tool"
    messages before the next round.
    """
    history = [{"role": "user", "content": user_message}]
    for _round in range(10):  # hard cap on agent rounds
        reply = client.chat.completions.create(
            model="gpt-4o",
            messages=history,
            tools=TOOLS,
            tool_choice="auto",
        ).choices[0].message
        # No tool calls means the model produced its final answer.
        if not reply.tool_calls:
            return reply.content
        # Record the assistant turn, then answer each requested tool call.
        history.append(reply)
        for call in reply.tool_calls:
            outcome = execute_tool(call.function.name, json.loads(call.function.arguments))
            history.append({
                "role": "tool",
                "tool_call_id": call.id,
                "content": json.dumps(outcome),
            })
    return "Max rounds reached."
def execute_tool(name: str, args: dict) -> dict:
    """Dispatch a tool call by name and return a JSON-serializable result."""
    # Stub implementation — a real agent would call an actual weather API.
    if name != "get_weather":
        return {"error": f"Unknown tool: {name}"}
    return {"temperature": 15.7, "conditions": "partly cloudy", "city": args["city"]}
if __name__ == "__main__":
    print(run_agent("What's the weather in London today?"))

Automatic model routing
Let Orqen pick the best model based on task complexity and your connected providers.
from openai import OpenAI

client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

# Simple query → Orqen routes to a fast, cheap model (e.g. Haiku or GPT-4o-mini)
response = client.chat.completions.create(
    model="orqen/auto",
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
)

# Complex analysis → Orqen routes to a capable model (e.g. Sonnet or GPT-4o)
response = client.chat.completions.create(
    model="orqen/auto",
    messages=[{"role": "user", "content": "Analyse this codebase and suggest architectural improvements..."}],
    tools=[...],  # placeholder — pass your real tool definitions here
)

# Always cheapest
response = client.chat.completions.create(model="orqen/cheap", messages=[...])
# Always fastest (by observed latency)
response = client.chat.completions.create(model="orqen/fast", messages=[...])

Streaming
Orqen supports streaming responses. Tool pruning happens before the stream begins, so the first token arrives without extra latency.
from openai import OpenAI

client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

# stream=True returns an iterator of chunks instead of one response object.
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Explain quantum computing in simple terms."}],
    tools=[...],  # placeholder — pass your real tool definitions here
    stream=True,
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

Inspecting pruning results
Every response includes headers showing what Orqen did. Access them via the raw response:
import httpx
import json

# Raw HTTP request so the response headers are accessible.
headers = {
    "Authorization": "Bearer sk-orq-YOUR_KEY",
    "Content-Type": "application/json",
}
body = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
    "tools": [...],  # your tools (e.g. 51 in bedrock_multi_tool_agent.py)
}
with httpx.Client() as client:
    r = client.post("https://api.orqen.app/v1/chat/completions", headers=headers, json=body)
    print("Tools in: ", r.headers.get("x-orqen-tools-input"))
    print("Tools out: ", r.headers.get("x-orqen-tools-output"))
    print("Prune ratio: ", r.headers.get("x-orqen-prune-ratio"))
    print("Routing method:", r.headers.get("x-orqen-routing"))
    print()
    print(r.json()["choices"][0]["message"])

Error handling
from openai import OpenAI, APIStatusError, RateLimitError
import time

client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)
def call_with_retry(messages, tools, max_retries=3):
    """Call the chat endpoint, retrying transient failures up to max_retries.

    Rate limits back off linearly (60s, 120s, ...). A 503 waits 5s before
    retrying. All other API errors — including 400/401/403 auth/validation
    failures — are re-raised immediately.
    """
    for attempt in range(max_retries):
        try:
            return client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools,
            )
        except RateLimitError:
            # Orqen rate limit — wait and retry
            wait = 60 * (attempt + 1)
            print(f"Rate limited. Waiting {wait}s...")
            time.sleep(wait)
        except APIStatusError as err:
            if err.status_code != 503:
                # Not a transient outage: auth/validation or unexpected error.
                raise
            # Orqen temporarily unavailable — retry with short wait
            time.sleep(5)
    raise RuntimeError("Max retries exceeded")