Code Examples
Code examples
Complete working examples for common use cases.
Switching from Anthropic or Bedrock?
Native provider SDKs use different tool-use payloads. The provider migration guide shows the same examples in OpenAI, Anthropic Messages, and Bedrock Converse form, and lets you switch between them.
Basic tool-calling agent
A minimal multi-turn loop. Pick your SDK — tools stay in native Anthropic, OpenAI, or Bedrock shape. Full walkthrough: Python agent example.
import anthropic
import json
import os
# Anthropic SDK client pointed at the Orqen endpoint via base_url. The API key
# is read from the ORQEN_API_KEY environment variable; if the variable is not
# set, the os.environ lookup raises KeyError here.
client = anthropic.Anthropic(
    api_key=os.environ["ORQEN_API_KEY"],
    base_url="https://api.orqen.app",
)

# Tool definitions passed as `tools=` on each request. Each entry has a name,
# a description, and a JSON Schema for its arguments under "input_schema".
TOOLS = [
    {
        "name": "get_weather",
        "description": "Get current weather for a city.",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
]
def call_tool(name: str, args: dict) -> dict:
    """Execute the tool called ``name`` with ``args`` and return its result.

    This is a stub: ``get_weather`` returns fixed sample data for the
    requested city.

    Args:
        name: Tool name requested by the model.
        args: Tool input supplied by the model.

    Returns:
        The tool result as a JSON-serialisable dict, or ``{"error": ...}``
        when the tool is unknown or its required input is missing. Errors are
        returned rather than raised so the caller can hand them back to the
        model as a tool result.
    """
    if name == "get_weather":
        if "city" not in args:
            # Model-generated input is not guaranteed to honour the schema;
            # report the problem instead of crashing with KeyError.
            return {"error": "Missing required argument: city"}
        return {"city": args["city"], "temperature_c": 13, "conditions": "light rain"}
    return {"error": f"Unknown tool: {name}"}
def run(user_message: str) -> str:
    """Run a multi-turn tool-calling loop and return the model's final text.

    Sends ``user_message`` to the model, executes every ``tool_use`` block in
    the reply via ``call_tool``, appends the results to the conversation, and
    repeats for at most 8 rounds.

    Args:
        user_message: The user prompt that starts the conversation.

    Returns:
        - On ``end_turn``: the first content block that has a ``text``
          attribute, or ``""`` if there is none.
        - On any other stop reason except ``tool_use``: a message naming that
          stop reason.
        - ``"Stopped after max tool rounds."`` if the model is still
          requesting tools after the last round.
    """
    messages = [{"role": "user", "content": user_message}]
    for _ in range(8):
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=4096,
            messages=messages,
            tools=TOOLS,
        )
        if response.stop_reason == "end_turn":
            return next(
                (block.text for block in response.content if hasattr(block, "text")),
                "",
            )
        if response.stop_reason != "tool_use":
            # The model stopped for some other reason (e.g. it hit max_tokens).
            # Report that reason instead of falling through to the
            # "max tool rounds" message, which would be inaccurate.
            return f"Stopped early: {response.stop_reason}."

        # Append the assistant turn unchanged, then answer each tool_use block
        # with a tool_result carrying the same id.
        messages.append({"role": "assistant", "content": response.content})
        tool_results = [
            {
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": json.dumps(call_tool(block.name, block.input)),
            }
            for block in response.content
            if block.type == "tool_use"
        ]
        messages.append({"role": "user", "content": tool_results})
    return "Stopped after max tool rounds."
print(run("What is the weather in London?"))
Automatic model routing
Let Orqen pick the best model based on task complexity and your connected providers.
from openai import OpenAI
# OpenAI SDK client pointed at Orqen's /v1 endpoint via base_url.
# "sk-orq-YOUR_KEY" is a placeholder: substitute your own key.
client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

# NOTE: every call below rebinds `response`; the calls are independent
# illustrations, and `[...]` stands in for your own tools / messages.

# Simple query → Orqen routes to a fast, cheap model (e.g. Haiku or GPT-4o-mini)
response = client.chat.completions.create(
    model="orqen/auto",
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
)

# Complex analysis → Orqen routes to a capable model (e.g. Sonnet or GPT-4o)
response = client.chat.completions.create(
    model="orqen/auto",
    messages=[{"role": "user", "content": "Analyse this codebase and suggest architectural improvements..."}],
    tools=[...],
)

# Always cheapest
response = client.chat.completions.create(model="orqen/cheap", messages=[...])
# Always fastest (by observed latency)
response = client.chat.completions.create(model="orqen/fast", messages=[...])
Streaming
Orqen supports streaming responses. Payload optimization happens before the stream begins, so the first token can arrive without waiting on mid-stream processing.
from openai import OpenAI
# OpenAI SDK client pointed at Orqen's /v1 endpoint via base_url.
# "sk-orq-YOUR_KEY" is a placeholder: substitute your own key.
client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

# stream=True: the result is iterated chunk by chunk by the loop that follows.
# `tools=[...]` is a placeholder for your own tool definitions.
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Explain quantum computing in simple terms."}],
    tools=[...],
    stream=True,
)
for chunk in stream:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)
Inspecting optimization results
Every response includes headers showing what Orqen did. Access them via the raw response:
import httpx
import json
# Request headers: bearer-token authorization (placeholder key — substitute
# your own) and a JSON content type.
headers = {
    "Authorization": "Bearer sk-orq-YOUR_KEY",
    "Content-Type": "application/json",
}

# Chat-completions request body, posted as JSON to /v1/chat/completions.
body = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
    "tools": [...], # your tools (e.g. 51 in bedrock_multi_tool_agent.py)
}
with httpx.Client() as client:
r = client.post("https://api.orqen.app/v1/chat/completions", headers=headers, json=body)
print("Tools in: ", r.headers.get("x-orqen-tools-input"))
print("Tools out: ", r.headers.get("x-orqen-tools-output"))
print("Tool ratio: ", r.headers.get("x-orqen-prune-ratio"))
print("Routing method:", r.headers.get("x-orqen-routing"))
print()
print(r.json()["choices"][0]["message"])
Benchmark your agent
Run the same query direct and via Orqen and compare input token counts. One dependency, no mocking — numbers come straight from the provider response. Full walkthrough: benchmark script.
pip install httpx
export ORQEN_API_KEY=sk-orq-...
export ANTHROPIC_API_KEY=sk-ant-...
python benchmark.py
Framework integrations
LangChain, LangGraph, and MCP work with Orqen unchanged — just swap the API key and base URL. Full walkthrough: framework integrations.
import os

from langchain_openai import ChatOpenAI

# Only the API key and base URL change when moving to Orqen; the key is read
# from the ORQEN_API_KEY environment variable (KeyError if it is not set).
# Before: ChatOpenAI(model="gpt-4o", api_key="sk-...")
llm = ChatOpenAI(
    model="gpt-4o",
    api_key=os.environ["ORQEN_API_KEY"],
    base_url="https://api.orqen.app/v1",
)
llm_with_tools = llm.bind_tools(ALL_TOOLS) # Orqen prunes per request
Error handling
from openai import OpenAI, APIStatusError, RateLimitError
import time
# OpenAI SDK client pointed at Orqen's /v1 endpoint via base_url.
# "sk-orq-YOUR_KEY" is a placeholder: substitute your own key.
client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)
def call_with_retry(messages, tools, max_retries=3):
    """Call the chat-completions endpoint, retrying on transient failures.

    Rate-limit errors are retried after a linearly growing wait (60s, 120s,
    ...); HTTP 503 responses are retried after 5s. Any other API status error
    is re-raised immediately.

    Args:
        messages: Chat messages to send.
        tools: Tool definitions forwarded with the request.
        max_retries: Total number of attempts to make.

    Returns:
        The completion returned by the first successful attempt.

    Raises:
        APIStatusError: For any status error that is neither a rate limit nor
            a 503 (e.g. 400/401/403); these are not retried.
        RuntimeError: When every attempt failed with a retryable error; the
            last underlying error is attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(max_retries):
        # No point sleeping after the final attempt: nothing is left to retry.
        has_attempts_left = attempt + 1 < max_retries
        try:
            return client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools,
            )
        except RateLimitError as err:
            # Orqen rate limit — wait and retry
            last_error = err
            if has_attempts_left:
                wait = 60 * (attempt + 1)
                print(f"Rate limited. Waiting {wait}s...")
                time.sleep(wait)
        except APIStatusError as err:
            if err.status_code != 503:
                raise  # don't retry auth/validation or other non-transient errors
            # Orqen temporarily unavailable — retry with short wait
            last_error = err
            if has_attempts_left:
                time.sleep(5)
    raise RuntimeError("Max retries exceeded") from last_error