Skip to main content
The official Python SDK for Octen. Search the web, create text embeddings, and web chat with LLMs.

Get API Key

Get your API key from the API Platform

PyPI

Download and view the SDK on PyPI

Install

pip install octen
Requires Python 3.8+

Quick Start

from octen import Octen

client = Octen(api_key="your-api-key") # or set OCTEN_API_KEY env var
Search the web with fast mode.
results = client.search.search(
    query="blog post about artificial intelligence",
    count=10
)
results = client.search.search(
    query="climate tech news",
    count=20,
    start_time="2024-01-01T00:00:00Z",
    include_domains=["techcrunch.com", "wired.com"]
)
With highlights and full content:
from octen import HighlightOptions, FullContentOptions

results = client.search.search(
    query="vector database comparison",
    count=10,
    highlight=HighlightOptions(enable=True, max_tokens=500),
    full_content=FullContentOptions(enable=True, max_tokens=2000),
    format="markdown"
)
Use simple_search for quick lookups:
results = client.search.simple_search("Python web framework", count=5)

Chat

Send messages to LLMs with optional web search and streaming.
from octen import ChatMessage

response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="What is quantum computing?")]
)
print(response.text)
Multi-turn conversation:
response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[
        ChatMessage(role="system", content="You are a helpful assistant."),
        ChatMessage(role="user", content="What is Python?"),
        ChatMessage(role="assistant", content="Python is a programming language."),
        ChatMessage(role="user", content="What are its main use cases?"),
    ]
)
With web search:
response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Latest AI news today")],
    web_search="on"
)
print(response.text)
print(response.search_results)
print(response.citations)
from octen import WebSearchOptions

response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Climate tech breakthroughs")],
    web_search="on",
    web_search_options=WebSearchOptions(
        count=10,
        include_domains=["nature.com", "science.org"],
        start_time="2025-01-01T00:00:00Z",
        format="markdown",
        full_content={"enable": True, "max_tokens": 2000}
    )
)
With streaming:
stream = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Write a short poem")],
    stream=True
)

for event in stream:
    if event.type == "content":
        print(event.choices[0].delta.content or "", end="", flush=True)
    elif event.type == "search_done":
        print(f"\nSearch results: {event.search_results}")
    elif event.type == "usage":
        print(f"\nTokens: {event.usage.total_tokens}")

Embedding

Create text embeddings with models of different sizes.
response = client.embedding.create(
    input="Hello, world!",
    model="octen-embedding-4b"
)
vector = response.get_first_embedding()
response = client.embedding.create(
    input=["first document", "second document", "third document"],
    model="octen-embedding-8b",
    input_type="document"
)
vectors = response.get_embeddings()
Available models: octen-embedding-0.6b, octen-embedding-4b, octen-embedding-8b.

Convenience methods for common patterns:
# Embed a search query
vector = client.embedding.embed_query("search text")

# Embed a batch of documents
vectors = client.embedding.embed_documents(["doc 1", "doc 2", "doc 3"])

Async

Use AsyncOcten for async operations.
import asyncio
from octen import AsyncOcten

async def main():
    async with AsyncOcten(api_key="your-api-key") as client:
        results = await client.search.search(
            query="machine learning startups",
            count=10
        )

asyncio.run(main())
Run search, embedding, and chat concurrently:
from octen import ChatMessage

async with AsyncOcten(api_key="your-api-key") as client:
    search_result, embed_result, chat_result = await asyncio.gather(
        client.search.search(query="AI news", count=5),
        client.embedding.create(input="Hello world"),
        client.chat.create(
            model="openai/gpt-5.4",
            messages=[ChatMessage(role="user", content="Hello!")]
        )
    )
Async streaming:
async with AsyncOcten(api_key="your-api-key") as client:
    stream = await client.chat.create(
        model="openai/gpt-5.4",
        messages=[ChatMessage(role="user", content="Tell me a story")],
        stream=True
    )
    async for event in stream:
        if event.type == "content":
            print(event.choices[0].delta.content or "", end="", flush=True)

Error Handling

from octen import (
    OctenAuthenticationError,
    OctenRateLimitError,
    OctenTimeoutError,
    OctenConnectionError,
    OctenStreamError,
    OctenAPIError,
)

try:
    results = client.search.search("query")
except OctenAuthenticationError:
    print("Invalid API key")
except OctenRateLimitError as e:
    print(f"Rate limited, retry after {e.retry_after}s")
except OctenTimeoutError:
    print("Request timed out")
except OctenConnectionError:
    print("Network connection failed")
except OctenStreamError as e:
    print(f"Stream error: {e.message} (code: {e.code})")
except OctenAPIError as e:
    print(f"API error: {e.status_code} - {e.message}")
Requests that fail due to timeouts, rate limits, or server errors (5xx) are automatically retried with exponential backoff.