Skip to main content
The official Python SDK for Octen. Search the web, create text embeddings, and web chat with LLMs.

Get API Key

Get your API key from the API Platform

PyPI

Download and view the SDK on PyPI

Install

pip install octen
Requires Python 3.8+

Quick Start

from octen import Octen

client = Octen(api_key="your-api-key") # or set OCTEN_API_KEY env var
Search the web with fast mode.
results = client.search.search(
    query="blog post about artificial intelligence",
    count=10
)
results = client.search.search(
    query="climate tech news",
    count=20,
    start_time="2024-01-01T00:00:00Z",
    include_domains=["techcrunch.com", "wired.com"]
)
With highlights and full content:
from octen import HighlightOptions, FullContentOptions

results = client.search.search(
    query="vector database comparison",
    count=10,
    highlight=HighlightOptions(enable=True, max_tokens=500),
    full_content=FullContentOptions(enable=True, max_tokens=2000),
    format="markdown"
)
Use simple_search for quick lookups:
results = client.search.simple_search("Python web framework", count=5)

Chat

Send messages to LLMs with optional web search and streaming.
from octen import ChatMessage

response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="What is quantum computing?")]
)
print(response.text)
Multi-turn conversation:
response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[
        ChatMessage(role="system", content="You are a helpful assistant."),
        ChatMessage(role="user", content="What is Python?"),
        ChatMessage(role="assistant", content="Python is a programming language."),
        ChatMessage(role="user", content="What are its main use cases?"),
    ]
)
With web search:
response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Latest AI news today")],
    web_search="on"
)
print(response.text)
print(response.search_results)
print(response.citations)
from octen import WebSearchOptions

response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Climate tech breakthroughs")],
    web_search="on",
    web_search_options=WebSearchOptions(
        count=10,
        include_domains=["nature.com", "science.org"],
        start_time="2025-01-01T00:00:00Z",
        format="markdown",
        full_content={"enable": True, "max_tokens": 2000}
    )
)
With streaming:
stream = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Write a short poem")],
    stream=True
)

for event in stream:
    if event.type == "content":
        print(event.choices[0].delta.content or "", end="", flush=True)
    elif event.type == "search_done":
        print(f"\nSearch results: {event.search_results}")
    elif event.type == "usage":
        print(f"\nTokens: {event.usage.total_tokens}")

Embedding

Create text embeddings with models of different sizes.
response = client.embedding.create(
    input="Hello, world!",
    model="octen-embedding-4b"
)
vector = response.get_first_embedding()
response = client.embedding.create(
    input=["first document", "second document", "third document"],
    model="octen-embedding-8b",
    input_type="document"
)
vectors = response.get_embeddings()
Available models: octen-embedding-0.6b, octen-embedding-4b, octen-embedding-8b.

Convenience methods for common patterns:
# Embed a search query
vector = client.embedding.embed_query("search text")

# Embed a batch of documents
vectors = client.embedding.embed_documents(["doc 1", "doc 2", "doc 3"])

Async

Use AsyncOcten for async operations.
import asyncio
from octen import AsyncOcten

async def main():
    async with AsyncOcten(api_key="your-api-key") as client:
        results = await client.search.search(
            query="machine learning startups",
            count=10
        )

asyncio.run(main())
Run search, embedding, and chat concurrently:
from octen import ChatMessage

async with AsyncOcten(api_key="your-api-key") as client:
    search_result, embed_result, chat_result = await asyncio.gather(
        client.search.search(query="AI news", count=5),
        client.embedding.create(input="Hello world"),
        client.chat.create(
            model="openai/gpt-5.4",
            messages=[ChatMessage(role="user", content="Hello!")]
        )
    )
Async streaming:
async with AsyncOcten(api_key="your-api-key") as client:
    stream = await client.chat.create(
        model="openai/gpt-5.4",
        messages=[ChatMessage(role="user", content="Tell me a story")],
        stream=True
    )
    async for event in stream:
        if event.type == "content":
            print(event.choices[0].delta.content or "", end="", flush=True)

Error Handling

from octen import (
    OctenAuthenticationError,
    OctenRateLimitError,
    OctenTimeoutError,
    OctenConnectionError,
    OctenStreamError,
    OctenAPIError,
)

try:
    results = client.search.search("query")
except OctenAuthenticationError:
    print("Invalid API key")
except OctenRateLimitError as e:
    print(f"Rate limited, retry after {e.retry_after}s")
except OctenTimeoutError:
    print("Request timed out")
except OctenConnectionError:
    print("Network connection failed")
except OctenStreamError as e:
    print(f"Stream error: {e.message} (code: {e.code})")
except OctenAPIError as e:
    print(f"API error: {e.status_code} - {e.message}")
Requests that fail due to timeouts, rate limits, or server errors (5xx) are automatically retried with exponential backoff.