> ## Documentation Index
> Fetch the complete documentation index at: https://docs.octen.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Python SDK

> Install and use the Octen Python SDK

The official Python SDK for Octen. Search the web, extract content from URLs, create text and multimodal embeddings, and chat with LLMs with optional web search.

<CardGroup cols={2}>
  <Card title="Get API Key" icon="key" href="https://octen.ai/platform/api-keys">
    Get your API key from the API Platform
  </Card>

  <Card title="PyPI" icon="python" href="https://pypi.org/project/octen/">
    Download and view the SDK on PyPI
  </Card>
</CardGroup>

## Install

<CodeGroup>
  ```bash pip theme={null}
  pip install octen
  ```
</CodeGroup>

Requires Python 3.8 or later.

## Quick Start

```python theme={null}
from octen import Octen

client = Octen(api_key="your-api-key") # or set OCTEN_API_KEY env var
```

## Search

Search the web with fast mode.

```python theme={null}
results = client.search.search(
    query="blog post about artificial intelligence",
    count=10
)
```

```python theme={null}
results = client.search.search(
    query="climate tech news",
    count=20,
    start_time="2024-01-01T00:00:00Z",
    include_domains=["techcrunch.com", "wired.com"]
)
```

With highlights and full content:

```python theme={null}
from octen import HighlightOptions, FullContentOptions

results = client.search.search(
    query="vector database comparison",
    count=10,
    highlight=HighlightOptions(enable=True, max_tokens=500),
    full_content=FullContentOptions(enable=True, max_tokens=2000),
    format="markdown"
)
```

Use `simple_search` for quick lookups:

```python theme={null}
results = client.search.simple_search("Python web framework", count=5)
```

## Extract

Extract clean markdown content from a list of URLs.

```python theme={null}
response = client.extract.extract(
    urls=[
        "https://docs.octen.ai/api-reference/search",
        "https://www.who.int/news-room/fact-sheets/detail/influenza-(seasonal)",
    ]
)

for result in response.data.results:
    if result.status == "success":
        print(result.title, result.full_content[:200])
```

With query for intent-focused highlights:

```python theme={null}
response = client.extract.extract(
    urls=["https://www.who.int/news-room/fact-sheets/detail/influenza-(seasonal)"],
    query="vaccination guidelines"
)

for result in response.data.results:
    if result.status == "success":
        for highlight in result.highlights:
            print(highlight)
```

With multimedia resources:

```python theme={null}
response = client.extract.extract(
    urls=["https://example.com/article"],
    include_images=True,
    include_videos=True,
    include_favicon=True
)

for result in response.data.results:
    if result.status == "success":
        for image in result.images:
            print(image.url)
```

Handle failures and check billing:

```python theme={null}
response = client.extract.extract(urls=[
    "https://docs.octen.ai/api-reference/search",
    "https://example.com/non-existent",
])

for result in response.data.results:
    if result.status == "success":
        print(f"[OK] {result.url}")
    else:
        print(f"[FAIL] {result.url}: {result.error_message}")

# Only successful URLs are billed
print(f"Billed: {response.meta.usage.successful_urls}/{response.meta.usage.total_urls}")
```

## Chat

Send messages to LLMs with optional web search and streaming.

```python theme={null}
from octen import ChatMessage

response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="What is quantum computing?")]
)
print(response.text)
```

Multi-turn conversation:

```python theme={null}
response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[
        ChatMessage(role="system", content="You are a helpful assistant."),
        ChatMessage(role="user", content="What is Python?"),
        ChatMessage(role="assistant", content="Python is a programming language."),
        ChatMessage(role="user", content="What are its main use cases?"),
    ]
)
```

With web search:

```python theme={null}
response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Latest AI news today")],
    web_search="on"
)
print(response.text)
print(response.search_results)
print(response.citations)
```

```python theme={null}
from octen import WebSearchOptions

response = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Climate tech breakthroughs")],
    web_search="on",
    web_search_options=WebSearchOptions(
        count=10,
        include_domains=["nature.com", "science.org"],
        start_time="2025-01-01T00:00:00Z",
        format="markdown",
        full_content={"enable": True, "max_tokens": 2000}
    )
)
```

With streaming:

```python theme={null}
stream = client.chat.create(
    model="openai/gpt-5.4",
    messages=[ChatMessage(role="user", content="Write a short poem")],
    stream=True
)

for event in stream:
    if event.type == "content":
        print(event.choices[0].delta.content or "", end="", flush=True)
    elif event.type == "search_done":
        print(f"\nSearch results: {event.search_results}")
    elif event.type == "usage":
        print(f"\nTokens: {event.usage.total_tokens}")
```

## Embedding

Create text embeddings with models of different sizes.

```python theme={null}
response = client.embedding.create(
    input="Hello, world!",
    model="octen-embedding-4b"
)
vector = response.get_first_embedding()
```

```python theme={null}
response = client.embedding.create(
    input=["first document", "second document", "third document"],
    model="octen-embedding-8b",
    input_type="document"
)
vectors = response.get_embeddings()
```

Available models: `octen-embedding-0.6b`, `octen-embedding-4b`, `octen-embedding-8b`

Convenience methods for common patterns:

```python theme={null}
# Embed a search query
vector = client.embedding.embed_query("search text")

# Embed a batch of documents
vectors = client.embedding.embed_documents(["doc 1", "doc 2", "doc 3"])
```

## VL Embedding

Create multimodal embeddings from text, images, and videos. Supports fused output (one vector across all modalities) or independent output (one vector per element).

```python theme={null}
response = client.vl_embedding.create(
    model="octen-vl-embedding-large",
    contents=[
        {"text": "A cute orange cat on a wooden chair"},
        {"image": "https://example.com/cat.jpg"},
    ],
    enable_fusion=True,
)
vector = response.get_first_embedding()
```

With multiple modalities and configuration:

```python theme={null}
response = client.vl_embedding.create(
    model="octen-vl-embedding-large",
    contents=[
        {"text": "Outdoor tent, 3-4 person, waterproof"},
        {"image": "https://example.com/tent_setup.jpg"},
        {"image": "https://example.com/tent_inside.jpg"},
        {"video": "https://example.com/tent_demo.mp4"},
    ],
    enable_fusion=True,
    dimension=2048,
    fps=0.3,
)
```

Independent mode (one vector per element):

```python theme={null}
response = client.vl_embedding.create(
    model="octen-vl-embedding",
    contents=[
        {"image": "https://example.com/img-1.jpg"},
        {"image": "https://example.com/img-2.jpg"},
    ],
    enable_fusion=False,
)
vectors = response.get_embeddings()
```

Available models: `octen-vl-embedding`, `octen-vl-embedding-large`

Typed content objects:

```python theme={null}
from octen import VLEmbeddingContent

response = client.vl_embedding.create(
    model="octen-vl-embedding",
    contents=[
        VLEmbeddingContent(text="A photo of a cat"),
        VLEmbeddingContent(image="https://example.com/cat.jpg"),
    ],
    enable_fusion=True,
)
```

## Async

Use `AsyncOcten` for async operations.

```python theme={null}
import asyncio
from octen import AsyncOcten

async def main():
    async with AsyncOcten(api_key="your-api-key") as client:
        results = await client.search.search(
            query="machine learning startups",
            count=10
        )

asyncio.run(main())
```

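Run search, embedding, and chat concurrently. The snippet below uses `async with` and `await`, so it must run inside an `async` function (such as `main()` above) with `asyncio` and `AsyncOcten` imported: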

```python theme={null}
from octen import ChatMessage

async with AsyncOcten(api_key="your-api-key") as client:
    search_result, embed_result, chat_result = await asyncio.gather(
        client.search.search(query="AI news", count=5),
        client.embedding.create(input="Hello world"),
        client.chat.create(
            model="openai/gpt-5.4",
            messages=[ChatMessage(role="user", content="Hello!")]
        )
    )
```

Async streaming (run inside an `async` function, with `AsyncOcten` and `ChatMessage` imported from `octen`):

```python theme={null}
async with AsyncOcten(api_key="your-api-key") as client:
    stream = await client.chat.create(
        model="openai/gpt-5.4",
        messages=[ChatMessage(role="user", content="Tell me a story")],
        stream=True
    )
    async for event in stream:
        if event.type == "content":
            print(event.choices[0].delta.content or "", end="", flush=True)
```

## Error Handling

```python theme={null}
from octen import (
    OctenAuthenticationError,
    OctenRateLimitError,
    OctenTimeoutError,
    OctenConnectionError,
    OctenStreamError,
    OctenAPIError,
)

try:
    results = client.search.search("query")
except OctenAuthenticationError:
    print("Invalid API key")
except OctenRateLimitError as e:
    print(f"Rate limited, retry after {e.retry_after}s")
except OctenTimeoutError:
    print("Request timed out")
except OctenConnectionError:
    print("Network connection failed")
except OctenStreamError as e:
    print(f"Stream error: {e.message} (code: {e.code})")
except OctenAPIError as e:
    print(f"API error: {e.status_code} - {e.message}")
```

Requests that fail due to timeouts, rate limits, or server errors (5xx) are automatically retried with exponential backoff.