The Ollama adapter provides access to local models running via Ollama, allowing you to run AI models on your own infrastructure with full privacy and no API costs.
Install the adapter:
npm install @tanstack/ai-ollama
Then point the chat function at a locally running model via ollamaText:
import { chat } from "@tanstack/ai";
import { ollamaText } from "@tanstack/ai-ollama";
const stream = chat({
adapter: ollamaText("llama3"),
messages: [{ role: "user", content: "Hello!" }],
});
To talk to an Ollama server on a different host, create the adapter with createOllamaChat:
import { chat } from "@tanstack/ai";
import { createOllamaChat } from "@tanstack/ai-ollama";
const adapter = createOllamaChat("http://your-server:11434");
const stream = chat({
adapter: adapter("llama3"),
messages: [{ role: "user", content: "Hello!" }],
});
createOllamaChat takes an optional host URL:
import { createOllamaChat } from "@tanstack/ai-ollama";
// Default localhost
const localAdapter = createOllamaChat();
// Custom host
const remoteAdapter = createOllamaChat("http://your-server:11434");
To see available models on your Ollama instance:
ollama list
In a server route, stream the response back as server-sent events:
import { chat, toServerSentEventsResponse } from "@tanstack/ai";
import { ollamaText } from "@tanstack/ai-ollama";
export async function POST(request: Request) {
const { messages } = await request.json();
const stream = chat({
adapter: ollamaText("llama3"),
messages,
});
return toServerSentEventsResponse(stream);
}
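On the client, the route above returns a standard server-sent-events stream. A minimal sketch that consumes it with plain fetch (the /api/chat path is an assumed route name, and a real app would typically use a higher-level client instead of reading raw chunks):
// Minimal client sketch: POST messages and read the raw SSE stream.
const response = await fetch("/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ messages: [{ role: "user", content: "Hello!" }] }),
});
const reader = response.body!.getReader();
const decoder = new TextDecoder();
for (;;) {
  const { done, value } = await reader.read();
  if (done) break;
  console.log(decoder.decode(value)); // raw SSE chunks
}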
Local models can also call tools you define:
import { chat, toolDefinition } from "@tanstack/ai";
import { ollamaText } from "@tanstack/ai-ollama";
import { z } from "zod";
const getLocalDataDef = toolDefinition({
name: "get_local_data",
description: "Get data from local storage",
inputSchema: z.object({
key: z.string(),
}),
});
const getLocalData = getLocalDataDef.server(async ({ key }) => {
// Access local data
return { data: "..." };
});
const stream = chat({
adapter: ollamaText("llama3"),
messages,
tools: [getLocalData],
});
Note: Tool support varies by model. Models like llama3, mistral, and qwen2 generally have good tool calling support.
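For example, you can keep the same tool wiring and swap in one of those models; this reuses the getLocalData tool and messages from the example above, and assumes the model has been pulled first (ollama pull mistral):
const stream = chat({
  adapter: ollamaText("mistral"), // any tool-capable model pulled locally
  messages,
  tools: [getLocalData],
});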
Ollama supports various provider-specific options:
const stream = chat({
adapter: ollamaText("llama3"),
messages,
modelOptions: {
temperature: 0.7,
top_p: 0.9,
top_k: 40,
num_predict: 1000, // Max tokens to generate
repeat_penalty: 1.1,
num_ctx: 4096, // Context window size
num_gpu: -1, // GPU layers (-1 = auto)
},
});
The full set of supported options:
modelOptions: {
// Sampling
temperature: 0.7,
top_p: 0.9,
top_k: 40,
min_p: 0.05,
typical_p: 1.0,
// Generation
num_predict: 1000,
repeat_penalty: 1.1,
repeat_last_n: 64,
penalize_newline: false,
// Performance
num_ctx: 4096,
num_batch: 512,
num_gpu: -1,
num_thread: 0, // 0 = auto
// Memory
use_mmap: true,
use_mlock: false,
// Mirostat sampling
mirostat: 0, // 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0
mirostat_tau: 5.0,
mirostat_eta: 0.1,
}
Summarize long text content locally:
import { summarize } from "@tanstack/ai";
import { ollamaSummarize } from "@tanstack/ai-ollama";
const result = await summarize({
adapter: ollamaSummarize("llama3"),
text: "Your long text to summarize...",
maxLength: 100,
style: "concise", // "concise" | "bullet-points" | "paragraph"
});
console.log(result.summary);
To run models locally, install Ollama first:
# macOS
brew install ollama
# Linux
curl -fsSL https://ollama.com/install.sh | sh
# Windows
# Download from https://ollama.com
Pull a model:
ollama pull llama3
Start the server:
ollama serve
The server runs on http://localhost:11434 by default.
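To confirm the server is reachable (assuming the default port), you can hit it with curl; it replies with a short status message:
curl http://localhost:11434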
If Ollama runs on another machine, point the adapter at that host:
const adapter = createOllamaChat("http://your-server:11434");
To expose Ollama on a network interface:
OLLAMA_HOST=0.0.0.0:11434 ollama serve
Optionally, set the host via an environment variable:
OLLAMA_HOST=http://localhost:11434
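A minimal sketch of wiring that variable into the adapter yourself (whether the adapter reads OLLAMA_HOST automatically is not covered here, so this reads it explicitly and falls back to the default):
import { createOllamaChat } from "@tanstack/ai-ollama";
// Use OLLAMA_HOST when set, otherwise the default local server.
const adapter = createOllamaChat(process.env.OLLAMA_HOST ?? "http://localhost:11434");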
ollamaText(model) creates an Ollama text/chat adapter.
Parameters: model, the name of the Ollama model to use (for example, "llama3").
Returns: An Ollama text adapter instance.
createOllamaChat(host) creates an Ollama text/chat adapter factory bound to a custom host.
Parameters: host, the base URL of the Ollama server (defaults to http://localhost:11434).
Returns: A factory that takes a model name and returns an Ollama text adapter instance.
ollamaSummarize(model) creates an Ollama summarization adapter for the given model name.
Returns: An Ollama summarize adapter instance.
A companion factory creates an Ollama summarization adapter bound to a custom host, following the same pattern as createOllamaChat.
Returns: An Ollama summarize adapter instance.
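Usage of the custom-host summarization factory is not shown above. A rough sketch, assuming it mirrors createOllamaChat and is exported as createOllamaSummarize (that export name is an assumption, so check the package's exports):
import { summarize } from "@tanstack/ai";
// NOTE: createOllamaSummarize is an assumed export name, mirroring createOllamaChat.
import { createOllamaSummarize } from "@tanstack/ai-ollama";
const summarizeAdapter = createOllamaSummarize("http://your-server:11434");
const result = await summarize({
  adapter: summarizeAdapter("llama3"),
  text: "Your long text to summarize...",
});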
