Text-to-SQL query adapter
Install
uv pip install 'cratedb-toolkit[nlsql]'
Usage
CLI
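# Configure exactly one LLM provider; the three variants below are alternatives.
# Variant 1: OpenAI
export CRATEDB_CLUSTER_URL=crate://localhost/
export LLM_PROVIDER=openai
export OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>
# Variant 2: Anthropic
export CRATEDB_CLUSTER_URL=crate://localhost/
export LLM_PROVIDER=anthropic
export ANTHROPIC_API_KEY=<YOUR_ANTHROPIC_API_KEY>
# Variant 3: Ollama (set LLM_ENDPOINT to the address of your own Ollama server)
export CRATEDB_CLUSTER_URL=crate://localhost/
export LLM_PROVIDER=ollama
export LLM_ENDPOINT="http://100.83.17.54:11434/"
ollama pull gemma3:1b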
API
import sqlalchemy as sa
from cratedb_toolkit.query.nlsql.api import DataQuery
from cratedb_toolkit.query.nlsql.model import DatabaseInfo, ModelInfo, ModelProvider
engine = sa.create_engine("crate://")
schema = "doc"
# Use OpenAI GPT-4.1.
dataquery = DataQuery(
db=DatabaseInfo(engine=engine, schema=schema),
model=ModelInfo(provider=ModelProvider.OPENAI, name="gpt-4.1"),
)
# Use Anthropic Claude Sonnet.
dataquery = DataQuery(
db=DatabaseInfo(engine=engine, schema=schema),
model=ModelInfo(provider=ModelProvider.ANTHROPIC, name="claude-sonnet-4-0"),
)
# Use Google Gemma3 via Ollama.
dataquery = DataQuery(
db=DatabaseInfo(engine=engine, schema=schema),
model=ModelInfo(provider=ModelProvider.OLLAMA, name="gemma3:1b"),
)
response = dataquery.ask("What is the average value for sensor 1?")
print(response)
Example
CREATE TABLE IF NOT EXISTS time_series_data (
timestamp TIMESTAMP,
value DOUBLE,
location STRING,
sensor_id INT
);
INSERT INTO time_series_data (timestamp, value, location, sensor_id)
VALUES
('2023-09-14T00:00:00', 10.5, 'Sensor A', 1),
('2023-09-14T01:00:00', 15.2, 'Sensor A', 1),
('2023-09-14T02:00:00', 18.9, 'Sensor A', 1),
('2023-09-14T03:00:00', 12.7, 'Sensor B', 2),
('2023-09-14T04:00:00', 17.3, 'Sensor B', 2),
('2023-09-14T05:00:00', 20.1, 'Sensor B', 2),
('2023-09-14T06:00:00', 22.5, 'Sensor A', 1),
('2023-09-14T07:00:00', 18.3, 'Sensor A', 1),
('2023-09-14T08:00:00', 16.8, 'Sensor A', 1),
('2023-09-14T09:00:00', 14.6, 'Sensor B', 2),
('2023-09-14T10:00:00', 13.2, 'Sensor B', 2),
('2023-09-14T11:00:00', 11.7, 'Sensor B', 2);
REFRESH TABLE time_series_data;
ctk query nlsql "What is the average value for sensor 1?"
Answer: The average value for sensor 1 is approximately 17.03.