5-Minute RAG Quickstart¶
This tutorial shows you how to spin up a complete Retrieval-Augmented Generation (RAG) workflow—from table creation through LLM completion—in just a few commands. You’ll see each step in Bash, Python, and JavaScript, and you can click Run to execute everything directly against the LLMosaic APIs.
📺 Watch the 5-Minute RAG Quickstart Video
Configuration¶
Set the endpoints, API keys, and model names used throughout this tutorial. Every step below references these variables, so fill in your own values first:
DB_BASE=""          # base URL of the database API
DB_KEY=""           # database API key
LLM_BASE=""         # base URL of the LLM API
EMBED_KEY=""        # API key for embedding requests
LLM_KEY=""          # API key for chat-completion requests
EMBEDDING_MODEL=""  # embedding model name
LLM_MODEL=""        # model name used in the chat-completions URL
LLM_MODEL_HF=""     # model identifier sent in the request body
Steps¶
Step 0 — Drop existing table¶
Clears out any old items5 table so we start fresh.
Step 1 — Create table¶
Defines an items5 table with a JSON column and a 1024-dimensional vector column.
curl -s -X POST "${DB_BASE}/create-table" \
  -H "Authorization: Bearer ${DB_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "table_name": "items5",
    "columns": [
      { "name": "id", "type": "bigserial", "constraints": "PRIMARY KEY" },
      { "name": "data", "type": "json" },
      { "name": "embedding", "type": "vector(1024)" }
    ],
    "not_exists": true
  }'
import requests

payload = {
    "table_name": "items5",
    "columns": [
        {"name": "id", "type": "bigserial", "constraints": "PRIMARY KEY"},
        {"name": "data", "type": "json"},
        {"name": "embedding", "type": "vector(1024)"}
    ],
    "not_exists": True
}
resp = requests.post(
    f"{DB_BASE}/create-table",
    headers={"Authorization": f"Bearer {DB_KEY}", "Content-Type": "application/json"},
    json=payload
)
print(resp.json())
await fetch(`${DB_BASE}/create-table`, {
  method: "POST",
  headers: {
    "Authorization": `Bearer ${DB_KEY}`,
    "Content-Type": "application/json"
  },
  body: JSON.stringify({
    table_name: "items5",
    columns: [
      { name: "id", type: "bigserial", constraints: "PRIMARY KEY" },
      { name: "data", type: "json" },
      { name: "embedding", type: "vector(1024)" }
    ],
    not_exists: true
  })
});
Step 2 — Create vector index¶
Builds a high-performance vector index on the embedding column using the HNSW algorithm and the cosine distance operator. This allows fast nearest-neighbor search over your stored vectors.
curl -s -X POST "${DB_BASE}/create-vector-index" \
  -H "Authorization: Bearer ${DB_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "table_name": "items5",
    "index_name": "items5_embedding_index",
    "vector_column": "embedding",
    "index_type": "hnsw",
    "distance_operator": "vector_cosine_ops",
    "not_exists": true
  }'
import requests

idx_cfg = {
    "table_name": "items5",
    "index_name": "items5_embedding_index",
    "vector_column": "embedding",
    "index_type": "hnsw",
    "distance_operator": "vector_cosine_ops",
    "not_exists": True
}
resp = requests.post(
    f"{DB_BASE}/create-vector-index",
    headers={"Authorization": f"Bearer {DB_KEY}", "Content-Type": "application/json"},
    json=idx_cfg
)
print(resp.json())
await fetch(`${DB_BASE}/create-vector-index`, {
  method: "POST",
  headers: {
    "Authorization": `Bearer ${DB_KEY}`,
    "Content-Type": "application/json"
  },
  body: JSON.stringify({
    table_name: "items5",
    index_name: "items5_embedding_index",
    vector_column: "embedding",
    index_type: "hnsw",
    distance_operator: "vector_cosine_ops",
    not_exists: true
  })
});
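For intuition: vector_cosine_ops ranks rows by cosine distance, which is 1 minus cosine similarity, and it is the same metric the <=> operator applies at query time in Step 5. A minimal Python sketch of that computation (illustrative only, not part of the workflow):
import math

def cosine_distance(a, b):
    # cosine distance = 1 - cosine similarity
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return 1.0 - dot / (norm_a * norm_b)

print(cosine_distance([1.0, 0.0], [0.0, 1.0]))  # orthogonal vectors -> 1.0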
Step 3 — Generate test data¶
Creates a small set of JSON documents for ingestion. Each document has an id and some text. You'll later embed and store these in the database as part of the RAG workflow.
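Any small array of {id, text} documents works; a minimal Python sketch (the contents are illustrative). The Python and JavaScript steps below expect this array as docs, and the Bash steps expect the same array serialized as a JSON string in the TEST_DATA shell variable:
# Illustrative test documents; any {id, text} records will do.
docs = [
    {"id": 1, "text": "AI systems can retrieve documents before generating answers."},
    {"id": 2, "text": "Vector databases store embeddings for semantic search."},
    {"id": 3, "text": "HNSW indexes speed up nearest-neighbor lookups."},
]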
Step 4 — Generate embeddings & insert data¶
For each document you generated, this step calls the Embeddings API to get a vector representation, then inserts both the original JSON and its embedding into the items5 table. Embeddings allow semantic search over text.
NUM_DOCS=$(echo "$TEST_DATA" | jq 'length')
for (( i=0; i<NUM_DOCS; i++ )); do
  DOC=$(echo "$TEST_DATA" | jq -c ".[$i]")
  TEXT=$(echo "$DOC" | jq -r '.text')
  # Build the payload with jq so quotes in the text can't break the JSON.
  PAYLOAD=$(jq -cn --arg m "$EMBEDDING_MODEL" --arg t "$TEXT" '{model: $m, input: [$t]}')
  EMB=$(curl -s -X POST "${LLM_BASE}/${EMBEDDING_MODEL}/embeddings" \
    -H "Authorization: Bearer ${EMBED_KEY}" \
    -H "Content-Type: application/json" \
    -d "$PAYLOAD" \
    | jq -c '.data[0].embedding')
  curl -s -X POST "${DB_BASE}/items5" \
    -H "Authorization: Bearer ${DB_KEY}" \
    -H "Content-Type: application/json" \
    -d "{\"data\":$DOC,\"embedding\":$EMB}"
done
import requests

for doc in docs:
    # Embed the document text, then store the document plus its embedding.
    emb_res = requests.post(
        f"{LLM_BASE}/{EMBEDDING_MODEL}/embeddings",
        headers={"Authorization": f"Bearer {EMBED_KEY}", "Content-Type": "application/json"},
        json={"model": EMBEDDING_MODEL, "input": [doc["text"]]}
    ).json()
    embedding = emb_res["data"][0]["embedding"]
    resp = requests.post(
        f"{DB_BASE}/items5",
        headers={"Authorization": f"Bearer {DB_KEY}", "Content-Type": "application/json"},
        json={"data": doc, "embedding": embedding}
    )
    print(resp.json())
for (const doc of docs) {
  // Embed the document text, then store the document plus its embedding.
  const embRes = await fetch(`${LLM_BASE}/${EMBEDDING_MODEL}/embeddings`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${EMBED_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ model: EMBEDDING_MODEL, input: [doc.text] })
  });
  const embJson = await embRes.json();
  const embedding = embJson.data[0].embedding;
  await fetch(`${DB_BASE}/items5`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${DB_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ data: doc, embedding })
  });
}
Step 5 — Retrieve & generate answer¶
Finally, this step performs a vector search for each document’s embedding to retrieve the most similar stored item, then uses that item as context in an LLM completion asking “What is mentioned about AI?” This demonstrates the core RAG pattern: retrieval + generation.
NUM_DOCS=$(echo "$TEST_DATA" | jq 'length')
for (( i=0; i<NUM_DOCS; i++ )); do
  TEXT=$(echo "$TEST_DATA" | jq -r ".[$i].text")
  # Build payloads with jq so quotes in the text can't break the JSON.
  QPAYLOAD=$(jq -cn --arg m "$EMBEDDING_MODEL" --arg t "$TEXT" '{model: $m, input: [$t]}')
  QEMB=$(curl -s -X POST "${LLM_BASE}/${EMBEDDING_MODEL}/embeddings" \
    -H "Authorization: Bearer ${EMBED_KEY}" \
    -H "Content-Type: application/json" \
    -d "$QPAYLOAD" \
    | jq -c '.data[0].embedding')
  # Let curl URL-encode the query parameters (including the <=> operator).
  HIT=$(curl -s -G "${DB_BASE}/items5" \
    --data-urlencode "query_vector=${QEMB}" \
    --data-urlencode "vector_column=embedding" \
    --data-urlencode "distance_operator=<=>" \
    --data-urlencode "limit=1" \
    -H "Authorization: Bearer ${DB_KEY}")
  CONTEXT=$(echo "$HIT" | jq -r '.[0].data.text')
  CHAT_PAYLOAD=$(jq -cn --arg m "$LLM_MODEL_HF" --arg c "$CONTEXT" \
    '{model: $m, messages: [{role: "user", content: ("Based on the context: " + $c + ", what is mentioned about AI?")}]}')
  curl -s -X POST "${LLM_BASE}/${LLM_MODEL}/chat/completions" \
    -H "Authorization: Bearer ${LLM_KEY}" \
    -H "Content-Type: application/json" \
    -d "$CHAT_PAYLOAD"
done
import requests, json

for doc in docs:
    # Embed the query text.
    q_emb_res = requests.post(
        f"{LLM_BASE}/{EMBEDDING_MODEL}/embeddings",
        headers={"Authorization": f"Bearer {EMBED_KEY}", "Content-Type": "application/json"},
        json={"model": EMBEDDING_MODEL, "input": [doc["text"]]}
    ).json()
    q_emb = q_emb_res["data"][0]["embedding"]
    # Nearest-neighbor search; requests URL-encodes the parameters.
    params = {
        "query_vector": json.dumps(q_emb),
        "vector_column": "embedding",
        "distance_operator": "<=>",
        "limit": 1
    }
    hit = requests.get(
        f"{DB_BASE}/items5",
        headers={"Authorization": f"Bearer {DB_KEY}"},
        params=params
    ).json()
    context = hit[0]["data"]["text"]
    # Use the retrieved text as context for the completion.
    llm_resp = requests.post(
        f"{LLM_BASE}/{LLM_MODEL}/chat/completions",
        headers={"Authorization": f"Bearer {LLM_KEY}", "Content-Type": "application/json"},
        json={
            "model": LLM_MODEL_HF,
            "messages": [{"role": "user", "content": f"Based on the context: {context}, what is mentioned about AI?"}]
        }
    )
    print(llm_resp.json())
for (const doc of docs) {
  // Embed the query text.
  const qRes = await fetch(`${LLM_BASE}/${EMBEDDING_MODEL}/embeddings`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${EMBED_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ model: EMBEDDING_MODEL, input: [doc.text] })
  });
  const qJson = await qRes.json();
  const qEmb = qJson.data[0].embedding;
  const encoded = encodeURIComponent(JSON.stringify(qEmb));
  // Nearest-neighbor search using the cosine-distance operator.
  const hitRes = await fetch(
    `${DB_BASE}/items5?query_vector=${encoded}&vector_column=embedding&distance_operator=${encodeURIComponent("<=>")}&limit=1`,
    { headers: { "Authorization": `Bearer ${DB_KEY}` } }
  );
  const hitJson = await hitRes.json();
  const context = hitJson[0].data.text;
  // Use the retrieved text as context for the completion.
  const chatRes = await fetch(
    `${LLM_BASE}/${LLM_MODEL}/chat/completions`,
    {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${LLM_KEY}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        model: LLM_MODEL_HF,
        messages: [{ role: "user", content: `Based on the context: ${context}, what is mentioned about AI?` }]
      })
    }
  );
  console.log(await chatRes.json());
}