
5-Minute RAG Quickstart

This tutorial shows you how to spin up a complete Retrieval-Augmented Generation (RAG) workflow—from table creation through LLM completion—in just a few commands. You’ll see each step in Bash, Python, and JavaScript, and you can click Run to execute everything directly against the LLMosaic APIs.
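
All of the snippets below assume a handful of configuration values are already set: DB_BASE and DB_KEY for the database API, LLM_BASE plus EMBED_KEY and LLM_KEY for the embeddings and chat endpoints, and the model identifiers EMBEDDING_MODEL, LLM_MODEL, and LLM_MODEL_HF. In the runnable examples these are pre-populated; if you are working locally, a minimal sketch of the placeholders (shown here in Python; the actual values depend on your LLMosaic account) looks like this:

# Placeholder configuration: substitute your own endpoints, keys, and model names.
DB_BASE = ""          # base URL of the database API
DB_KEY = ""           # database API key
LLM_BASE = ""         # base URL of the embeddings / chat completions API
EMBED_KEY = ""        # API key for the embeddings endpoint
LLM_KEY = ""          # API key for the chat completions endpoint
EMBEDDING_MODEL = ""  # embedding model ID (must produce 1024-dimensional vectors)
LLM_MODEL = ""        # chat model ID used in the request path
LLM_MODEL_HF = ""     # model name sent in the chat request body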

📺 Watch the 5-Minute RAG Quickstart Video


Steps

Step 0 — Drop existing table

Clears out any old items5 table so we start fresh.

DB_BASE=""   # database API base URL; DB_KEY must also be set
curl -s -X POST "${DB_BASE}/drop-table" \
    -H "Authorization: Bearer ${DB_KEY}" \
    -H "Content-Type: application/json" \
    -d '{
    "table_name": "items5",
    "if_exists": true
    }'

import requests

DB_BASE = ""  # database API base URL; DB_KEY must also be defined
resp = requests.post(
    f"{DB_BASE}/drop-table",
    headers={"Authorization": f"Bearer {DB_KEY}", "Content-Type": "application/json"},
    json={"table_name": "items5", "if_exists": True}
)
print(resp.json())

const DB_BASE = "";  // database API base URL; DB_KEY must also be defined
await fetch(`${DB_BASE}/drop-table`, {
  method: "POST",
  headers: {
    "Authorization": `Bearer ${DB_KEY}`,
    "Content-Type": "application/json"
  },
  body: JSON.stringify({ table_name: "items5", if_exists: true })
});


Step 1 — Create table

Defines an items5 table with an auto-incrementing id primary key, a JSON data column, and a 1024-dimensional embedding vector column. The vector dimension must match the output size of your embedding model.

curl -s -X POST "${DB_BASE}/create-table" \
    -H "Authorization: Bearer ${DB_KEY}" \
    -H "Content-Type: application/json" \
    -d '{
  "table_name": "items5",
  "columns": [
    { "name": "id", "type": "bigserial", "constraints": "PRIMARY KEY" },
    { "name": "data", "type": "json" },
    { "name": "embedding", "type": "vector(1024)" }
  ],
  "not_exists": true
}'
import requests

payload = {
  "table_name": "items5",
  "columns": [
    {"name":"id","type":"bigserial","constraints":"PRIMARY KEY"},
    {"name":"data","type":"json"},
    {"name":"embedding","type":"vector(1024)"}
  ],
  "not_exists": True
}

resp = requests.post(
  f"{DB_BASE}/create-table",
  headers={"Authorization": f"Bearer {DB_KEY}", "Content-Type": "application/json"},
  json=payload
)
print(resp.json())
await fetch(`${DB_BASE}/create-table`, {
  method: "POST",
  headers: {
    "Authorization": `Bearer ${DB_KEY}`,
    "Content-Type": "application/json"
  },
  body: JSON.stringify({
    table_name: "items5",
    columns: [
      { name: "id", type: "bigserial", constraints: "PRIMARY KEY" },
      { name: "data", type: "json" },
      { name: "embedding", type: "vector(1024)" }
    ],
    not_exists: true
  })
});

Step 2 — Create vector index

Builds a high-performance vector index on the embedding column using the HNSW algorithm and cosine distance operator. This allows fast nearest-neighbor search over your stored vectors.

curl -s -X POST "${DB_BASE}/create-vector-index" \
    -H "Authorization: Bearer ${DB_KEY}" \
    -H "Content-Type: application/json" \
    -d '{
  "table_name": "items5",
  "index_name": "items5_embedding_index",
  "vector_column": "embedding",
  "index_type": "hnsw",
  "distance_operator": "vector_cosine_ops",
  "not_exists": true
}'
import requests

idx_cfg = {
  "table_name":"items5",
  "index_name":"items5_embedding_index",
  "vector_column":"embedding",
  "index_type":"hnsw",
  "distance_operator":"vector_cosine_ops",
  "not_exists":True
}

resp = requests.post(
  f"{DB_BASE}/create-vector-index",
  headers={"Authorization": f"Bearer {DB_KEY}", "Content-Type": "application/json"},
  json=idx_cfg
)
print(resp.json())
await fetch(`${DB_BASE}/create-vector-index`, {
  method: "POST",
  headers: {
    "Authorization": `Bearer ${DB_KEY}`,
    "Content-Type": "application/json"
  },
  body: JSON.stringify({
    table_name: "items5",
    index_name: "items5_embedding_index",
    vector_column: "embedding",
    index_type: "hnsw",
    distance_operator: "vector_cosine_ops",
    not_exists: true
  })
});

Step 3 — Generate test data

Creates a small set of JSON documents for ingestion. Each document has an id and some text. You’ll later embed and store these in the database as part of the RAG workflow.

export TEST_DATA='[
  {"id":1,"text":"The quick brown fox jumps over the lazy dog."},
  {"id":2,"text":"Artificial Intelligence and Machine Learning are revolutionizing the world."},
  {"id":3,"text":"OpenAI'\''s ChatGPT is a state-of-the-art language model."}
]'
echo "$TEST_DATA"
docs = [
  {"id":1,"text":"The quick brown fox jumps over the lazy dog."},
  {"id":2,"text":"Artificial Intelligence and Machine Learning are revolutionizing the world."},
  {"id":3,"text":"OpenAI's ChatGPT is a state-of-the-art language model."}
]
print(docs)
const docs = [
  { id: 1, text: "The quick brown fox jumps over the lazy dog." },
  { id: 2, text: "Artificial Intelligence and Machine Learning are revolutionizing the world." },
  { id: 3, text: "OpenAI's ChatGPT is a state-of-the-art language model." }
];
console.log(docs);

Step 4 — Generate embeddings & insert data

For each document you generated, this step calls the Embeddings API to get a vector representation, then inserts both the original JSON and its embedding into the items5 table. Embeddings allow semantic search over text.

NUM_DOCS=$(echo "$TEST_DATA" | jq 'length')
for (( i=0; i<NUM_DOCS; i++ )); do
  DOC=$(echo "$TEST_DATA" | jq -c ".[$i]")
  TEXT=$(echo "$DOC" | jq -r '.text')
  # Get the embedding vector for this document's text.
  EMB=$(curl -s -X POST "${LLM_BASE}/${EMBEDDING_MODEL}/embeddings" \
      -H "Authorization: Bearer ${EMBED_KEY}" \
      -H "Content-Type: application/json" \
      -d "{\"model\":\"${EMBEDDING_MODEL}\",\"input\":[\"${TEXT}\"]}" \
    | jq -c '.data[0].embedding')
  # Insert the original JSON document together with its embedding.
  curl -s -X POST "${DB_BASE}/items5" \
      -H "Authorization: Bearer ${DB_KEY}" \
      -H "Content-Type: application/json" \
      -d "{\"data\":$DOC,\"embedding\":$EMB}"
done
import requests

for doc in docs:
    emb_res = requests.post(
        f"{LLM_BASE}/{EMBEDDING_MODEL}/embeddings",
        headers={"Authorization": f"Bearer {EMBED_KEY}", "Content-Type": "application/json"},
        json={"model": EMBEDDING_MODEL, "input": [doc["text"]]}
    ).json()
    embedding = emb_res["data"][0]["embedding"]
    resp = requests.post(
        f"{DB_BASE}/items5",
        headers={"Authorization": f"Bearer {DB_KEY}", "Content-Type": "application/json"},
        json={"data": doc, "embedding": embedding}
    )
    print(resp.json())
for (const doc of docs) {
  const embRes = await fetch(`${LLM_BASE}/${EMBEDDING_MODEL}/embeddings`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${EMBED_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ model: EMBEDDING_MODEL, input: [doc.text] })
  });
  const embJson = await embRes.json();
  const embedding = embJson.data[0].embedding;
  await fetch(`${DB_BASE}/items5`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${DB_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ data: doc, embedding })
  });
}

Step 5 — Retrieve & generate answer

Finally, this step performs a vector search for each document’s embedding to retrieve the most similar stored item, then uses that item as context in an LLM completion asking “What is mentioned about AI?” This demonstrates the core RAG pattern: retrieval + generation.

NUM_DOCS=$(echo "$TEST_DATA" | jq 'length')
for (( i=0; i<NUM_DOCS; i++ )); do
  TEXT=$(echo "$TEST_DATA" | jq -r ".[$i].text")
  # Embed the query text with the same embedding model used at ingestion time.
  QEMB=$(curl -s -X POST "${LLM_BASE}/${EMBEDDING_MODEL}/embeddings" \
      -H "Authorization: Bearer ${EMBED_KEY}" \
      -H "Content-Type: application/json" \
      -d "{\"model\":\"${EMBEDDING_MODEL}\",\"input\":[\"${TEXT}\"]}" \
    | jq -c '.data[0].embedding')
  # URL-encode the query vector so it can be passed as a GET parameter.
  ENCODED=$(python3 -c "import urllib.parse,sys; print(urllib.parse.quote(sys.argv[1]))" "$QEMB")
  # Retrieve the single nearest stored item by cosine distance (<=>).
  HIT=$(curl -s -X GET "${DB_BASE}/items5?query_vector=${ENCODED}&vector_column=embedding&distance_operator=<=>&limit=1" \
      -H "Authorization: Bearer ${DB_KEY}")
  CONTEXT=$(echo "$HIT" | jq -r '.[0].data.text')
  # Pass the retrieved text as context to the chat completions endpoint.
  curl -s -X POST "${LLM_BASE}/${LLM_MODEL}/chat/completions" \
      -H "Authorization: Bearer ${LLM_KEY}" \
      -H "Content-Type: application/json" \
      -d "{
        \"model\":\"${LLM_MODEL_HF}\",
        \"messages\":[{\"role\":\"user\",\"content\":\"Based on the context: ${CONTEXT}, what is mentioned about AI?\"}]
      }"
done
import requests, json

for doc in docs:
    q_emb_res = requests.post(
        f"{LLM_BASE}/{EMBEDDING_MODEL}/embeddings",
        headers={"Authorization": f"Bearer {EMBED_KEY}", "Content-Type": "application/json"},
        json={"model": EMBEDDING_MODEL, "input": [doc["text"]]}
    ).json()
    q_emb = q_emb_res["data"][0]["embedding"]

    params = {
        "query_vector": json.dumps(q_emb),
        "vector_column": "embedding",
        "distance_operator": "<=>",
        "limit": 1
    }
    hit = requests.get(
        f"{DB_BASE}/items5",
        headers={"Authorization": f"Bearer {DB_KEY}"},
        params=params
    ).json()
    context = hit[0]["data"]["text"]

    llm_resp = requests.post(
        f"{LLM_BASE}/{LLM_MODEL}/chat/completions",
        headers={"Authorization": f"Bearer {LLM_KEY}", "Content-Type": "application/json"},
        json={
            "model": LLM_MODEL_HF,
            "messages":[{"role":"user","content":f"Based on the context: {context}, what is mentioned about AI?"}]
        }
    )
    print(llm_resp.json())
for (const doc of docs) {
  const qRes = await fetch(`${LLM_BASE}/${EMBEDDING_MODEL}/embeddings`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${EMBED_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ model: EMBEDDING_MODEL, input: [doc.text] })
  });
  const qJson = await qRes.json();
  const qEmb  = qJson.data[0].embedding;
  const encoded = encodeURIComponent(JSON.stringify(qEmb));

  const hitRes = await fetch(
    `${DB_BASE}/items5?query_vector=${encoded}&vector_column=embedding&distance_operator=<=>&limit=1`,
    { headers: { "Authorization": `Bearer ${DB_KEY}` } }
  );
  const hitJson = await hitRes.json();
  const context = hitJson[0].data.text;

  const chatRes = await fetch(
    `${LLM_BASE}/${LLM_MODEL}/chat/completions`,
    {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${LLM_KEY}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        model: LLM_MODEL_HF,
        messages: [{ role: "user", content: `Based on the context: ${context}, what is mentioned about AI?` }]
      })
    }
  );
  console.log(await chatRes.json());
}
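
In this quickstart each stored document's own text is used as the query, so every lookup simply retrieves that document before generation. In a real application you would embed the user's question instead and retrieve the closest stored items as context. The following Python sketch adapts the same calls to that pattern; it assumes the configuration values from the setup above, and the single question, the limit of 2, and the prompt wording are illustrative choices only:

import json
import requests

question = "What is mentioned about AI?"  # example user question

# Embed the question with the same model used for the stored documents.
q_emb = requests.post(
    f"{LLM_BASE}/{EMBEDDING_MODEL}/embeddings",
    headers={"Authorization": f"Bearer {EMBED_KEY}", "Content-Type": "application/json"},
    json={"model": EMBEDDING_MODEL, "input": [question]}
).json()["data"][0]["embedding"]

# Retrieve the two nearest stored items by cosine distance.
hits = requests.get(
    f"{DB_BASE}/items5",
    headers={"Authorization": f"Bearer {DB_KEY}"},
    params={
        "query_vector": json.dumps(q_emb),
        "vector_column": "embedding",
        "distance_operator": "<=>",
        "limit": 2
    }
).json()
context = " ".join(hit["data"]["text"] for hit in hits)

# Generate an answer grounded in the retrieved context.
answer = requests.post(
    f"{LLM_BASE}/{LLM_MODEL}/chat/completions",
    headers={"Authorization": f"Bearer {LLM_KEY}", "Content-Type": "application/json"},
    json={
        "model": LLM_MODEL_HF,
        "messages": [{"role": "user", "content": f"Based on the context: {context}, answer the question: {question}"}]
    }
).json()
print(answer)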