RAG (Retrieval Augmented Generation)

Open In Colab Download .ipynb

Install dependencies

!uv pip install sqlite-vec sentence-transformers -q

!mkdir -p .data # Create the data directory, if it doesn't exist
DB_PATH = ".data/course_catalog_rag.db"

Check sqlite-vec extension is loaded

import sqlite3
import sqlite_vec

db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)

vec_version, = db.execute("select vec_version()").fetchone()
print(f"vec_version={vec_version}")
vec_version=v0.1.6

Load the embedding model

from sentence_transformers import SentenceTransformer

EMBEDDING_MODEL = "all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBEDDING_MODEL)

Create the SQLite db, if it doesn’t already exist

import os
import numpy as np

def create_connection(db_path=DB_PATH):
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    conn.enable_load_extension(True)
    sqlite_vec.load(conn)
    conn.enable_load_extension(False)
    return conn

def create_and_populate_db(db_path=DB_PATH):
    db_exists = os.path.exists(db_path)

    conn = create_connection(db_path)
    cursor = conn.cursor()

    if db_exists:
        print(f"Found existing database at '{db_path}'. Skipping creation.")
        return conn

    print(f"Creating new database at '{db_path}'...")

    # --- Schema ---
    cursor.executescript("""
        CREATE TABLE instructors (
            id          INTEGER PRIMARY KEY,
            name        TEXT NOT NULL,
            email       TEXT,
            department  TEXT,
            bio         TEXT
        );

        CREATE TABLE courses (
            id              INTEGER PRIMARY KEY,
            code            TEXT NOT NULL UNIQUE,
            title           TEXT NOT NULL,
            description     TEXT,
            credits         INTEGER,
            level           TEXT CHECK(level IN ('intro', 'intermediate', 'advanced')),
            instructor_id   INTEGER REFERENCES instructors(id),
            max_enrollment  INTEGER
        );

        CREATE TABLE schedules (
            id          INTEGER PRIMARY KEY,
            course_id   INTEGER REFERENCES courses(id),
            days        TEXT,
            time_start  TEXT,
            time_end    TEXT,
            room        TEXT,
            semester    TEXT
        );

        CREATE VIRTUAL TABLE course_embeddings USING vec0(
            course_id INTEGER PRIMARY KEY,
            embedding FLOAT[384]
        );
    """)

    # --- Instructors ---
    cursor.executemany(
        "INSERT INTO instructors (name, email, department, bio) VALUES (?, ?, ?, ?)",
        [
            ("Dr. Sarah Chen", "s.chen@university.edu", "Computer Science",
             "Specializes in machine learning and NLP. Author of 'Practical Deep Learning'."),
            ("Prof. Marcus Webb", "m.webb@university.edu", "Computer Science",
             "Focuses on systems programming and computer architecture."),
            ("Dr. Priya Nair", "p.nair@university.edu", "Data Science",
             "Expert in statistical learning, data visualization, and reproducible research."),
            ("Prof. James Okafor", "j.okafor@university.edu", "Computer Science",
             "Teaches software engineering and has 10 years of industry experience at major tech firms."),
            ("Dr. Elena Russo", "e.russo@university.edu", "Mathematics",
             "Research interests include linear algebra, optimization, and mathematical foundations of AI."),
        ]
    )

    # --- Courses ---
    cursor.executemany(
        """INSERT INTO courses (code, title, description, credits, level, instructor_id, max_enrollment)
           VALUES (?, ?, ?, ?, ?, ?, ?)""",
        [
            ("CS-101", "Introduction to Programming",
             "Fundamentals of programming using Python. Covers variables, control flow, functions, and basic data structures.",
             3, "intro", 2, 40),
            ("CS-201", "Data Structures and Algorithms",
             "Core data structures including linked lists, trees, graphs, and hash tables. Algorithm design and complexity analysis.",
             3, "intermediate", 2, 35),
            ("CS-301", "Machine Learning Fundamentals",
             "Supervised and unsupervised learning, model evaluation, feature engineering, and scikit-learn. Final project required.",
             4, "intermediate", 1, 30),
            ("CS-394", "How Generative AI Works",
             "Transformer architectures, large language models, prompt engineering, fine-tuning, and deployment. Hands-on with open-source models.",
             3, "advanced", 1, 25),
            ("CS-310", "Database Systems",
             "Relational database design, SQL, transactions, indexing, and an introduction to NoSQL systems.",
             3, "intermediate", 4, 20),
        ]
    )

    # --- Schedules ---
    cursor.executemany(
        """INSERT INTO schedules (course_id, days, time_start, time_end, room, semester)
           VALUES (?, ?, ?, ?, ?, ?)""",
        [
            (1, "Mon/Wed/Fri", "09:00", "09:50", "Room 101", "Spring 2026"),
            (2, "Tue/Thu",     "10:00", "11:20", "Room 204", "Spring 2026"),
            (3, "Mon/Wed",     "13:00", "14:20", "Lab 12",   "Spring 2026"),
            (4, "Tue/Thu",     "14:00", "15:20", "Lab 12",   "Spring 2026"),
            (5, "Mon/Wed/Fri", "11:00", "11:50", "Room 305", "Spring 2026"),
        ]
    )

    conn.commit()

    # --- Embeddings ---
    # Embed each course as "title: description" so the vector captures both
    print("Generating embeddings...")
    courses = conn.execute("SELECT id, title, description FROM courses").fetchall()
    for course in courses:
        text = f"{course['title']}: {course['description']}"
        embedding = embedder.encode(text).astype(np.float32)
        cursor.execute(
            "INSERT INTO course_embeddings (course_id, embedding) VALUES (?, ?)",
            (course['id'], embedding.tobytes())
        )

    conn.commit()
    print("Database created and populated successfully.")
    print(f"  - {cursor.execute('SELECT COUNT(*) FROM instructors').fetchone()[0]} instructors")
    print(f"  - {cursor.execute('SELECT COUNT(*) FROM courses').fetchone()[0]} courses")
    print(f"  - {cursor.execute('SELECT COUNT(*) FROM schedules').fetchone()[0]} schedules")
    print(f"  - {cursor.execute('SELECT COUNT(*) FROM course_embeddings').fetchone()[0]} embeddings")

    return conn

conn = create_and_populate_db()
Creating new database at '.data/course_catalog_rag.db'...
Generating embeddings...
Database created and populated successfully.
  - 5 instructors
  - 5 courses
  - 5 schedules
  - 5 embeddings

Retrieval functions

def search_courses(conn, user_query, top_k=3):
    """
    Embed the user query and find the most semantically similar courses.
    Returns the top_k closest courses with full details.
    """
    query_embedding = embedder.encode(user_query).astype(np.float32)

    return conn.execute("""
        SELECT
            c.code,
            c.title,
            c.description,
            c.credits,
            c.level,
            c.max_enrollment,
            i.name AS instructor,
            i.email AS instructor_email,
            s.days,
            s.time_start,
            s.time_end,
            s.room,
            s.semester,
            e.distance
        FROM course_embeddings e
        JOIN courses c    ON e.course_id = c.id
        JOIN instructors i ON c.instructor_id = i.id
        JOIN schedules s   ON s.course_id = c.id
        WHERE e.embedding MATCH ?
          AND k = ?
        ORDER BY e.distance
    """, (query_embedding.tobytes(), top_k)).fetchall()


def rows_to_text(rows):
    if not rows:
        return "No results found."
    if isinstance(rows, sqlite3.Row):
        rows = [rows]
    return "\n".join(
        "  " + ", ".join(f"{k}: {row[k]}" for k in row.keys())
        for row in rows
    )


def build_context(conn, user_query):
    rows = search_courses(conn, user_query)
    return f"Most relevant courses for the query:\n{rows_to_text(rows)}"

Get the OpenRouter API key

import sys
import os
from dotenv import load_dotenv

if 'google.colab' in sys.modules:
  from google.colab import userdata # type:ignore
  OPENROUTER_API_KEY = userdata.get('OPENROUTER_API_KEY')
  os.environ["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY
  print("Loaded key from Colab")
else:
  load_dotenv()
  print("Loaded key locally")
Loaded key locally

Setup OpenRouter Client

MODEL = "nvidia/nemotron-nano-9b-v2:free"

import openai

client = openai.OpenAI(
    base_url='https://openrouter.ai/api/v1',
    api_key=os.environ["OPENROUTER_API_KEY"],
)

User query

USER_PROMPT = "Which courses teach about generative AI?"

Call without injecting course information

response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": "You help students lookup course information."},
        {"role": "user", "content": USER_PROMPT},
    ],
)
response.choices[0].message.content
'To find courses on generative AI, I’d need a bit more detail:  \n- Are you looking for courses at a specific university or online platform (e.g., Coursera, edX, Udacity)?  \n- Do you prefer beginner-friendly or advanced-level courses?  \n\nLet me know, and I’ll help guide you to the right resources! 😊\n'

Call with injecting course information

# Connect to the sqlite database
conn = create_and_populate_db()

# Query the database based on the user prompt
context = build_context(conn, USER_PROMPT)

# Create system prompt
SYSTEM_PROMPT = f"""
You help students lookup course information. Here are the course details:

---COURSE INFORMATION---
{context}
---END COURSE INFORMATION---
"""

# Query the model
response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT},
    ],
)
response.choices[0].message.content
Found existing database at '.data/course_catalog_rag.db'. Skipping creation.
'The course that specifically teaches about generative AI is:\n\n**CS-394: How Generative AI Works**  \n- **Description**: Covers transformer architectures, large language models, prompt engineering, fine-tuning, and deployment. Includes hands-on work with open-source models.  \n- **Details**:  \n  - Credits: 3 | Level: Advanced  \n  - Instructor: Dr. Sarah Chen | [Contact](mailto:s.chen@university.edu)  \n  - Time: Tue/Thu 14:00–15:20 | Room: Lab 12  \n  - Semester: Spring 2026  \n\nThis course directly addresses generative AI concepts and techniques.\n'