SQL Index with Table Contextο
Demo where table contains context.
import logging
import sys
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index import GPTSQLStructStoreIndex, SQLDatabase, SimpleDirectoryReader, WikipediaReader, Document
from llama_index.indices.struct_store import SQLContextContainerBuilder
from IPython.display import Markdown, display
Load Wikipedia Dataο
# install wikipedia python package
!pip install wikipedia
wiki_docs = WikipediaReader().load_data(pages=['Toronto', 'Berlin', 'Tokyo'])
Create Database Schemaο
from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, column
engine = create_engine("sqlite:///:memory:")
metadata_obj = MetaData()
# create city SQL table
table_name = "city_stats"
city_stats_table = Table(
table_name,
metadata_obj,
Column("city_name", String(16), primary_key=True),
Column("population", Integer),
Column("country", String(16), nullable=False),
)
metadata_obj.create_all(engine)
Build Index with Contextο
from llama_index import GPTSQLStructStoreIndex, SQLDatabase
from llama_index.indices.struct_store import SQLContextContainerBuilder
sql_database = SQLDatabase(engine, include_tables=["city_stats"])
sql_database.table_info
We either set the context manually, or have GPT extract the context for us
# manually set context text
city_stats_text = (
"This table gives information regarding the population and country of a given city.\n"
"The user will query with codewords, where 'foo' corresponds to population and 'bar'"
"corresponds to city."
)
table_context_dict={"city_stats": city_stats_text}
context_builder = SQLContextContainerBuilder(sql_database, context_dict=table_context_dict)
context_container = context_builder.build_context_container()
index = GPTSQLStructStoreIndex.from_documents(
wiki_docs,
sql_database=sql_database,
table_name="city_stats",
sql_context_container=context_container
)
# extract context from a raw Document using GPT
city_stats_text = (
"This table gives information regarding the population and country of a given city.\n"
)
context_documents_dict = {"city_stats": [Document(city_stats_text)]}
context_builder = SQLContextContainerBuilder.from_documents(
context_documents_dict,
sql_database
)
context_container = context_builder.build_context_container()
index = GPTSQLStructStoreIndex.from_documents(
wiki_docs,
sql_database=sql_database,
table_name="city_stats",
sql_context_container=context_container,
)
# view current table
stmt = select(
city_stats_table.c["city_name", "population", "country"]
).select_from(city_stats_table)
with engine.connect() as connection:
results = connection.execute(stmt).fetchall()
print(results)
Query Indexο
Here we show a natural language query, which is translated to a SQL query under the hood.
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine(
query_mode="nl"
)
response = query_engine.query("Which city has the highest population?")
We can also use codewords during the NL query!
# set Logging to DEBUG for more detailed outputs
response = query_engine.query("Which bar has the highest foo?")
display(Markdown(f"<b>{response}</b>"))