Creating a "knowledge base": The information that the system will work with is divided into small chunks. These chunks then go through an encoding process—transforming the text into vector representations, which are stored in a vector database for quick access. In our case, we will manually divide the OpenAPI specification into chunks and store their vector representations in ChromaDB.
Retrieving data during a query: When a user sends a query, the system employs a search mechanism to extract the relevant chunks of information from the vector database. These data are then sent to the generative model to formulate a response. To retrieve the data, we will use Langchain.
def get_openapi_spec_paths(specification: dict) -> list:
    # Flatten the spec: one entry per (path, method) pair, with the
    # method and path stored alongside the operation details.
    paths = []
    for p in specification["paths"]:
        for m in specification["paths"][p]:
            path = specification["paths"][p][m]
            path["method"] = m
            path["path"] = p
            paths.append(path)
    return paths
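The code below also calls a get_openapi_spec helper that the article does not show. A minimal sketch, assuming the specification is served as JSON and that the requests library is available:

import requests

def get_openapi_spec(url: str) -> dict:
    # Hypothetical helper: download the OpenAPI specification and parse it as JSON.
    response = requests.get(url)
    response.raise_for_status()
    return response.json()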
import json
from langchain.docstore.document import Document
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
specification = get_openapi_spec(url)
paths = get_openapi_spec_paths(specification)

# Wrap each path entry in a LangChain Document so it can be embedded.
dumped_paths = []
for p in paths:
    dumped_paths.append(
        Document(
            page_content=json.dumps(p),
            metadata={"source": "local"},
        )
    )
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
Chroma.from_documents(
    documents=dumped_paths,
    embedding=embeddings,
    persist_directory="data",
    collection_name="spec",
)
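To verify that the chunks were persisted, you can reopen the collection and run a quick similarity search. A minimal check, assuming the same persist_directory and collection_name as above:

db = Chroma(
    persist_directory="data",
    embedding_function=embeddings,
    collection_name="spec",
)
# Should print Documents whose page_content is the JSON of the matching paths.
print(db.similarity_search("get list of dogs", k=2))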
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# settings holds the project configuration (API keys and model names).
embeddings = OpenAIEmbeddings(model=settings.OPENAI_API_EMBEDDINGS_MODEL)
llm = ChatOpenAI(api_key=settings.OPENAI_API_KEY, model=settings.OPEN_API_MODEL)

chroma_db = Chroma(
    persist_directory="data",
    embedding_function=embeddings,
    collection_name="spec",
)
retriever = chroma_db.as_retriever()
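As a quick check, the retriever can be queried directly before wiring up the full chain; invoke returns the matching Documents:

docs = retriever.invoke("How to get a list of dogs?")
print(len(docs), docs[0].page_content[:200])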
prompt = PromptTemplate.from_template(
    """
    System: You are an assistant that converts OpenAPI JSON specs into neatly structured,
    human-readable text with summary, description, tags, produces, responses, parameters,
    method, and curl examples.

    EXAMPLE ANSWER:
    ### GET /dogs
    **Summary**: Retrieve a list of dogs
    **Description**: Get a filtered list of dogs based on query parameters such as breed, age, or size.
    **Tags**: Dogs
    **Produces**: application/json
    **Parameters**:
    - **Query**:
        - `breed` (string, optional): Filter by dog breed.
        - `age` (integer, optional): Filter by dog age.
        - `size` (string, optional): Filter by dog size (small, medium, large).
    **Method**: GET
    **Curl Example**:
    curl -X GET "https://api.example.com/dogs?breed=labrador&size=medium" -H "Authorization: Bearer <your_token>"

    Now, format the following OpenAPI spec:
    {context}

    {question}
    """
)
def format_docs(docs):
    # Join the retrieved documents into a single context string.
    return "\n\n".join(doc.page_content for doc in docs)
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
chain.invoke(("How to get list of dogs?")
To swap in a local model instead of OpenAI, pull Llama 3.1 with Ollama:

ollama pull llama3.1
from langchain_ollama import OllamaLLM
llm = OllamaLLM(model="llama3.1:8b")
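Nothing else in the pipeline has to change: the retriever, prompt, and output parser from above can be reused as-is, and only the llm link is swapped. A sketch, assuming those objects are still in scope:

local_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm  # now the local Llama 3.1 model served by Ollama
    | StrOutputParser()
)
local_chain.invoke("How to get a list of dogs?")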
Let’s add Streamlit to quickly create a web interface for our chat.
Streamlit is an open-source Python framework that lets data scientists and AI/ML engineers deliver dynamic data apps in only a few lines of code.
Create a file chat.py:
import streamlit as st

# Keep the conversation history in the session state.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the conversation so far.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Enter your message."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        # st.write returns None, so keep the model's answer and store it.
        response = chat_with_model(st.session_state.messages[-1]["content"])
        st.write(response)
        st.session_state.messages.append({"role": "assistant", "content": response})
Run it:

streamlit run chat.py
For reference, here is the demo FastAPI service whose OpenAPI specification we indexed:

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional

app = FastAPI()

# In-memory storage for dogs
dogs_db = {}

# Pydantic model for a dog
class Dog(BaseModel):
    name: str
    breed: str
    age: int

class DogUpdate(BaseModel):
    name: Optional[str] = None
    breed: Optional[str] = None
    age: Optional[int] = None

@app.get("/dogs", response_model=List[Dog])
def get_dogs(breed: Optional[str] = None, min_age: Optional[int] = None, max_age: Optional[int] = None):
    """
    Get a list of all dogs. Optionally filter by breed, minimum age, and maximum age.

    - **breed**: Filter by dog breed.
    - **min_age**: Minimum age of the dogs to retrieve.
    - **max_age**: Maximum age of the dogs to retrieve.
    """
    filtered_dogs = [dog for dog in dogs_db.values()]
    if breed:
        filtered_dogs = [dog for dog in filtered_dogs if dog.breed == breed]
    if min_age is not None:
        filtered_dogs = [dog for dog in filtered_dogs if dog.age >= min_age]
    if max_age is not None:
        filtered_dogs = [dog for dog in filtered_dogs if dog.age <= max_age]
    return filtered_dogs

@app.post("/dogs", response_model=Dog)
def add_dog(dog: Dog):
    """
    Add a new dog to the database.

    - **name**: The name of the dog.
    - **breed**: The breed of the dog.
    - **age**: The age of the dog.
    """
    dog_id = len(dogs_db) + 1
    dogs_db[dog_id] = dog
    return dog

@app.get("/dogs/{dog_id}", response_model=Dog)
def get_dog(dog_id: int):
    """
    Get a specific dog by its ID.

    - **dog_id**: The ID of the dog to retrieve.
    """
    dog = dogs_db.get(dog_id)
    if not dog:
        raise HTTPException(status_code=404, detail="Dog not found")
    return dog

@app.put("/dogs/{dog_id}", response_model=Dog)
def update_dog(dog_id: int, dog_update: DogUpdate):
    """
    Update a dog's information by its ID. Only the fields provided in the request body will be updated.

    - **dog_id**: The ID of the dog to update.
    - **name**: The new name of the dog (optional).
    - **breed**: The new breed of the dog (optional).
    - **age**: The new age of the dog (optional).
    """
    dog = dogs_db.get(dog_id)
    if not dog:
        raise HTTPException(status_code=404, detail="Dog not found")
    if dog_update.name is not None:
        dog.name = dog_update.name
    if dog_update.breed is not None:
        dog.breed = dog_update.breed
    if dog_update.age is not None:
        dog.age = dog_update.age
    dogs_db[dog_id] = dog
    return dog
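To try the service end to end, run it with uvicorn and hit the endpoints with curl. A sketch, assuming the code above lives in main.py and the server uses the default port 8000:

uvicorn main:app --reload

# Add a dog, then fetch the filtered list.
curl -X POST "http://127.0.0.1:8000/dogs" -H "Content-Type: application/json" \
     -d '{"name": "Rex", "breed": "labrador", "age": 3}'
curl "http://127.0.0.1:8000/dogs?breed=labrador&min_age=2"

# Partial update: only the fields present in the body are changed.
curl -X PUT "http://127.0.0.1:8000/dogs/1" -H "Content-Type: application/json" -d '{"age": 5}'

FastAPI also serves the generated specification at /openapi.json, which is exactly what the indexing step at the start of this section consumes.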
Here is the link to the repository for local project deployment.