Use Preset RAG Pipeline
Installation
# you can use a Conda environment
pip install --extra-index-url https://oauth2accesstoken:$(gcloud auth print-access-token)@glsdk.gdplabs.id/gen-ai-internal/simple/ "gllm-rag"

# you can use a Conda environment
pip install --extra-index-url https://oauth2accesstoken:$(gcloud auth print-access-token)@glsdk.gdplabs.id/gen-ai-internal/simple/ "gllm-rag"

FOR /F "tokens=*" %T IN ('gcloud auth print-access-token') DO pip install --extra-index-url "https://oauth2accesstoken:%T@glsdk.gdplabs.id/gen-ai-internal/simple/" gllm-rag

Inserting Documents into the Data Store
1
import os
import asyncio
from gllm_core.schema import Chunk
from gllm_rag.preset.initializer import build_data_store, build_em_invoker
async def main() -> None:
    """Add three sample documents to a data store and print how many IDs came back.

    Builds an embedding-model invoker and a data store with
    ``store_type="chroma"``, wraps each sample text in a ``Chunk``, adds the
    chunks to the store, and prints the number of IDs returned.
    """
    # The credential is read from the OPENAI_API_KEY environment variable;
    # os.getenv returns None when the variable is not set.
    em_invoker = build_em_invoker(
        model_id="openai/text-embedding-3-small",
        credentials=os.getenv("OPENAI_API_KEY"),
    )
    store = build_data_store(
        store_type="chroma",
        index_name="my_index_name",
        embedding=em_invoker,
    )
    documents = [
        "Mount Bromo in Bromo Tengger Semeru National Park is famous for its sea of sand and active volcano.",
        "Komodo National Park is home to the Komodo dragon and offers world-class diving spots with rich coral reefs.",
        "Ujung Kulon National Park protects the endangered Javan rhinoceros and features tropical rainforests and coastal ecosystems.",
    ]
    chunks = [Chunk(content=doc) for doc in documents]
    ids = await store.add_chunks(chunks)
    print(len(ids))
if __name__ == "__main__":
    # Run the async entry point only when the file is executed as a script.
    asyncio.run(main())
2
python insert_data.py
3
3
Running the Pipeline
1
import asyncio
import os
from gllm_rag.preset import SimpleRAG
async def main() -> None:
    """Build a ``SimpleRAG`` pipeline and ask it one question.

    The language model and embedding model credentials are both read from
    the OPENAI_API_KEY environment variable; the Elasticsearch API key is
    read from ELASTICSEARCH_API_KEY.
    """
    # NOTE(review): the insertion example earlier on this page writes to a
    # "chroma" store, while this pipeline reads from "elasticsearch" -
    # confirm that the index queried here actually holds the documents.
    rag = SimpleRAG(
        language_model_id="openai/gpt-4o-mini",
        language_model_credentials=os.getenv("OPENAI_API_KEY"),
        embedding_model_id="openai/text-embedding-3-small",
        embedding_model_credentials=os.getenv("OPENAI_API_KEY"),
        data_store_type="elasticsearch",
        data_store_index="my_index_name",
        data_store_config={
            "url": "https://my-elasticsearch-endpoint",
            "api_key": os.getenv("ELASTICSEARCH_API_KEY"),
        },
    )
    # NOTE(review): the awaited result is discarded; presumably the pipeline
    # emits the answer itself - confirm, otherwise print the returned value.
    await rag("Which national park in Indonesia has Komodo dragons?")
if __name__ == "__main__":
    # Run the async entry point only when the file is executed as a script.
    asyncio.run(main())
2
python simple_rag.py
3
The national park in Indonesia that has Komodo dragons is Komodo National Park.
Switching Between Models and Data Stores
Last updated
Was this helpful?