Graph RAG
Installation
# Linux / macOS (you can use a Conda environment)
pip install --extra-index-url https://oauth2accesstoken:$(gcloud auth print-access-token)@glsdk.gdplabs.id/gen-ai-internal/simple/ "gllm-docproc[kg]"

# Windows PowerShell (you can use a Conda environment)
$token = (gcloud auth print-access-token)
pip install --extra-index-url "https://oauth2accesstoken:$token@glsdk.gdplabs.id/gen-ai-internal/simple/" "gllm-docproc[kg]"

# Windows Command Prompt (you can use a Conda environment)
FOR /F "tokens=*" %T IN ('gcloud auth print-access-token') DO SET TOKEN=%T
pip install --extra-index-url "https://oauth2accesstoken:%TOKEN%@glsdk.gdplabs.id/gen-ai-internal/simple/" "gllm-docproc[kg]"

LightRAG Graph RAG Indexer
1
"""Example: index chunked document elements into a LightRAG graph store backed by PostgreSQL."""
import json

from gllm_datastore.graph_data_store.light_rag_postgres_data_store import LightRAGPostgresDataStore
from gllm_docproc.indexer.graph.light_rag_graph_rag_indexer import LightRAGGraphRAGIndexer
from gllm_inference.em_invoker import OpenAIEMInvoker
from gllm_inference.lm_invoker import OpenAILMInvoker

# Read the chunker output (a JSON list of elements) produced in the previous step.
file_path = "./structuredelementchunker-output.json"
with open(file_path, "r", encoding="utf-8") as f:
    elements = json.load(f)

# LM and embedding invokers used by LightRAG for entity/relation extraction and embedding.
lm_invoker = OpenAILMInvoker(model_name="gpt-4o-mini")
em_invoker = OpenAIEMInvoker(model_name="text-embedding-3-small")

# PostgreSQL-backed LightRAG data store.
# NOTE(review): connection values below are local-development defaults — replace for real deployments.
graph_store = LightRAGPostgresDataStore(
    lm_invoker=lm_invoker,
    em_invoker=em_invoker,
    postgres_db_host="localhost",
    postgres_db_port=5455,
    postgres_db_user="rag",
    postgres_db_password="rag",
    postgres_db_name="rag",
    postgres_db_workspace="default",
)

# Index the elements under a file identifier so they can be traced back to the source document.
indexer = LightRAGGraphRAGIndexer(graph_store=graph_store)
indexer.index(elements, file_id="file_001")
2
export OPENAI_API_KEY=<OPENAI_API_KEY>
python main.py

LlamaIndex Graph RAG Indexer
1
"""Example: index chunked document elements into a Neo4j graph store via LlamaIndex."""
import json

from gllm_datastore.graph_data_store.llama_index_neo4j_graph_rag_data_store import (
    LlamaIndexNeo4jGraphRAGDataStore,
)
from gllm_docproc.indexer.graph.llama_index_graph_rag_indexer import LlamaIndexGraphRAGIndexer
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Read the chunker output (a JSON list of elements) produced in the previous step.
file_path = "./structuredelementchunker-output.json"
with open(file_path, "r", encoding="utf-8") as f:
    elements = json.load(f)

# LlamaIndex LLM and embedding model used for graph extraction; temperature=0 for deterministic output.
llm = OpenAI(model="gpt-4o-mini", temperature=0)
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Neo4j graph store — replace the placeholders with real connection credentials.
graph_store = LlamaIndexNeo4jGraphRAGDataStore(
    username="<NEO4J_USERNAME>",
    password="<NEO4J_PASSWORD>",
    url="<NEO4J_URL>",
)

# Indexer wired with the default extractors.
indexer = LlamaIndexGraphRAGIndexer(
    graph_store=graph_store,
    llama_index_llm=llm,
    embed_model=embed_model,
)

# Index the elements under a file identifier so they can be traced back to the source document.
indexer.index(elements, file_id="file_001")
2
export OPENAI_API_KEY=<OPENAI_API_KEY>
python main.py

Last updated
Was this helpful?