GLChat DPO
GLChat DPO provides environment variables that let you customize and configure various aspects of the application.
###############################################################################
# GLCHAT DPO CONFIGURATION
# Last Updated: 2025-11-28
###############################################################################
#------------------------------------------------------------------------------
# ENVIRONMENT
#------------------------------------------------------------------------------
# Basic application configuration settings
# Application Environment
# Basic runtime configuration for the application
ENVIRONMENT="development" # Optional - Application environment setting (default: development)
VERSION_NUMBER="version-number" # Optional - Application version number for tracking deployments (default: version-number)
BUILD_NUMBER="build-number" # Optional - Application build number for tracking deployments (default: build-number)
#------------------------------------------------------------------------------
# SHARED INFRASTRUCTURE
#------------------------------------------------------------------------------
# Shared infrastructure services
# Backend Communication (Required)
# Settings for internal communication with the GLChat backend service
BACKEND_URL="http://127.0.0.1:8000" # Required - Backend API base URL for internal service communication
BACKEND_TIMEOUT_SECONDS=300 # Optional - Timeout in seconds for backend API requests (default: 300)
# Frontend Integration (Optional)
# Configure this when you need to process documents that contain images
FRONTEND_URL="http://127.0.0.1:3000" # Optional - Base URL of the frontend for media storage
FRONTEND_MEDIA_API_ENDPOINT="" # Optional - Frontend media API endpoint path
# Named Entity Recognition (Optional)
# Configure this when you need to enable PII anonymization using the NER service
NER_API_URL="" # Optional - NER service API endpoint URL
NER_API_KEY="" # Optional - NER service API authentication key
# BOSA Connector for Google Drive Downloader (Optional)
# Configure this when you need to access private Google Drive documents via BOSA connector
BOSA_API_BASE_URL="https://api.bosa.id" # Optional - BOSA API base URL
BOSA_API_KEY="" # Optional - BOSA API authentication key
BOSA_IDENTIFIER="" # Optional - BOSA API identifier
BOSA_SECRET_KEY="" # Optional - BOSA API secret key
# LLM Labs Integration (Optional)
# Configure this when using LLM Labs for knowledge base management
LLM_LABS_BASE_URL="https://llm-domain.com" # Optional - LLM Labs API base URL (code appends /api/llm-vector-stores)
LLM_LABS_OBJECT_STORAGE_CA_CERTS_PATH="" # Optional - Path to CA certificates for LLM Labs API SSL verification
# Database Configuration (Required)
# PostgreSQL database connection for application data
GLCHAT_DB_URL="postgresql://postgres:mysecretpassword@127.0.0.1:5432/glchat" # Required - PostgreSQL database connection URL
# Google Spreadsheet Configuration (Optional)
# Configure this when DPO_CONFIG_PROVIDER=spreadsheet
SERVICE_ACCOUNT_PRIVATE_KEY="" # Optional - Google Service Account private key for spreadsheet access
SERVICE_ACCOUNT_CLIENT_EMAIL="" # Optional - Google Service Account client email for spreadsheet access
CONFIG_SHEET_ID="" # Optional - Google Spreadsheet ID containing configuration data
# Redis Configuration (Required)
# Basic Redis server connection settings. Also used to update config between application and backend services.
REDIS_HOST="127.0.0.1" # Required - Redis server hostname or IP address
REDIS_PORT=6379 # Required - Redis server port number
REDIS_DB=0 # Required - Redis database number to use
REDIS_PASSWORD="password" # Required - Redis server password for authentication
# Redis Configuration TLS Settings (Optional)
# Configure this when your Redis server requires encrypted connection
REDIS_TLS_ENABLED=false # Optional - Enable TLS/SSL encryption (default: false, options: true, false)
REDIS_SSL_CERT_REQS="none" # Optional - SSL certificate verification requirements (default: none, options: none, optional, required)
REDIS_SSL_CA_CERTS_PATH="" # Optional - Path to CA certificates file for Redis SSL
REDIS_SSL_CERTFILE_PATH="" # Optional - Path to client certificate file for Redis SSL
REDIS_SSL_KEYFILE_PATH="" # Optional - Path to client private key file for Redis SSL
# Object Storage (Optional)
# Configure this when you need to process documents that contain images
OBJECT_STORAGE_TYPE="minio" # Optional - Object storage provider type (default: minio, options: minio)
OBJECT_STORAGE_USER="user" # Optional - Object storage service username (default: user)
OBJECT_STORAGE_PASSWORD="password" # Optional - Object storage service password (default: password)
OBJECT_STORAGE_URL="127.0.0.1:9000" # Optional - Object storage service endpoint URL (default: 127.0.0.1:9000)
OBJECT_STORAGE_BUCKET="object-storage-bucket-name" # Optional - Object storage bucket name (default when unset: gdplabs-gen-ai-starter)
# Object Storage Configuration TLS Settings (Optional)
# Configure this when your Object Storage server requires encrypted connection
OBJECT_STORAGE_SECURE_ENABLED=false # Optional - Enable HTTPS for object storage (default: false, options: true, false)
OBJECT_STORAGE_CERT_CHECK_ENABLED=false # Optional - Enable SSL certificate verification (default: false, options: true, false)
OBJECT_STORAGE_CA_CERTS_PATH="" # Optional - Path to CA certificates for object storage SSL
# Azure AI Search Configuration (Optional)
# Configure this when using a pipeline that requires Azure AI Search as the vector database
AZURE_AI_SEARCH_URL="" # Optional - Azure AI Search service URL
AZURE_AI_SEARCH_API_KEY="" # Optional - Azure AI Search API key
# Neo4j Configuration (Optional)
# Configure this when using GraphRAG pipeline for knowledge representation
NEO4J_URL="" # Optional - Neo4j graph database connection URL
NEO4J_USERNAME="" # Optional - Neo4j database username
NEO4J_PASSWORD="" # Optional - Neo4j database password
# OpenAI Models (Optional)
# Configure this when using OpenAI GPT models
OPENAI_API_KEY="" # Optional - OpenAI API key
# Cohere Models (Optional)
# Configure this when using Cohere models
COHERE_API_KEY="" # Optional - Cohere API key
# Anthropic Models (Optional)
# Configure this when using Anthropic Claude models
ANTHROPIC_API_KEY="" # Optional - Anthropic API key
# Google Models (Optional)
# Configure this when using Google Gemini models
GEMINI_API_KEY="" # Optional - Google Gemini API key
GOOGLE_VERTEX_AI_CREDENTIAL_PATH="" # Optional - Path to Google Vertex AI credentials JSON file
# AWS Bedrock Models (Optional)
# Configure this when using AWS Bedrock models
BEDROCK_ACCESS_KEY_ID="" # Optional - AWS Bedrock access key ID
BEDROCK_SECRET_ACCESS_KEY="" # Optional - AWS Bedrock secret access key
# Azure OpenAI Models (Optional)
# Configure this when using Azure OpenAI models
AZURE_OPENAI_API_ENDPOINT="" # Optional - Azure OpenAI service endpoint URL
AZURE_OPENAI_API_KEY="" # Optional - Azure OpenAI service API key
AZURE_OPENAI_API_VERSION="2024-08-01-preview" # Optional - Azure OpenAI API version (default: 2024-08-01-preview)
AZURE_OPENAI_DEPLOYMENT_NAME_GPT_4O="" # Optional - Azure deployment name for GPT-4o
AZURE_OPENAI_DEPLOYMENT_NAME_GPT_4O_MINI="" # Optional - Azure deployment name for GPT-4o-mini
AZURE_OPENAI_DEPLOYMENT_NAME_TEXT_EMBEDDING_3_SMALL="" # Optional - Azure deployment name for text-embedding-3-small
# Text Embeddings Inference (TEI) (Optional)
# Configure this when using TEI models
TEI_API_KEY="" # Optional - TEI service API key
# Voyage AI (Optional)
# Configure this when using Voyage AI models
VOYAGE_API_KEY="" # Optional - Voyage AI API key
# vLLM (Optional)
# Configure this when using vLLM models
VLLM_API_KEY="" # Optional - vLLM service API key
# ClamAV Virus Scanner (Optional)
# Configure this when you want to scan documents for malware before processing
VIRUS_SCANNER_ENABLED=false # Optional - Enable virus scanning for uploaded files (default: false, options: true, false)
CLAMAV_SCAN_MODE="instream" # Optional - ClamAV scanning mode (default: instream, options: instream, scan)
CLAMAV_HOST="" # Optional - ClamAV server hostname or IP address
CLAMAV_PORT=3310 # Optional - ClamAV server port number (default: 3310)
CLAMAV_TIMEOUT_SECONDS=5 # Optional - Timeout in seconds for virus scanning operations (default: 5)
# RabbitMQ Configuration (Required)
# Message broker for asynchronous task processing
RABBITMQ_HOST="localhost" # Required - RabbitMQ server hostname or IP address
RABBITMQ_USER="user" # Required - RabbitMQ username for authentication
RABBITMQ_PASSWORD="password" # Required - RabbitMQ password for authentication
RABBITMQ_PORT=5672 # Required - RabbitMQ server port number (default: 5672)
#------------------------------------------------------------------------------
# LOGGING & MONITORING
#------------------------------------------------------------------------------
# Logging & Monitoring Configuration
# Application logging and error monitoring settings
LOG_FORMAT="simple" # Optional - Log message format (default: simple, options: simple, text, json)
DPO_LOG_LEVEL="INFO" # Optional - Logging level (default: INFO, options: DEBUG, INFO, WARNING, ERROR, CRITICAL)
DPO_LOG_FILE_ENABLED=false # Optional - Enable file-based logging (default: false, options: true, false)
DPO_LOG_BASE_DIR="data/logs" # Optional - Base directory for storing log files (default: data/logs)
DPO_LOG_WHEN="D" # Optional - Log rotation interval (default: D, options: S, M, H, D, W0-W6, midnight)
DPO_LOG_BACKUP_COUNT=5 # Optional - Number of backup log files to keep (default: 5)
DPO_LOG_TIMEZONE="UTC" # Optional - Timezone for log timestamps (default: UTC)
# Sentry Error Monitoring (Optional)
# Configure this when you want centralized error reporting and monitoring
SENTRY_DSN="" # Optional - Sentry DSN for error reporting
DPO_SENTRY_PROJECT="sentry-project-name" # Optional - Sentry project name for organizing error reports (default: sentry-project-name)
#------------------------------------------------------------------------------
# CONFIGURATION PROVIDER
#------------------------------------------------------------------------------
# Configuration Provider (Required)
# Selection of configuration data source
DPO_CONFIG_PROVIDER="db" # Required - Configuration source selection (default: db, options: db, spreadsheet) - case insensitive
#------------------------------------------------------------------------------
# SECURITY & ENCRYPTION
#------------------------------------------------------------------------------
# Encryption settings for securing sensitive data
# Encryption configuration: the key and key ID must match the values set in the backend (BE) environment
# Configure this when using LLM Labs
ENCRYPTION_KEY="enc-key" # Required - 32-byte key used for decrypting DB fields and encrypting sensitive logs
ENCRYPTION_KEY_ID="enc-key-id" # Required - Encryption key ID; must match the key ID configured in the backend environment (default: k1)
DPO_NUMBER_OF_ITERATIONS=100000 # Optional - Number of iterations for key derivation function (default: 100000)
#------------------------------------------------------------------------------
# MESSAGE QUEUE & TASK PROCESSING (Required)
#------------------------------------------------------------------------------
# Asynchronous task processing and job queue management
# Celery Configuration (Required)
# Distributed task queue for background job processing
DPO_CELERY_APP_NAME="docproc" # Required - Celery application name for identification (default: docproc)
DPO_CELERY_BROKER_URL="amqp://user:password@localhost:5672/" # Required - Celery broker URL for message queue connection
DPO_CELERY_TIME_LIMIT_SECONDS=120 # Required - Maximum time limit for Celery tasks in seconds (default: 120)
DPO_CELERY_TIMEZONE="UTC" # Required - Timezone for Celery scheduler and tasks (default: UTC)
# Celery TLS Settings (Optional)
# Configure this when your Celery message broker requires encrypted connection
DPO_CELERY_BROKER_TLS_ENABLED=false # Optional - Enable TLS/SSL for broker connection (default: false, options: true, false)
DPO_CELERY_BROKER_TLS_CERT_REQS="none" # Optional - SSL certificate verification requirements (default: none, options: none, optional, required)
DPO_CELERY_BROKER_TLS_CA_CERT_PATH="" # Optional - Path to CA certificates for Celery broker TLS
DPO_CELERY_BROKER_TLS_CERT_PATH="" # Optional - Path to client certificate for Celery broker TLS
DPO_CELERY_BROKER_TLS_KEY_PATH="" # Optional - Path to client private key for Celery broker TLS
DPO_CELERY_BROKER_TLS_CIPHERS="" # Optional - Allowed TLS ciphers for Celery broker connection
# Job Queue Management (Optional)
# Configure this when you want to change where job queue data is stored
DPO_JOB_QUEUE_DIR="data/queues" # Optional - Directory for storing job queue data (default: data/queues)
# Job DLX Management (Optional)
# Configure this when you want to support retrying failed jobs through a Dead Letter Exchange (DLX)
DPO_JOB_DLX_ENABLED=false # Optional - Enable Dead Letter Exchange for failed jobs (default: false, options: true, false)
DPO_JOB_DLX_QUEUE_DIR="data/dlx_queues" # Optional - Directory for Dead Letter Exchange queue storage (default: data/dlx_queues)
DPO_JOB_DLX_MAX_REQUEUED_MESSAGES=100 # Optional - Maximum number of messages to requeue from DLX (default: 100)
#------------------------------------------------------------------------------
# DOWNLOADER CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# File download operation settings
# Download Settings (Optional)
# Configure this when you need to change the timeout applied to download operations
DPO_DOWNLOADER_TIMEOUT_SECONDS=60 # Optional - Timeout in seconds for download operations (default: 60)
#------------------------------------------------------------------------------
# LOADER CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# Document loading and caching settings
# Loader Cache Settings (Optional)
# Configure this when you want to cache document loading operations
DPO_LOADER_CACHE_ENABLED=true # Optional - Enable caching for document loading operations (default: true, options: true, false)
DPO_LOADER_CACHE_VERSION="version-0.0.1" # Optional - Version identifier for loader cache
DPO_LOADER_CACHE_DIR="data/cache/loader" # Optional - Directory for storing loader cache files (default: data/cache/loader)
# Network Proxy (Optional)
# Configure this when using an HTTP proxy for YouTube processing
DPO_PROXY_URL="" # Optional - HTTP proxy URL for document loading requests
# Azure Document Intelligence (Optional)
# Configure this when processing PDFs using Azure AI Document Intelligence
DPO_AZURE_AI_DOCUMENT_INTELLIGENCE_ENDPOINT="" # Optional - Azure AI Document Intelligence service endpoint URL
DPO_AZURE_AI_DOCUMENT_INTELLIGENCE_KEY="" # Optional - Azure AI Document Intelligence API key
#------------------------------------------------------------------------------
# DATA GENERATOR CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# Content generation and processing settings
# Model API Keys (Optional)
# Configure this when using API keys for image captioning AI models.
DPO_MODEL_API_KEYS="{}" # Optional - Additional API keys as JSON string for custom models. Example: "{\"replicate/llama-2-70b\":\"r8_xxx\", \"huggingface/zephyr-7b\":\"hf_xxx\"}" (default: "{}")
#------------------------------------------------------------------------------
# INDEXER CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# Document indexing and search processing settings
# Indexing Retry Configuration (Optional)
# Configure this when enabling retry logic for indexing operations
DPO_INDEXER_MAX_RETRIES=3 # Optional - Maximum number of retry attempts for failed indexing operations (default: 3)
DPO_INDEXER_BASE_DELAY_SECONDS=1.0 # Optional - Base delay in seconds between retry attempts (default: 1.0)
DPO_INDEXER_MAX_DELAY_SECONDS=10.0 # Optional - Maximum delay in seconds between retry attempts (default: 10.0)
DPO_INDEXER_EXPONENTIAL_BASE=2.0 # Optional - Exponential base for calculating retry delays (default: 2.0)
DPO_INDEXER_JITTER_ENABLED=true # Optional - Enable jitter for retry delays (default: true, options: true, false)
DPO_INDEXER_TIMEOUT_SECONDS=60.0 # Optional - Timeout in seconds for individual indexing operations (default: 60.0)
DPO_INDEXER_BATCH_SIZE=25 # Optional - Batch size for processing multiple documents at once (default: 25)
# Vector Database Cache (Optional)
# Configure this when you want to cache vector search results for performance
DPO_VECTOR_DB_CACHE_ENABLED=false # Optional - Enable caching for vector database operations (default: false, options: true, false)
DPO_VECTOR_DB_CACHE_VERSION="version-0.0.1" # Optional - Version identifier for vector database cache (default: version-0.0.1)
DPO_VECTOR_DB_CACHE_DIR="data/cache/vector_db" # Optional - Directory for storing vector database cache files (default: data/cache/vector_db)
# HuggingFace Configuration (Optional)
# Configure this when using HuggingFace models for local model inference
DPO_HUGGINGFACE_CACHE_FOLDER="data/huggingface_cache" # Optional - Directory for storing HuggingFace model cache (default: data/huggingface_cache)
#------------------------------------------------------------------------------
# SQL TABLE INDEXER (Optional)
#------------------------------------------------------------------------------
# Database table indexing for structured data
# SQL Table Processing (Optional)
# Configure this when you need to index structured data from SQL databases
DPO_SQL_TABLE_INDEXER_EMBEDDING_BATCH_SIZE=50 # Optional - Batch size for SQL table embedding operations (default: 50)
DPO_SQL_TABLE_INDEXER_INSERT_BATCH_SIZE=1000 # Optional - Batch size for SQL table insert operations (default: 1000)
# Available Model Providers (Optional)
# Configure this when you want to enable specific language model providers
# Supported providers: openai, cohere, google, bedrock, azure-openai, tei, voyage, vllm, anthropic (case insensitive)
DPO_SUPPORTED_MODEL_PROVIDERS="" # Optional - Comma-separated list of enabled language model providers
#------------------------------------------------------------------------------
# BVT CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# Build Verification Test settings
# BVT Enabled (Optional)
# Configure this when you want to enable BVT checks
DPO_BVT_ENABLED=true # Optional - Enable BVT checks (default: true, options: true, false)
###############################################################################
# END OF CONFIGURATION
###############################################################################
Last updated