GLChat DPO

GLChat DPO is configured through the environment variables listed below. Each entry is marked as Required or Optional and, where applicable, shows its default value and allowed options.

###############################################################################
# GLCHAT DPO CONFIGURATION
# Last Updated: 2025-11-28
###############################################################################

#------------------------------------------------------------------------------
# ENVIRONMENT
#------------------------------------------------------------------------------
# Basic application configuration settings

# Application Environment
# Basic runtime configuration for the application
ENVIRONMENT="development"               # Optional - Application environment setting (default: development)
VERSION_NUMBER="version-number"         # Optional - Application version number for tracking deployments (default: version-number)
BUILD_NUMBER="build-number"             # Optional - Application build number for tracking deployments (default: build-number)

#------------------------------------------------------------------------------
# SHARED INFRASTRUCTURE
#------------------------------------------------------------------------------
# Shared infrastructure services the application connects to (backend, database, Redis, object storage, model providers, etc.)

# Backend Communication (Required)
# Settings for internal communication with the GLChat backend service
BACKEND_URL="http://127.0.0.1:8000"     # Required - Backend API base URL for internal service communication
BACKEND_TIMEOUT_SECONDS=300             # Optional - Timeout in seconds for backend API requests (default: 300)

# Frontend Integration (Optional)
# Configure this when you need to process documents that contain images
FRONTEND_URL="http://127.0.0.1:3000"    # Optional - Base URL of the frontend for media storage
FRONTEND_MEDIA_API_ENDPOINT=""          # Optional - Frontend media API endpoint path

# Named Entity Recognition (Optional)
# Configure this when you need to enable PII anonymization using the NER service
NER_API_URL=""                          # Optional - NER service API endpoint URL
NER_API_KEY=""                          # Optional - NER service API authentication key

# BOSA Connector for Google Drive Downloader (Optional)
# Configure this when you need to access private Google Drive documents via BOSA connector
BOSA_API_BASE_URL="https://api.bosa.id" # Optional - BOSA API base URL
BOSA_API_KEY=""                         # Optional - BOSA API authentication key
BOSA_IDENTIFIER=""                      # Optional - BOSA API identifier
BOSA_SECRET_KEY=""                      # Optional - BOSA API secret key

# LLM Labs Integration (Optional)
# Configure this when using LLM Labs for knowledge base management
LLM_LABS_BASE_URL="https://llm-domain.com"              # Optional - LLM Labs API base URL (code appends /api/llm-vector-stores)
LLM_LABS_OBJECT_STORAGE_CA_CERTS_PATH=""                     # Optional - Path to CA certificates for LLM Labs API SSL verification


# Database Configuration (Required)
# PostgreSQL database connection for application data
GLCHAT_DB_URL="postgresql://postgres:mysecretpassword@127.0.0.1:5432/glchat"    # Required - PostgreSQL database connection URL

# Google Spreadsheet Configuration (Optional)
# Configure this when DPO_CONFIG_PROVIDER=spreadsheet
SERVICE_ACCOUNT_PRIVATE_KEY=""          # Optional - Google Service Account private key for spreadsheet access
SERVICE_ACCOUNT_CLIENT_EMAIL=""         # Optional - Google Service Account client email for spreadsheet access
CONFIG_SHEET_ID=""                      # Optional - Google Spreadsheet ID containing configuration data

# Redis Configuration (Required)
# Basic Redis server connection settings. Redis is also used to propagate configuration updates between the application and the backend services.
REDIS_HOST="127.0.0.1"                  # Required - Redis server hostname or IP address
REDIS_PORT=6379                         # Required - Redis server port number
REDIS_DB=0                              # Required - Redis database number to use
REDIS_PASSWORD="password"               # Required - Redis server password for authentication

# Redis Configuration TLS Settings (Optional)
# Configure this when your Redis server requires an encrypted connection
REDIS_TLS_ENABLED=false                 # Optional - Enable TLS/SSL encryption (default: false, options: true, false)
REDIS_SSL_CERT_REQS="none"              # Optional - SSL certificate verification requirements (default: none, options: none, optional, required)
REDIS_SSL_CA_CERTS_PATH=""              # Optional - Path to CA certificates file for Redis SSL
REDIS_SSL_CERTFILE_PATH=""              # Optional - Path to client certificate file for Redis SSL
REDIS_SSL_KEYFILE_PATH=""               # Optional - Path to client private key file for Redis SSL

# Object Storage (Optional)
# Configure this when you need to process documents that contain images
OBJECT_STORAGE_TYPE="minio"                               # Optional - Object storage provider type (default: minio, options: minio)
OBJECT_STORAGE_USER="user"                                # Optional - Object storage service username (default: user)
OBJECT_STORAGE_PASSWORD="password"                        # Optional - Object storage service password (default: password)
OBJECT_STORAGE_URL="127.0.0.1:9000"                       # Optional - Object storage service endpoint URL (default: 127.0.0.1:9000)
OBJECT_STORAGE_BUCKET="object-storage-bucket-name"        # Optional - Object storage bucket name (default: gdplabs-gen-ai-starter)

# Object Storage Configuration TLS Settings (Optional)
# Configure this when your Object Storage server requires an encrypted connection
OBJECT_STORAGE_SECURE_ENABLED=false                       # Optional - Enable HTTPS for object storage (default: false, options: true, false)
OBJECT_STORAGE_CERT_CHECK_ENABLED=false                   # Optional - Enable SSL certificate verification (default: false, options: true, false)
OBJECT_STORAGE_CA_CERTS_PATH=""                           # Optional - Path to CA certificates for object storage SSL

# Azure AI Search Configuration (Optional)
# Configure this when using a pipeline that requires Azure AI Search as the vector database
AZURE_AI_SEARCH_URL=""                  # Optional - Azure AI Search service URL
AZURE_AI_SEARCH_API_KEY=""              # Optional - Azure AI Search API key

# Neo4j Configuration (Optional)
# Configure this when using GraphRAG pipeline for knowledge representation
NEO4J_URL=""                            # Optional - Neo4j graph database connection URL
NEO4J_USERNAME=""                       # Optional - Neo4j database username
NEO4J_PASSWORD=""                       # Optional - Neo4j database password

# OpenAI Models (Optional)
# Configure this when using OpenAI GPT models
OPENAI_API_KEY=""                       # Optional - OpenAI API key

# Cohere Models (Optional)
# Configure this when using Cohere models
COHERE_API_KEY=""                       # Optional - Cohere API key

# Anthropic Models (Optional)
# Configure this when using Anthropic Claude models
ANTHROPIC_API_KEY=""                    # Optional - Anthropic API key

# Google Models (Optional)
# Configure this when using Google Gemini models
GEMINI_API_KEY=""                       # Optional - Google Gemini API key
GOOGLE_VERTEX_AI_CREDENTIAL_PATH=""     # Optional - Path to Google Vertex AI credentials JSON file

# AWS Bedrock Models (Optional)
# Configure this when using AWS Bedrock models
BEDROCK_ACCESS_KEY_ID=""                # Optional - AWS Bedrock access key ID
BEDROCK_SECRET_ACCESS_KEY=""            # Optional - AWS Bedrock secret access key

# Azure OpenAI Models (Optional)
# Configure this when using Azure OpenAI models
AZURE_OPENAI_API_ENDPOINT=""                             # Optional - Azure OpenAI service endpoint URL
AZURE_OPENAI_API_KEY=""                                  # Optional - Azure OpenAI service API key
AZURE_OPENAI_API_VERSION="2024-08-01-preview"            # Optional - Azure OpenAI API version (default: 2024-08-01-preview)
AZURE_OPENAI_DEPLOYMENT_NAME_GPT_4O=""                    # Optional - Azure deployment name for GPT-4o
AZURE_OPENAI_DEPLOYMENT_NAME_GPT_4O_MINI=""               # Optional - Azure deployment name for GPT-4o-mini
AZURE_OPENAI_DEPLOYMENT_NAME_TEXT_EMBEDDING_3_SMALL=""   # Optional - Azure deployment name for text-embedding-3-small

# Text Embeddings Inference (TEI) (Optional)
# Configure this when using TEI models
TEI_API_KEY=""                # Optional - TEI service API key

# Voyage AI (Optional)
# Configure this when using Voyage AI models
VOYAGE_API_KEY=""                       # Optional - Voyage AI API key

# vLLM (Optional)
# Configure this when using vLLM models
VLLM_API_KEY=""                         # Optional - vLLM service API key

# ClamAV Virus Scanner (Optional)
# Configure this when you want to scan documents for malware before processing
VIRUS_SCANNER_ENABLED=false             # Optional - Enable virus scanning for uploaded files (default: false, options: true, false)
CLAMAV_SCAN_MODE="instream"             # Optional - ClamAV scanning mode (default: instream, options: instream, scan)
CLAMAV_HOST=""                          # Optional - ClamAV server hostname or IP address
CLAMAV_PORT=3310                        # Optional - ClamAV server port number (default: 3310)
CLAMAV_TIMEOUT_SECONDS=5                # Optional - Timeout in seconds for virus scanning operations (default: 5)

# RabbitMQ Configuration (Required)
# Message broker for asynchronous task processing
RABBITMQ_HOST="localhost"               # Required - RabbitMQ server hostname or IP address
RABBITMQ_USER="user"                    # Required - RabbitMQ username for authentication
RABBITMQ_PASSWORD="password"            # Required - RabbitMQ password for authentication
RABBITMQ_PORT=5672                      # Required - RabbitMQ server port number (default: 5672)

#------------------------------------------------------------------------------
# LOGGING & MONITORING
#------------------------------------------------------------------------------

# Logging & Monitoring Configuration
# Application logging and error monitoring settings
LOG_FORMAT="simple"                         # Optional - Log message format (default: simple, options: simple, text, json)
DPO_LOG_LEVEL="INFO"                        # Optional - Logging level (default: INFO, options: DEBUG, INFO, WARNING, ERROR, CRITICAL)
DPO_LOG_FILE_ENABLED=false                  # Optional - Enable file-based logging (default: false, options: true, false)
DPO_LOG_BASE_DIR="data/logs"                # Optional - Base directory for storing log files (default: data/logs)
DPO_LOG_WHEN="D"                            # Optional - Log rotation interval (default: D, options: S, M, H, D, W0-W6, midnight)
DPO_LOG_BACKUP_COUNT=5                      # Optional - Number of backup log files to keep (default: 5)
DPO_LOG_TIMEZONE="UTC"                      # Optional - Timezone for log timestamps (default: UTC)

# Sentry Error Monitoring (Optional)
# Configure this when you want centralized error reporting and monitoring
SENTRY_DSN=""                               # Optional - Sentry DSN for error reporting
DPO_SENTRY_PROJECT="sentry-project-name"    # Optional - Sentry project name for organizing error reports (default: sentry-project-name)

#------------------------------------------------------------------------------
# CONFIGURATION PROVIDER
#------------------------------------------------------------------------------
# Configuration Provider (Required)
# Selection of configuration data source
DPO_CONFIG_PROVIDER="db"                # Required - Configuration source selection (default: db, options: db, spreadsheet) - case insensitive

#------------------------------------------------------------------------------
# SECURITY & ENCRYPTION
#------------------------------------------------------------------------------
# Encryption settings for securing sensitive data

# Encryption Configuration - the key and key ID must match the values set in the backend (BE) environment
# Configure this when using LLM Labs
ENCRYPTION_KEY="enc-key"                           # Required - 32-byte key used for decrypting DB fields and encrypting sensitive logs
ENCRYPTION_KEY_ID="enc-key-id"                     # Required - Encryption key id (default: k1)
DPO_NUMBER_OF_ITERATIONS=100000                    # Optional - Number of iterations for key derivation function (default: 100000)

#------------------------------------------------------------------------------
# MESSAGE QUEUE & TASK PROCESSING (Required)
#------------------------------------------------------------------------------
# Asynchronous task processing and job queue management

# Celery Configuration (Required)
# Distributed task queue for background job processing
DPO_CELERY_APP_NAME="docproc"                                 # Required - Celery application name for identification (default: docproc)
DPO_CELERY_BROKER_URL="amqp://user:password@localhost:5672/"  # Required - Celery broker URL for message queue connection
DPO_CELERY_TIME_LIMIT_SECONDS=120                             # Required - Maximum time limit for Celery tasks in seconds (default: 120)
DPO_CELERY_TIMEZONE="UTC"                                     # Required - Timezone for Celery scheduler and tasks (default: UTC)

# Celery TLS Settings (Optional)
# Configure this when your Celery message broker requires an encrypted connection
DPO_CELERY_BROKER_TLS_ENABLED=false         # Optional - Enable TLS/SSL for broker connection (default: false, options: true, false)
DPO_CELERY_BROKER_TLS_CERT_REQS="none"      # Optional - SSL certificate verification requirements (default: none, options: none, optional, required)
DPO_CELERY_BROKER_TLS_CA_CERT_PATH=""       # Optional - Path to CA certificates for Celery broker TLS
DPO_CELERY_BROKER_TLS_CERT_PATH=""          # Optional - Path to client certificate for Celery broker TLS
DPO_CELERY_BROKER_TLS_KEY_PATH=""           # Optional - Path to client private key for Celery broker TLS
DPO_CELERY_BROKER_TLS_CIPHERS=""            # Optional - Allowed TLS ciphers for Celery broker connection

# Job Queue Management (Optional)
# Configure this when you want to change where job queue data is stored
DPO_JOB_QUEUE_DIR="data/queues"             # Optional - Directory for storing job queue data (default: data/queues)

# Job DLX Management (Optional)
# Configure this when you want to support retrying failed jobs through a Dead Letter Exchange (DLX)
DPO_JOB_DLX_ENABLED=false                   # Optional - Enable Dead Letter Exchange for failed jobs (default: false, options: true, false)
DPO_JOB_DLX_QUEUE_DIR="data/dlx_queues"     # Optional - Directory for Dead Letter Exchange queue storage (default: data/dlx_queues)
DPO_JOB_DLX_MAX_REQUEUED_MESSAGES=100       # Optional - Maximum number of messages to requeue from DLX (default: 100)

#------------------------------------------------------------------------------
# DOWNLOADER CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# File download operation settings

# Download Settings (Optional)
# Configure this when you need to adjust the timeout for download operations
DPO_DOWNLOADER_TIMEOUT_SECONDS=60           # Optional - Timeout in seconds for download operations (default: 60)

#------------------------------------------------------------------------------
# LOADER CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# Document loading and caching settings

# Loader Cache Settings (Optional)
# Configure this when you want to cache document loading operations
DPO_LOADER_CACHE_ENABLED=true               # Optional - Enable caching for document loading operations (default: true, options: true, false)
DPO_LOADER_CACHE_VERSION="version-0.0.1"    # Optional - Version identifier for loader cache
DPO_LOADER_CACHE_DIR="data/cache/loader"    # Optional - Directory for storing loader cache files (default: data/cache/loader)

# Network Proxy (Optional)
# Configure this when using an HTTP proxy to process YouTube content
DPO_PROXY_URL=""                            # Optional - HTTP proxy URL for document loading requests

# Azure Document Intelligence (Optional)
# Configure this when processing PDFs using Azure AI Document Intelligence
DPO_AZURE_AI_DOCUMENT_INTELLIGENCE_ENDPOINT=""                # Optional - Azure AI Document Intelligence service endpoint URL
DPO_AZURE_AI_DOCUMENT_INTELLIGENCE_KEY=""                     # Optional - Azure AI Document Intelligence API key

#------------------------------------------------------------------------------
# DATA GENERATOR CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# Content generation and processing settings

# Model API Keys (Optional)
# Configure this when using API keys for image captioning AI models.
DPO_MODEL_API_KEYS="{}"                     # Optional - Additional API keys as JSON string for custom models. Example: "{\"replicate/llama-2-70b\":\"r8_xxx\", \"huggingface/zephyr-7b\":\"hf_xxx\"}" (default: "{}")

#------------------------------------------------------------------------------
# INDEXER CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# Document indexing and search processing settings

# Indexing Retry Configuration (Optional)
# Configure this when enabling retry logic for indexing operations
DPO_INDEXER_MAX_RETRIES=3                   # Optional - Maximum number of retry attempts for failed indexing operations (default: 3)
DPO_INDEXER_BASE_DELAY_SECONDS=1.0          # Optional - Base delay in seconds between retry attempts (default: 1.0)
DPO_INDEXER_MAX_DELAY_SECONDS=10.0          # Optional - Maximum delay in seconds between retry attempts (default: 10.0)
DPO_INDEXER_EXPONENTIAL_BASE=2.0            # Optional - Exponential base for calculating retry delays (default: 2.0)
DPO_INDEXER_JITTER_ENABLED=true             # Optional - Enable jitter for retry delays (default: true, options: true, false)
DPO_INDEXER_TIMEOUT_SECONDS=60.0            # Optional - Timeout in seconds for individual indexing operations (default: 60.0)
DPO_INDEXER_BATCH_SIZE=25                   # Optional - Batch size for processing multiple documents at once (default: 25)

# Vector Database Cache (Optional)
# Configure this when you want to cache vector search results for performance
DPO_VECTOR_DB_CACHE_ENABLED=false               # Optional - Enable caching for vector database operations (default: false, options: true, false)
DPO_VECTOR_DB_CACHE_VERSION="version-0.0.1"     # Optional - Version identifier for vector database cache (default: version-0.0.1)
DPO_VECTOR_DB_CACHE_DIR="data/cache/vector_db"  # Optional - Directory for storing vector database cache files (default: data/cache/vector_db)

# HuggingFace Configuration (Optional)
# Configure this when using HuggingFace models for local model inference
DPO_HUGGINGFACE_CACHE_FOLDER="data/huggingface_cache"        # Optional - Directory for storing HuggingFace model cache (default: data/huggingface_cache)

#------------------------------------------------------------------------------
# SQL TABLE INDEXER (Optional)
#------------------------------------------------------------------------------
# Database table indexing for structured data

# SQL Table Processing (Optional)
# Configure this when you need to index structured data from SQL databases
DPO_SQL_TABLE_INDEXER_EMBEDDING_BATCH_SIZE=50  # Optional - Batch size for SQL table embedding operations (default: 50)
DPO_SQL_TABLE_INDEXER_INSERT_BATCH_SIZE=1000   # Optional - Batch size for SQL table insert operations (default: 1000)

# Available Model Providers (Optional)
# Configure this when you want to enable specific language model providers
# Supported providers: openai, cohere, google, bedrock, azure-openai, tei, voyage, vllm, anthropic (case insensitive)
DPO_SUPPORTED_MODEL_PROVIDERS=""               # Optional - Comma-separated list of enabled language model providers

#------------------------------------------------------------------------------
# BVT CONFIGURATION (Optional)
#------------------------------------------------------------------------------
# Build Verification Test settings

# BVT Enabled (Optional)
# Configure this when you want to enable BVT checks
DPO_BVT_ENABLED=true                           # Optional - Enable BVT checks (default: true, options: true, false)
###############################################################################
# END OF CONFIGURATION
###############################################################################

Last updated: 2025-11-28