🛠️ Create Custom Evaluator / Scorer
1. Implement Custom Metric and Custom Evaluator
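To add your own scorer, subclass `BaseMetric` and implement its async `_evaluate` method, then wrap the metric in a `BaseEvaluator` subclass that delegates to it, as shown below.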
Example Usage
```python
import asyncio

from gllm_evals.dataset import load_simple_rag_dataset
from gllm_evals.evaluator.evaluator import BaseEvaluator
from gllm_evals.metrics.metric import BaseMetric
from gllm_evals.types import MetricInput, MetricOutput, EvaluationOutput


class ExactMatchMetric(BaseMetric):
    """Scores 1 if the generated response exactly matches the expected response, else 0."""

    def __init__(self):
        self.name = "exact_match"

    async def _evaluate(self, data: MetricInput) -> MetricOutput:
        score = int(data["generated_response"] == data["expected_response"])
        return {"score": score, "explanation": None}


class ResponseEvaluator(BaseEvaluator):
    """Evaluator that delegates scoring to the custom ExactMatchMetric."""

    def __init__(self):
        super().__init__(name="response_evaluator")
        self.metric = ExactMatchMetric()

    async def _evaluate(self, data: MetricInput) -> EvaluationOutput:
        return await self.metric.evaluate(data)


async def main():
    evaluator = ResponseEvaluator()
    data = load_simple_rag_dataset()
    result = await evaluator.evaluate(data[0])
    print(result)


if __name__ == "__main__":
    asyncio.run(main())
```
Example Output
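The printed result follows the dictionary shape returned by `ExactMatchMetric._evaluate`. An illustrative output, assuming the first dataset example matches exactly (the real score depends on the dataset, and `BaseEvaluator.evaluate` may add fields of its own):

```python
{"score": 1, "explanation": None}
```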
2. Combining Existing Metrics with a Custom Evaluator
Example Usage
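A minimal sketch of this pattern: instantiate several metrics in the evaluator's constructor, run them concurrently in `_evaluate`, and merge their outputs keyed by metric name. `SomeBuiltInMetric` is a hypothetical placeholder (the gllm_evals metric catalog isn't shown here; substitute any metric class the library actually ships), and returning a dict of per-metric results is an assumption about what `EvaluationOutput` can carry.

```python
import asyncio

from gllm_evals.dataset import load_simple_rag_dataset
from gllm_evals.evaluator.evaluator import BaseEvaluator
from gllm_evals.metrics.metric import BaseMetric
from gllm_evals.types import MetricInput, MetricOutput, EvaluationOutput

# Hypothetical import: replace with a metric class that actually ships with gllm_evals.
from gllm_evals.metrics.some_builtin import SomeBuiltInMetric  # hypothetical


class ExactMatchMetric(BaseMetric):
    """The custom metric from section 1, repeated here so the sketch is self-contained."""

    def __init__(self):
        self.name = "exact_match"

    async def _evaluate(self, data: MetricInput) -> MetricOutput:
        score = int(data["generated_response"] == data["expected_response"])
        return {"score": score, "explanation": None}


class CombinedEvaluator(BaseEvaluator):
    """Runs several metrics over the same record and merges their outputs."""

    def __init__(self):
        super().__init__(name="combined_evaluator")
        # Mix a built-in metric with the custom one defined in section 1.
        self.metrics = [SomeBuiltInMetric(), ExactMatchMetric()]

    async def _evaluate(self, data: MetricInput) -> EvaluationOutput:
        # Run every metric concurrently against the same input.
        results = await asyncio.gather(*(m.evaluate(data) for m in self.metrics))
        # Key each output by metric name so the scores stay distinguishable.
        return {m.name: r for m, r in zip(self.metrics, results)}


async def main():
    evaluator = CombinedEvaluator()
    data = load_simple_rag_dataset()
    print(await evaluator.evaluate(data[0]))


if __name__ == "__main__":
    asyncio.run(main())
```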
Example Output
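Illustrative output under the same assumptions (the scores and key names below are made up, not taken from a real run):

```python
{
    "some_builtin": {"score": 0.8, "explanation": "..."},
    "exact_match": {"score": 1, "explanation": None},
}
```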
Which Method Should You Use?
| Use Case | Recommended Method |
| --- | --- |
| You need scoring logic the library does not provide | 1. Implement a custom metric and a custom evaluator |
| Built-in metrics already cover your criteria, but you need custom orchestration or aggregation | 2. Combine existing metrics with a custom evaluator |