"""Interfaces to be implemented by general evaluators."""
from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Optional, Sequence, Tuple, Union
from warnings import warn

from langchain_core.agents import AgentAction
from langchain_core.language_models import BaseLanguageModel
from langchain_core.runnables.config import run_in_executor

from langchain.chains.base import Chain

logger = logging.getLogger(__name__)


class EvaluatorType(str, Enum):
    """The types of the evaluators."""

    QA = "qa"
    COT_QA = "cot_qa"
    CONTEXT_QA = "context_qa"
    PAIRWISE_STRING = "pairwise_string"
    SCORE_STRING = "score_string"
    LABELED_PAIRWISE_STRING = "labeled_pairwise_string"
    LABELED_SCORE_STRING = "labeled_score_string"
    AGENT_TRAJECTORY = "trajectory"
    CRITERIA = "criteria"
    LABELED_CRITERIA = "labeled_criteria"
    STRING_DISTANCE = "string_distance"
    EXACT_MATCH = "exact_match"
    REGEX_MATCH = "regex_match"
    PAIRWISE_STRING_DISTANCE = "pairwise_string_distance"
    EMBEDDING_DISTANCE = "embedding_distance"
    PAIRWISE_EMBEDDING_DISTANCE = "pairwise_embedding_distance"
    JSON_VALIDITY = "json_validity"
    JSON_EQUALITY = "json_equality"
    JSON_EDIT_DISTANCE = "json_edit_distance"
    JSON_SCHEMA_VALIDATION = "json_schema_validation"


class LLMEvalChain(Chain):
    """A base class for evaluators that use an LLM."""

    @classmethod
    @abstractmethod
    def from_llm(cls, llm: BaseLanguageModel, **kwargs: Any) -> LLMEvalChain:
        """Create a new evaluator from an LLM."""
class _EvalArgsMixin:
    """Mixin for checking evaluation arguments."""

    @property
    def requires_reference(self) -> bool:
        """Whether this evaluator requires a reference label."""
        return False

    @property
    def requires_input(self) -> bool:
        """Whether this evaluator requires an input string."""
        return False

    @property
    def _skip_input_warning(self) -> str:
        """Warning to show when input is ignored."""
        return f"Ignoring input in {self.__class__.__name__}, as it is not expected."

    @property
    def _skip_reference_warning(self) -> str:
        """Warning to show when reference is ignored."""
        return (
            f"Ignoring reference in {self.__class__.__name__}, as it is not expected."
        )

    def _check_evaluation_args(
        self,
        reference: Optional[str] = None,
        input: Optional[str] = None,
    ) -> None:
        """Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        """
        if self.requires_input and input is None:
            raise ValueError(f"{self.__class__.__name__} requires an input string.")
        elif input is not None and not self.requires_input:
            warn(self._skip_input_warning)
        if self.requires_reference and reference is None:
            raise ValueError(f"{self.__class__.__name__} requires a reference string.")
        elif reference is not None and not self.requires_reference:
            warn(self._skip_reference_warning)


class StringEvaluator(_EvalArgsMixin, ABC):
    """Grade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels."""

    @property
    def evaluation_name(self) -> str:
        """The name of the evaluation."""
        return self.__class__.__name__

    @property
    def requires_reference(self) -> bool:
        """Whether this evaluator requires a reference label."""
        return False

    @abstractmethod
    def _evaluate_strings(
        self,
        *,
        prediction: Union[str, Any],
        reference: Optional[Union[str, Any]] = None,
        input: Optional[Union[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        """

    async def _aevaluate_strings(
        self,
        *,
        prediction: Union[str, Any],
        reference: Optional[Union[str, Any]] = None,
        input: Optional[Union[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        """
        return await run_in_executor(
            None,
            self._evaluate_strings,
            prediction=prediction,
            reference=reference,
            input=input,
            **kwargs,
        )

    def evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        """
        self._check_evaluation_args(reference=reference, input=input)
        return self._evaluate_strings(
            prediction=prediction, reference=reference, input=input, **kwargs
        )

    async def aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        """
        self._check_evaluation_args(reference=reference, input=input)
        return await self._aevaluate_strings(
            prediction=prediction, reference=reference, input=input, **kwargs
        )
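# Illustrative sketch (not part of the interface above): a minimal
# ``StringEvaluator`` subclass. The class name and the exact-match scoring rule
# are assumptions chosen only for demonstration; the contract the interface
# imposes is implementing ``_evaluate_strings`` and returning a dict with keys
# such as "score", "value", and "reasoning".
class _ExampleExactMatchStringEvaluator(StringEvaluator):
    """Score 1 if the prediction exactly matches the reference, else 0."""

    @property
    def requires_reference(self) -> bool:
        # An exact-match comparison is meaningless without a reference label.
        return True

    def _evaluate_strings(
        self,
        *,
        prediction: Union[str, Any],
        reference: Optional[Union[str, Any]] = None,
        input: Optional[Union[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        matched = str(prediction).strip() == str(reference).strip()
        return {
            "score": int(matched),
            "value": "CORRECT" if matched else "INCORRECT",
            "reasoning": "Exact string comparison after stripping whitespace.",
        }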
class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
    """Compare the output of two models (or two outputs of the same model)."""

    @abstractmethod
    def _evaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        """

    async def _aevaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        """
        return await run_in_executor(
            None,
            self._evaluate_string_pairs,
            prediction=prediction,
            prediction_b=prediction_b,
            reference=reference,
            input=input,
            **kwargs,
        )

    def evaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        """
        self._check_evaluation_args(reference=reference, input=input)
        return self._evaluate_string_pairs(
            prediction=prediction,
            prediction_b=prediction_b,
            reference=reference,
            input=input,
            **kwargs,
        )

    async def aevaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        """
        self._check_evaluation_args(reference=reference, input=input)
        return await self._aevaluate_string_pairs(
            prediction=prediction,
            prediction_b=prediction_b,
            reference=reference,
            input=input,
            **kwargs,
        )
dS )AgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.rB   rI   c                 C  rC   )rO   Tr=   rK   r=   r=   r>   rP   m  rN   z'AgentTrajectoryEvaluator.requires_inputN)rX   rd   rQ   agent_trajectory!Sequence[Tuple[AgentAction, str]]rZ   rX   rY   rA   r   rg   c                K  rC   )  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr=   rL   rd   ry   rZ   rX   rA   r=   r=   r>   _evaluate_agent_trajectoryr  rF   z3AgentTrajectoryEvaluator._evaluate_agent_trajectoryc                  s&   t d| jf||||d|I dH S )  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)rd   ry   rX   rZ   )r   r}   r|   r=   r=   r>   _aevaluate_agent_trajectory  s   z4AgentTrajectoryEvaluator._aevaluate_agent_trajectoryc                K  s(   | j ||d | jd||||d|S )r{   rc   rd   rZ   ry   rX   Nr=   )r]   r}   r|   r=   r=   r>   evaluate_agent_trajectory  s   z2AgentTrajectoryEvaluator.evaluate_agent_trajectoryc                  s0   | j ||d | jd||||d|I dH S )r~   rc   r   Nr=   )r]   r   r|   r=   r=   r>   aevaluate_agent_trajectory  s   z3AgentTrajectoryEvaluator.aevaluate_agent_trajectoryr^   )rd   rQ   ry   rz   rZ   rQ   rX   rY   rA   r   rB   rg   )r%   r&   r'   r(   r`   rP   r   r}   r   r   r   r=   r=   r=   r>   rx   j  s    %$rx   )#r(   
__future__r   loggingabcr   r   enumr   typingr   r   r   r	   r
   warningsr   langchain_core.agentsr   langchain_core.language_modelsr   langchain_core.runnables.configr   langchain.chains.baser   	getLoggerr%   loggerrQ   r   r?   rH   ra   rn   rx   r=   r=   r=   r>   <module>   s$    
6	1tr