o
    JjgB                     @   s   d Z ddlmZ ddlmZmZmZmZ ddlZ	ddl
mZmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZ defddZG dd deeZG dd deZG dd deeZ G dd deeZ!dS )z@A chain for comparing the output of two models using embeddings.    )Enum)AnyDictListOptionalN)AsyncCallbackManagerForChainRunCallbackManagerForChainRun	Callbacks)
Embeddings)pre_init)
ConfigDictField)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc                  C   sR   z
ddl m}  W |  S  ty(   zddlm}  W Y |  S  ty'   tdw w )zaCreate an Embeddings object.
    Returns:
        Embeddings: The created Embeddings object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.)langchain_openair   ImportError%langchain_community.embeddings.openair    r   d/var/www/html/zoom/venv/lib/python3.10/site-packages/langchain/evaluation/embedding_distance/base.py_embedding_factory   s   	r   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    cosine	euclidean	manhattan	chebyshevhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr   r   r   r   r   *   s    
r   c                   @   sV  e Zd ZU dZeedZeed< ee	j
dZe	ed< edeeef deeef fdd	Zed
dZedee fddZdedefddZde	defddZedejdejdejfddZedejdejdejfddZedejdejdejfddZedejdejdejfddZ edejdejdejfddZ!d ejde"fd!d"Z#d#S )$_EmbeddingDistanceChainMixina0  Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
                                            for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metricvaluesr   c                 C   s   | d}g }zddlm} || W n	 ty   Y nw zddlm} || W n	 ty4   Y nw |s;tdt|t|rUzddl}W |S  tyT   tdw |S )zValidate that the TikTok library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        r.   r   r   r   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr   r   appendr   r   
isinstancetupletiktoken)clsr1   r.   types_r   r6   r   r   r   _validate_tiktoken_installedH   s8   


z9_EmbeddingDistanceChainMixin._validate_tiktoken_installedT)arbitrary_types_allowedc                 C   s   dgS )zgReturn the output keys of the chain.

        Returns:
            List[str]: The output keys.
        scorer   selfr   r   r   output_keysx   s   z(_EmbeddingDistanceChainMixin.output_keysresultc                 C   s$   d|d i}t |v r|t  |t < |S )Nr;   r   )r=   r?   parsedr   r   r   _prepare_output   s   z,_EmbeddingDistanceChainMixin._prepare_outputmetricc              
   C   sJ   t j| jt j| jt j| jt j| jt j	| j
i}||v r|| S td| )zGet the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        zInvalid metric: )r   r'   _cosine_distancer(   _euclidean_distancer)   _manhattan_distancer*   _chebyshev_distancer+   _hamming_distance
ValueError)r=   rB   metricsr   r   r   _get_metric   s   
z(_EmbeddingDistanceChainMixin._get_metricabc                 C   s6   zddl m} W n ty   tdw d|| | S )zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )cosine_similarityzThe cosine_similarity function is required to compute cosine distance. Please install the langchain-community package using `pip install langchain-community`.g      ?)langchain_community.utils.mathrM   r   )rK   rL   rM   r   r   r   rC      s   z-_EmbeddingDistanceChainMixin._cosine_distancec                 C   s   t j| | S )zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        )nplinalgnormrK   rL   r   r   r   rD      s   z0_EmbeddingDistanceChainMixin._euclidean_distancec                 C      t t | | S )zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        )rO   sumabsrR   r   r   r   rE         z0_EmbeddingDistanceChainMixin._manhattan_distancec                 C   rS   )zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        )rO   maxrU   rR   r   r   r   rF      rV   z0_EmbeddingDistanceChainMixin._chebyshev_distancec                 C   s   t | |kS )zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        )rO   meanrR   r   r   r   rG      s   z._EmbeddingDistanceChainMixin._hamming_distancevectorsc                 C   s6   |  | j}||d dd|d dd }|S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        r      )rJ   r0   reshapeitem)r=   rY   rB   r;   r   r   r   _compute_score   s   	&z+_EmbeddingDistanceChainMixin._compute_scoreN)$r#   r$   r%   r&   r   r   r.   r
   __annotations__r   r'   r0   r   r   strr   r9   r   model_configpropertyr   r>   dictrA   rJ   staticmethodrO   ndarrayrC   floatingrD   rE   rF   rG   floatr^   r   r   r   r   r,   <   s0   
 $+r,   c                   @   sF  e Zd ZdZedefddZedefddZede	e fddZ
		dd
eeef dee deeef fddZ		dd
eeef dee deeef fddZd	d	d	d	dddedee dedee	e  deeeef  dededefddZd	d	d	d	dddedee dedee	e  deeeef  dededefddZd	S )EmbeddingDistanceEvalChaina"  Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                 C   s   dS )zReturn whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        Tr   r<   r   r   r   requires_reference   s   z-EmbeddingDistanceEvalChain.requires_referencec                 C      d| j j dS )N
embedding_	_distancer0   valuer<   r   r   r   evaluation_name     z*EmbeddingDistanceEvalChain.evaluation_namec                 C      ddgS )eReturn the input keys of the chain.

        Returns:
            List[str]: The input keys.
        
prediction	referencer   r<   r   r   r   
input_keys     z%EmbeddingDistanceEvalChain.input_keysNinputsrun_managerc                 C   0   t | j|d |d g}| |}d|iS )a0  Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        rs   rt   r;   rO   arrayr.   embed_documentsr^   r=   rw   rx   rY   r;   r   r   r   _call  s
   
z EmbeddingDistanceEvalChain._callc                    <   | j |d |d gI dH }t|}| |}d|iS )a:  Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        rs   rt   Nr;   r.   aembed_documentsrO   r{   r^   r=   rw   rx   embeddedrY   r;   r   r   r   _acall)     


z!EmbeddingDistanceEvalChain._acallF)rt   	callbackstagsmetadatainclude_run_infors   rt   r   r   r   r   kwargsc          	      K   "   | ||d||||d}|  |S )a  Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required)
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        rs   rt   rw   r   r   r   r   rA   	r=   rs   rt   r   r   r   r   r   r?   r   r   r   _evaluate_strings?  s   
z,EmbeddingDistanceEvalChain._evaluate_stringsc          	         ,   | j ||d||||dI dH }| |S )a  Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   NacallrA   r   r   r   r   _aevaluate_stringsa  s   
z-EmbeddingDistanceEvalChain._aevaluate_stringsN)r#   r$   r%   r&   rb   boolri   r`   ro   r   ru   r   r   r   r   r~   r   r   r	   rc   r   r   r   r   r   r   rh      s    







	

&
	
rh   c                   @   s(  e Zd ZdZedee fddZedefddZ	dde	ee
f d	ee de	ee
f fd
dZ	dde	ee
f d	ee de	ee
f fddZddddddedededeee  dee	ee
f  dede
defddZddddddedededeee  dee	ee
f  dede
defddZdS )"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 C   rq   )rr   rs   prediction_br   r<   r   r   r   ru     rv   z-PairwiseEmbeddingDistanceEvalChain.input_keysc                 C   rj   )Npairwise_embedding_rl   rm   r<   r   r   r   ro     rp   z2PairwiseEmbeddingDistanceEvalChain.evaluation_nameNrw   rx   c                 C   ry   )a  Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        rs   r   r;   rz   r}   r   r   r   r~     s   
z(PairwiseEmbeddingDistanceEvalChain._callc                    r   )a/  Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        rs   r   Nr;   r   r   r   r   r   r     r   z)PairwiseEmbeddingDistanceEvalChain._acallF)r   r   r   r   rs   r   r   r   r   r   r   c          	      K   r   )a  Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        rs   r   r   r   	r=   rs   r   r   r   r   r   r   r?   r   r   r   _evaluate_string_pairs  s   
z9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairsc          	         r   )a  Asynchronously evaluate the embedding distance

        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to traces
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   Nr   r   r   r   r   _aevaluate_string_pairs  s   
z:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairsr   )r#   r$   r%   r&   rb   r   r`   ru   ro   r   r   r   r   r~   r   r   r	   r   rc   r   r   r   r   r   r   r     s    	






	

(
	
r   )"r&   enumr   typingr   r   r   r   numpyrO    langchain_core.callbacks.managerr   r   r	   langchain_core.embeddingsr
   langchain_core.utilsr   pydanticr   r   langchain.chains.baser   langchain.evaluation.schemar   r   langchain.schemar   r   r`   r   r,   rh   r   r   r   r   r   <module>   s(     8 
