o
    Uhd                     @  s  d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
 d dlmZ d dlmZmZmZ dd Zd	eiZd
d Zdd Zdd ZdddddZdd ZdddddZdddddZdddddZdddddZdddddZdddd d!Z dddd"d#Z!dddd$d%Z"dddd&d'Z#dddd(d)Z$eee eee eee eee eee ee e ee!e ee"e ee#e ee$e dS )*    )annotations)ceil)conv_sequences)
ScorerFlagadd_scorer_attrsis_nonesetupPandas)ScoreAlignment)_block_normalized_similaritydistancenormalized_similarityc                  K  s   ddt jt jB dS )Nd   r   )optimal_scoreworst_scoreflags)r   
RESULT_F64	SYMMETRIC)_kwargs r   e/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/rapidfuzz/fuzz_py.pyget_scorer_flags_fuzz   s   
r   get_scorer_flagsc                 C  s(   |r
dd|  |  nd}||kr|S dS )Nr   r   r   )distlensumscore_cutoffscorer   r   r   _norm_distance   s   r   c                 C  sl   t | ttfr|  S g g}| D ]}t |tr|nt|}| r'|g  q|d | qdd |D S )Nc                 S  s   g | ]}|rt |qS r   )tuple).0xr   r   r   
<listcomp>-   s    z#_split_sequence.<locals>.<listcomp>)
isinstancestrbytessplitchrisspaceappend)seqsplitted_seqr    chr   r   r   _split_sequence!   s   r,   c                 C  sp   | sdS t tt| trd| S t tt| tr d| S g }| D ]}||7 }|tdg7 }q$|d d S )N      r   )r"   nextiterr#   joinr$   ord)seq_listjoinedr)   r   r   r   _join_splitted_sequence0   s   

r6   N	processorr   c                C  sB   t   t| st|rdS |dur|d }t| |||d}|d S )a  
    Calculates the normalized Indel similarity.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    See Also
    --------
    rapidfuzz.distance.Indel.normalized_similarity : Normalized Indel similarity

    Notes
    -----
    .. image:: img/ratio.svg

    Examples
    --------
    >>> fuzz.ratio("this is a test", "this is a test!")
    96.55171966552734
    r   Nr   r7   )r   r   indel_normalized_similarity)s1s2r8   r   r   r   r   r   ratio?   s   *r<   c                 C  s  t | }t| }t|}tdd|d|}i }|j}d}	| D ]}
||
d|	B ||
< |	dK }	qtd|D ]5}||d  }||vr?q2t|| |d| |d}||jkrg| |_}d|_||_|jdkrgd|_|  S q2t|| D ];}||| d  }||vr}qnt|| ||||  |d}||jkr| |_}||_|| |_|jdkrd|_|  S qnt|| |D ]3}|| }||vrqt|| ||d |d}||jkr| |_}||_||_|jdkrd|_|  S q| jd9  _|S )zK
    implementation of partial_ratio. This assumes len(s1) <= len(s2).
    r      Nr   r   )	setlenr	   getrange!indel_block_normalized_similarityr   
dest_startdest_end)r:   r;   r   s1_char_setlen1len2resblock	block_getr    ch1isubstr_lastls_ratiosubstr_firstr   r   r   _partial_ratio_implt   sf   










rQ   c                C  s"   t | |||d}|du rdS |jS )u	  
    Searches for the optimal alignment of the shorter string in the
    longer string and returns the fuzz.ratio for this alignment.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    Depending on the length of the needle (shorter string) different
    implementations are used to improve the performance.

    short needle (length ≤ 64):
        When using a short needle length the fuzz.ratio is calculated for all
        alignments that could result in an optimal alignment. It is
        guaranteed to find the optimal alignment. For short needles this is very
        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst
        case performance of ``O(NM)``.

    .. image:: img/partial_ratio_short_needle.svg

    long needle (length > 64):
        For long needles a similar implementation to FuzzyWuzzy is used.
        This implementation only considers alignments which start at one
        of the longest common substrings. This results in a worst case performance
        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.
        The following Python code shows the concept:

        .. code-block:: python

            blocks = SequenceMatcher(None, needle, longer, False).get_matching_blocks()
            score = 0
            for block in blocks:
                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0
                long_end = long_start + len(shorter)
                long_substr = longer[long_start:long_end]
                score = max(score, fuzz.ratio(needle, long_substr))

        This is a lot faster than checking all possible alignments. However it
        only finds one of the best alignments and not necessarily the optimal one.

    .. image:: img/partial_ratio_long_needle.svg

    Examples
    --------
    >>> fuzz.partial_ratio("this is a test", "this is a test!")
    100.0
    r7   Nr   )partial_ratio_alignmentr   )r:   r;   r8   r   	alignmentr   r   r   partial_ratio   s   GrT   c          
      C  s&  t   t| st|rdS |dur|| } ||}|du rd}| s+|s+tdddddS t| |\} }t| }t|}||krC| }|}n|}| }t|||d }|jdkry||kryt||j}t|||d }	|	j|jkryt|	j|	j|	j	|	j
|	j}|j|k rdS ||kr|S t|j|j|j	|j
|jS )a  
    Searches for the optimal alignment of the shorter string in the
    longer string and returns the fuzz.ratio and the corresponding
    alignment.

    Parameters
    ----------
    s1 : str | bytes
        First string to compare.
    s2 : str | bytes
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff None is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    alignment : ScoreAlignment, optional
        alignment between s1 and s2 with the score as a float between 0 and 100

    Examples
    --------
    >>> s1 = "a certain string"
    >>> s2 = "cetain"
    >>> res = fuzz.partial_ratio_alignment(s1, s2)
    >>> res
    ScoreAlignment(score=83.33333333333334, src_start=2, src_end=8, dest_start=0, dest_end=6)

    Using the alignment information it is possible to calculate the same fuzz.ratio

    >>> fuzz.ratio(s1[res.src_start:res.src_end], s2[res.dest_start:res.dest_end])
    83.33333333333334
    Nr   g      Y@r   )r   r   r	   r   r@   rQ   r   maxrD   rE   	src_startsrc_end)
r:   r;   r8   r   rG   rH   shorterlongerrI   res2r   r   r   rR     s:   ,
rR   c                C  n   t   t| st|rdS |dur|| } ||}t| |\} }ttt| }ttt|}t|||dS )a  
    Sorts the words in the strings and calculates the fuzz.ratio between them

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/token_sort_ratio.svg

    Examples
    --------
    >>> fuzz.token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear")
    100.0
    r   Nr>   )r   r   r   r6   sortedr,   r<   r:   r;   r8   r   	sorted_s1	sorted_s2r   r   r   token_sort_ratioW  s   &r`   c                C  s  t   t| st|rdS |dur|| } ||}|du rd}t| |\} }tt| }tt|}|r6|s8dS ||}||}||}|rO|rM|sOdS tt|}	tt|}
t	|	}t	|
}t	t|}||dk | }||dk | }d}t
|| d|d   }t|	|
|d}||krt||| |}|s|S |dk| }t||| |}|dk| }t||| |}t|||S )a  
    Compares the words in the strings based on unique and common words between them
    using fuzz.ratio

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/token_set_ratio.svg

    Examples
    --------
    >>> fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
    83.8709716796875
    >>> fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
    100.0
    # Returns 100.0 if one string is a subset of the other, regardless of extra content in the longer string
    >>> fuzz.token_set_ratio("fuzzy was a bear but not a dog", "fuzzy was a bear")
    100.0
    # Score is reduced only when there is explicit disagreement in the two strings
    >>> fuzz.token_set_ratio("fuzzy was a bear but not a dog", "fuzzy was a bear but not a cat")
    92.3076923076923
    r   Nr   g        r=   r>   )r   r   r   r?   r,   intersection
differencer6   r\   r@   r   indel_distancer   rU   )r:   r;   r8   r   tokens_atokens_b	intersectdiff_abdiff_badiff_ab_joineddiff_ba_joinedab_lenba_lensect_lensect_ab_lensect_ba_lenresultcutoff_distancer   sect_ab_distsect_ab_ratiosect_ba_distsect_ba_ratior   r   r   token_set_ratio  sJ   /


rv   c                C  sT   t   t| st|rdS |dur|| } ||}tt| |d|dt| |d|dS )aU  
    Helper method that returns the maximum of fuzz.token_set_ratio and fuzz.token_sort_ratio
    (faster than manually executing the two functions)

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/token_ratio.svg
    r   Nr7   )r   r   rU   rv   r`   r:   r;   r8   r   r   r   r   token_ratio  s   "rx   c                C  r[   )a$  
    sorts the words in the strings and calculates the fuzz.partial_ratio between them

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/partial_token_sort_ratio.svg
    r   Nr>   )r   r   r   r6   r\   r,   rT   r]   r   r   r   partial_token_sort_ratio+  s   !ry   c                C  s   t   t| st|rdS |dur|| } ||}t| |\} }tt| }tt|}|r0|s2dS ||r9dS tt||}tt||}t	|||dS )a>  
    Compares the words in the strings based on unique and common words between them
    using fuzz.partial_ratio

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/partial_token_set_ratio.svg
    r   Nr   r>   )
r   r   r   r?   r,   ra   r6   r\   rb   rT   )r:   r;   r8   r   rd   re   rg   rh   r   r   r   partial_token_set_ratioZ  s    "
rz   c                C  s   t   t| st|rdS |dur|| } ||}|du rd}t| |\} }t| }t|}t|}t|}||r=dS ||}||}	ttt	|tt	||d}
t
|t
|krht
|t
|	krh|
S t||
}t|
ttt	|tt	|	|dS )am  
    Helper method that returns the maximum of fuzz.partial_token_set_ratio and
    fuzz.partial_token_sort_ratio (faster than manually executing the two functions)

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/partial_token_ratio.svg
    r   Nr   r>   )r   r   r   r,   r?   ra   rb   rT   r6   r\   r@   rU   )r:   r;   r8   r   tokens_split_atokens_split_brd   re   rg   rh   rp   r   r   r   partial_token_ratio  sB   "




 


r}   c          
      C  s  t   t| st|rdS d}|dur|| } ||}| r|s!dS |du r'd}t| }t|}||kr7|| n|| }t| ||d}|dk rZt||| }t|t| ||dd| S |dk r`dnd	}	t|||	 }t|t| ||d|	 }t||| }t|t| ||dd| |	 S )
a   
    Calculates a weighted ratio based on the other ratio algorithms

    Parameters
    ----------
    s1 : str
        First string to compare.
    s2 : str
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Notes
    -----
    .. image:: img/WRatio.svg
    r   gffffff?Nr>   g      ?)r   r8   g       @g?g333333?)r   r   r@   r<   rU   rx   rT   r}   )
r:   r;   r8   r   UNBASE_SCALErG   rH   	len_ratio	end_ratioPARTIAL_SCALEr   r   r   WRatio  s:   !r   c                C  sL   t   t| st|rdS |dur|| } ||}| r|sdS t| ||dS )a  
    Calculates a quick ratio between two strings using fuzz.ratio.

    Since v3.0 this behaves similar to fuzz.ratio with the exception that this
    returns 0 when comparing two empty strings

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 100.
        For ratio < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    similarity : float
        similarity between s1 and s2 as a float between 0 and 100

    Examples
    --------
    >>> fuzz.QRatio("this is a test", "this is a test!")
    96.55171966552734
    r   Nr>   )r   r   r<   rw   r   r   r   QRatio0  s   %r   )%
__future__r   mathr   rapidfuzz._common_pyr   rapidfuzz._utilsr   r   r   r   rapidfuzz.distancer	   rapidfuzz.distance.Indel_pyr
   rC   r   rc   r   r9   r   fuzz_attributer   r,   r6   r<   rQ   rT   rR   r`   rv   rx   ry   rz   r}   r   r   r   r   r   r   <module>   sn   5FRW8s53@TN
4







