o
    Uh,                     @  s   d dl mZ d dlmZmZ d dlmZmZ d dlm	Z	m
Z
 dddddZ	dd	d
ZdddddZdddddZdddddZdd ZddddZddddZdS )    )annotations)common_affixconv_sequences)is_nonesetupPandas)EditopEditopsN)	processorscore_cutoffc                C  s   |dur|| } ||}| sdS t | |\} }dt| > d }i }|j}d}| D ]}||d|B ||< |dK }q(|D ]}	||	d}
||
@ }|| || B }q:t|t|  d d}|du sd||krf|S dS )a  
    Calculates the length of the longest common subsequence

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the similarity is smaller than score_cutoff,
        0 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2
    Nr      0)r   lengetbincount)s1s2r	   r
   Sblock	block_getxch1ch2Matchesures r   p/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/rapidfuzz/distance/LCSseq_py.py
similarity
   s&   

r   c           
      C  s~   |sdS dt |> d }| j}|D ]}||d}||@ }|| || B }qt|t | d  d}	|d u s;|	|kr=|	S dS Nr   r   r   )r   r   r   r   )
r   r   r   r
   r   r   r   r   r   r   r   r   r   _block_similarityB   s   
r    c                C  sf   |dur|| } ||}t | |\} }tt| t|}t| |}|| }|du s-||kr/|S |d S )a  
    Calculates the LCS distance in the range [0, max].

    This is calculated as ``max(len1, len2) - similarity``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the distance is bigger than score_cutoff,
        score_cutoff + 1 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Examples
    --------
    Find the LCS distance between two strings:

    >>> from rapidfuzz.distance import LCSseq
    >>> LCSseq.distance("lewenstein", "levenshtein")
    2

    Setting a maximum distance allows the implementation to select
    a more efficient implementation:

    >>> LCSseq.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

    Nr   )r   maxr   r   )r   r   r	   r
   maximumsimdistr   r   r   distanceX   s   /
r%   c                C  s   t   t| st|rdS |dur|| } ||}| r|sdS t| |\} }tt| t|}t| || }|du s>||kr@|S dS )a2  
    Calculates a normalized LCS similarity in the range [1, 0].

    This is calculated as ``distance / max(len1, len2)``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is 1.0,
        which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 0 and 1.0
          ?Nr   r   )r   r   r   r!   r   r%   )r   r   r	   r
   r"   norm_simr   r   r   normalized_distance   s   r(   c                C  sX   t   t| st|rdS |dur|| } ||}dt| | }|du s(||kr*|S dS )a  
    Calculates a normalized LCS similarity in the range [0, 1].

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0

    Examples
    --------
    Find the normalized LCS similarity between two strings:

    >>> from rapidfuzz.distance import LCSseq
    >>> LCSseq.normalized_similarity("lewenstein", "levenshtein")
    0.8181818181818181

    Setting a score_cutoff allows the implementation to select
    a more efficient implementation:

    >>> LCSseq.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.9)
    0.0

    When a different processor is used s1 and s2 do not have to be strings

    >>> LCSseq.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.81818181818181
    g        Nr&   r   )r   r   r(   )r   r   r	   r
   r'   r   r   r   normalized_similarity   s   2r)   c                 C  s   | sdg fS dt | > d }i }|j}d}| D ]}||d|B ||< |dK }qg }|D ]}||d}	||	@ }
||
 ||
 B }|| q+t|t |  d  d}||fS r   )r   r   appendr   r   )r   r   r   r   r   r   r   matrixr   r   r   r#   r   r   r   _matrix   s"   

r,   r	   c                C  s  |dur|| } ||}t | |\} }t| |\}}| |t| |  } ||t||  }t| |\}}tg dd}t| | | |_t|| | |_t| t| d|  }|dkr_|S dg| }	t| }
t|}|dkr|
dkr||d  d|
d > @ r|d8 }|
d8 }
td|
| || |	|< n'|d8 }|r||d  d|
d > @ s|d8 }td|
| || |	|< n|
d8 }
|dkr|
dkst|
dkr|d8 }|
d8 }
td|
| || |	|< |
dks|dkr|d8 }|d8 }td|
| || |	|< |dks|	|_|S )uf  
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described in [6]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [6] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import LCSseq
    >>> for tag, src_pos, dest_pos in LCSseq.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[0] s2[0]
     delete s1[3] s2[2]
     insert s1[4] s2[2]
     insert s1[6] s2[5]
    Nr      r   deleteinsert)	r   r   r   r,   r   _src_len	_dest_lenr   _editops)r   r   r	   
prefix_len
suffix_lenr#   r+   editopsr$   editop_listcolrowr   r   r   r6     sP   ,
r6   c                C  s   t | ||d S )u  
    Return Opcodes describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described in [7]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [7] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import LCSseq

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in LCSseq.opcodes(a, b):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
     delete a[3:4] (x) b[2:2] ()
     insert a[4:4] () b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    r-   )r6   
as_opcodes)r   r   r	   r   r   r   opcodesx  s   2r;   )N)
__future__r   rapidfuzz._common_pyr   r   rapidfuzz._utilsr   r   !rapidfuzz.distance._initialize_pyr   r   r   r    r%   r(   r)   r,   r6   r;   r   r   r   r   <module>   s.   <
>4>d