
    i]4                       d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ erdd	lmZ d
Z G d de          Z ed          d.d            Z  ed          d/d            Z! ed          d/d            Z" ed          d/d            Z# ed          d/d            Z$ G d de
          Z% ed          d0d            Z&d1d"Z'	 	 d2d3d)Z(d4d+Z)d5d,Z*	 	 d2d3d-Z+d#S )6z
Grapheme cluster segmentation following Unicode Standard Annex #29.

This module provides pure-Python implementation of the grapheme cluster boundary algorithm as
defined in UAX #29: Unicode Text Segmentation.

https://www.unicode.org/reports/tr29/
    )annotations)IntEnum)	lru_cache)TYPE_CHECKINGOptional
NamedTuple   )bisearch)
GRAPHEME_L
GRAPHEME_T
GRAPHEME_VGRAPHEME_LVINCB_EXTENDINCB_LINKERGRAPHEME_LVTINCB_CONSONANTGRAPHEME_EXTENDGRAPHEME_CONTROLGRAPHEME_PREPENDGRAPHEME_SPACINGMARKEXTENDED_PICTOGRAPHICGRAPHEME_REGIONAL_INDICATOR)Iterator    c                  J    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdS )GCBz'Grapheme Cluster Break property values.r   r	                        	   
            N)__name__
__module____qualname____doc__OTHERCRLFCONTROLEXTENDZWJREGIONAL_INDICATORPREPENDSPACING_MARKLVTLVLVT     W/home/piyush/.hermes/hermes-agent/venv/lib/python3.11/site-packages/wcwidth/grapheme.pyr   r   ,   s[        11E	
B	
BGF
CGL	A
A
A	B
CCCr<   r   i   )maxsizeucsintreturnc                   | dk    rt           j        S | dk    rt           j        S | dk    rt           j        S t	          | t
                    rt           j        S t	          | t                    rt           j        S t	          | t                    rt           j
        S t	          | t                    rt           j        S t	          | t                    rt           j        S t	          | t                    rt           j        S t	          | t"                    rt           j        S t	          | t&                    rt           j        S t	          | t*                    rt           j        S t	          | t.                    rt           j        S t           j        S )z;Return the Grapheme_Cluster_Break property for a codepoint.r(   r%   i   )r   r.   r/   r2   	_bisearchr   r0   r   r1   r   r3   r   r4   r   r5   r   r6   r   r7   r   r8   r   r9   r   r:   r-   r?   s    r=   _grapheme_cluster_breakrE   B   sA   
 f}}v
f}}v
f}}w&'' {o&& z122 &%%&'' {*++  j!! uj!! uj!! uk"" vl## w9r<   boolc                F    t          t          | t                              S )z6Check if codepoint has Extended_Pictographic property.)rF   rC   r   rD   s    r=   _is_extended_pictographicrH   e   s     	#455666r<   c                F    t          t          | t                              S )z,Check if codepoint has InCB=Linker property.)rF   rC   r   rD   s    r=   _is_incb_linkerrJ   k        	#{++,,,r<   c                F    t          t          | t                              S )z/Check if codepoint has InCB=Consonant property.)rF   rC   r   rD   s    r=   _is_incb_consonantrM   q   s     	#~..///r<   c                F    t          t          | t                              S )z,Check if codepoint has InCB=Extend property.)rF   rC   r   rD   s    r=   _is_incb_extendrO   w   rK   r<   c                  (    e Zd ZU dZded<   ded<   dS )BreakResultz*Result of grapheme cluster break decision.rF   should_breakr@   ri_countN)r)   r*   r+   r,   __annotations__r;   r<   r=   rQ   rQ   }   s+         44MMMMMr<   rQ   prev_gcbcurr_gcbOptional[BreakResult]c                   | t           j        k    r!|t           j        k    rt          dd          S | t           j        t           j        t           j        fv rt          dd          S |t           j        t           j        t           j        fv rt          dd          S | t           j        k    rA|t           j        t           j        t           j        t           j        fv rt          dd          S | t           j        t           j        fv r+|t           j        t           j	        fv rt          dd          S | t           j        t           j	        fv r!|t           j	        k    rt          dd          S |t           j
        k    rt          dd          S |t           j        k    rt          dd          S | t           j        k    rt          dd          S dS )z
    Check simple GCB-pair-based break rules (cacheable).

    Returns BreakResult for rules that can be determined from GCB properties alone, or None if
    complex lookback rules (GB9c, GB11) need to be checked.
    Fr   rR   rS   TN)r   r.   r/   rQ   r0   r6   r7   r9   r:   r8   r1   r5   r4   )rU   rV   s     r=   _simple_break_checkrZ      s    36h#&00:::: CK000q9999 CK000q9999 35X#%)HHH:::: CFCE?""xCE35>'A'A:::: CGSU###CE(9(9:::: 3::::: 3###:::: 3;:::: 4r<   textstrcurr_idxrS   c                   t          | |          }||S |t          j        k    rt          dd          S t	          ||                   }t          |          rxd}|dz
  }|dk    rkt	          ||                   }	t          |	          rd}|dz  }n9t          |	          r|dz  }n$t          |	          r|rt          dd          S nn|dk    k| t          j        k    r{t          |          rl|dz
  }|dk    rat	          ||                   }	t          |	          }
|
t          j
        k    r|dz  }n!t          |	          rt          dd          S n|dk    a| t          j        k    r>|t          j        k    r.|dz  dk    rt          d|dz             S t          dd          S |t          j        k    rdnd}t          d|          S )z
    Determine if there should be a grapheme cluster break between prev and curr.

    Implements UAX #29 grapheme cluster boundary rules.
    NFr   rY   r	   Tr   )rZ   r   r2   rQ   ordrM   rJ   rO   rH   rE   r1   r3   )rU   rV   r[   r]   rS   resultcurr_ucs
has_linkeriprev_ucs	prev_props              r=   _should_breakrf      s    !844F 37::::
 4>""H(## 
qL1ff47||Hx(( 
!
Q ** Q#H--  G&EAFFFF 1ff 378BBqL1ff47||H/99ICJ&&Q*844 "BBBB 1ff 3)))h#:P.P.Pa<1EHqLIIIIq9999  666qqAHD8<<<<r<   NunistrstartendOptional[int]Iterator[str]c              #    K   | sdS t          |           }||}||k    s||k    rdS t          ||          }|}d}t          t          | |                             }|t          j        k    rd}t          |dz   |          D ]U}t          t          | |                             }t          ||| ||          }	|	j        }|	j	        r| ||         V  |}|}V| ||         V  dS )aT  
    Iterate over grapheme clusters in a Unicode string.

    Grapheme clusters are "user-perceived characters" - what a user would
    consider a single character, which may consist of multiple Unicode
    codepoints (e.g., a base character with combining marks, emoji sequences).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)).
    :yields: Grapheme cluster substrings.

    Example::

        >>> list(iter_graphemes('cafe\u0301'))
        ['c', 'a', 'f', 'e\u0301']
        >>> list(iter_graphemes('ok\U0001F468\u200D\U0001F469\u200D\U0001F467'))
        ['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467']
        >>> list(iter_graphemes('ok\U0001F1FA\U0001F1F8'))
        ['o', 'k', '\U0001F1FA\U0001F1F8']

    .. versionadded:: 0.3.0
    Nr   r	   )
lenminrE   r_   r   r3   rangerf   rS   rR   )
rg   rh   ri   lengthcluster_startrS   rU   idxrV   r`   s
             r=   iter_graphemesrs      s(     8  [[F
{||u
c6

C MH 's6%='9'9::H 3)))UQY$$ 
 
*3vc{+;+;<<x63II? 	 s*++++M s"
######r<   posc                V   t          | |dz
                     }|dk    r|dk    r| |dz
           dk    r|dz
  S |dk     r_|dk    rT|dk    rNt          | |dz
                     }|dk    r0t          |          t          j        k    rt	          | |dz
            S |dz
  S |dz
  }|dk    rk||z
  t
          k     r]t          | |                   }d|cxk    rdk     rn nn7t          |          t          j        k    rn|dz  }|dk    r||z
  t
          k     ]|}t          t          | |                             }|t          j        k    rdnd}t          |dz   |          D ]I}	t          t          | |	                             }
t          ||
| |	|          }|j
        }|j        r|	}|
}J|S )a  
    Find the start of the grapheme cluster containing the character before pos.

    Scans backwards from pos to find a safe starting point, then iterates forward using standard
    break rules to find the actual cluster boundary.

    :param text: The Unicode string.
    :param pos: Position to search before (exclusive).
    :returns: Start position of the grapheme cluster.
    r	   r%   r      r   r   )r_   rE   r   r4   _find_cluster_startMAX_GRAPHEME_SCANr0   r3   ro   rf   rS   rR   )r[   rt   	target_cpprev_cp
safe_startcprq   left_gcbrS   rc   	right_gcbr`   s               r=   rx   rx   <  s    DqM""I DSAXX$sQw-4*?*?Qw 4!88	T))$sQw-((G$#:7#C#Cs{#R#R*4q999Qw qJ
q..cJ.2CCCj!""2"2&&#+55a
 q..cJ.2CCC M&s4
+;'<'<==H 666qqAH:>3''  +CQLL99	xD!XFF? 	Mr<   c           	     h    |dk    rdS t          | t          |t          |                               S )a  
    Find the grapheme cluster boundary immediately before a position.

    :param unistr: The Unicode string to search.
    :param pos: Position in the string (0 < pos <= len(unistr)).
    :returns: Start index of the grapheme cluster containing the character at pos-1.

    Example::

        >>> grapheme_boundary_before('Hello \U0001F44B\U0001F3FB', 8)
        6
        >>> grapheme_boundary_before('a\r\nb', 3)
        1

    .. versionadded:: 0.3.6
    r   )rx   rn   rm   )rg   rt   s     r=   grapheme_boundary_beforer   p  s2    " axxqvs3F'<'<===r<   c              #     K   | sdS t          |           }||nt          ||          }t          |d          }||k    s||k    rdS |}||k    r.t          | |          }||k     rdS | ||         V  |}||k    ,dS dS )a  
    Iterate over grapheme clusters in reverse order (last to first).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)).
    :yields: Grapheme cluster substrings in reverse order.

    Example::

        >>> list(iter_graphemes_reverse('cafe\u0301'))
        ['e\u0301', 'f', 'a', 'c']

    .. versionadded:: 0.3.6
    Nr   )rm   rn   maxrx   )rg   rh   ri   rp   rt   rq   s         r=   iter_graphemes_reverser     s      (  [[FK&&Sf%5%5CqMME||u
C
+++FC885  E]3&'''' ++++++r<   )r?   r@   rA   r   )r?   r@   rA   rF   )rU   r   rV   r   rA   rW   )rU   r   rV   r   r[   r\   r]   r@   rS   r@   rA   rQ   )r   N)rg   r\   rh   r@   ri   rj   rA   rk   )r[   r\   rt   r@   rA   r@   )rg   r\   rt   r@   rA   r@   ),r,   
__future__r   enumr   	functoolsr   typingr   r   r   r
   rC   table_graphemer   r   r   r   r   r   r   r   r   r   r   r   r   r   collections.abcr   ry   r   rE   rH   rJ   rM   rO   rQ   rZ   rf   rs   rx   r   r   r;   r<   r=   <module>r      s=    # " " " " "             6 6 6 6 6 6 6 6 6 6 , + + + + +: : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : :  )((((((      '   , 4   D 47 7 7 7
 4- - - -
 40 0 0 0
 4- - - -
    *    4- - - -`@= @= @= @=J A$ A$ A$ A$ A$H1 1 1 1h> > > >0 & & & & & & &r<   