
    iF<                    B    d Z ddlmZ ddlmZ dgZ G d d          ZdS )u  Stateful scrubber for reasoning/thinking blocks in streamed assistant text.

``run_agent._strip_think_blocks`` is regex-based and correct for a complete
string, but when it runs *per-delta* in ``_fire_stream_delta`` it destroys
the state that downstream consumers (CLI ``_stream_delta``, gateway
``GatewayStreamConsumer._filter_and_accumulate``) rely on.

Concretely, when MiniMax-M2.7 streams

    delta1 = "<think>"
    delta2 = "Let me check their config"
    delta3 = "</think>"

the per-delta regex erases delta1 entirely (case 2: unterminated-open at
boundary matches ``^<think>...``), so the downstream state machine never
sees the open tag, treats delta2 as regular content, and leaks reasoning
to the user.  Consumers that don't run their own state machine (ACP,
api_server, TTS) never had any defence at all — they just emitted
whatever survived the upstream regex.

This module centralises the tag-suppression state machine at the
upstream layer so every stream_delta_callback sees text that has
already had reasoning blocks removed.  Partial tags at delta
boundaries are held back until the next delta resolves them, and
end-of-stream flushing surfaces any held-back prose that turned out
not to be a real tag.

Usage::

    scrubber = StreamingThinkScrubber()
    for delta in stream:
        visible = scrubber.feed(delta)
        if visible:
            emit(visible)
    tail = scrubber.flush()  # at end of stream
    if tail:
        emit(tail)

The scrubber is re-entrant per agent instance.  Call ``reset()`` at
the top of each new turn so a hung block from an interrupted prior
stream cannot taint the next turn's output.

Tag variants handled (case-insensitive):
  ``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
  ``<REASONING_SCRATCHPAD>``.

Block-boundary rule for opens: an opening tag is only treated as a
reasoning-block opener when it appears at the start of the stream,
after a newline (optionally followed by whitespace), or when only
whitespace has been emitted on the current line.  This prevents prose
that *mentions* the tag name (e.g. ``"use <think> tags here"``) from
being incorrectly suppressed.  Closed pairs (``<think>X</think>``) are
always suppressed regardless of boundary; a closed pair is an
intentional, bounded construct.
    )annotations)TupleStreamingThinkScrubberc                  D   e Zd ZU dZdZded<    ed eD                       Zded<    ed eD                       Zded<    e	d	 eez   D                       Z
d
ed<   d"dZd"dZd#dZd$dZed%d            Zd&dZd'dZd(dZed)d            Zed#d             Zd!S )*r   an  Stateful scrubber for streaming reasoning/thinking blocks.

    State machine:
      - ``_in_block``: True while inside an opened block, waiting for
        a close tag.  All text inside is discarded.
      - ``_buf``: held-back partial-tag tail.  Emitted / discarded on
        the next ``feed()`` call or by ``flush()``.
      - ``_last_emitted_ended_newline``: True iff the most recent
        emission to the consumer ended with ``\n``, or nothing has
        been emitted yet (start-of-stream counts as a boundary).  Used
        to decide whether an open tag at buffer position 0 is at a
        block boundary.
    )thinkthinking	reasoningthoughtREASONING_SCRATCHPADTuple[str, ...]_OPEN_TAG_NAMESc              #  "   K   | ]
}d | dV  dS )<>N .0names     9/home/piyush/.hermes/hermes-agent/agent/think_scrubber.py	<genexpr>z StreamingThinkScrubber.<genexpr>Y   s*      'P'PD'P'P'P'P'P'P    
_OPEN_TAGSc              #  "   K   | ]
}d | dV  dS )</r   Nr   r   s     r   r   z StreamingThinkScrubber.<genexpr>Z   s*      (R(R$d(R(R(R(R(R(Rr   _CLOSE_TAGSc              #  4   K   | ]}t          |          V  d S )N)len)r   tags     r   r   z StreamingThinkScrubber.<genexpr>]   s(      IICHHIIIIIIr   int_MAX_TAG_LENreturnNonec                0    d| _         d| _        d| _        d S )NF T	_in_block_buf_last_emitted_ended_newlineselfs    r   __init__zStreamingThinkScrubber.__init___   s    $	15(((r   c                0    d| _         d| _        d| _        dS )z4Reset all state.  Call at the top of every new turn.Fr$   TNr%   r)   s    r   resetzStreamingThinkScrubber.resetd   s    	+/(((r   textstrc                *   |sdS | j         |z   }d| _         g }|re| j        r~|                     || j                  \  }}|dk    rD|                     || j                  }|r|| d         nd| _         d                    |          S |||z   d         }d| _        n|                     |          }|                     ||          \  }}	|u|dk    s|d         |k    rc|\  }
}|d|
         }|rF|                     |          }|r/|	                    |           |
                    d          | _        ||d         }-|dk    rh|d|         }|rF|                     |          }|r/|	                    |           |
                    d          | _        d| _        |||	z   d         }|                     || j                  }|                     || j                  }t          ||          }|r|d|          }|| d         | _         n	|}d| _         |rF|                     |          }|r/|	                    |           |
                    d          | _        d                    |          S |ed                    |          S )zFeed one delta; return the scrubbed visible portion.

        May return an empty string when the entire delta is reasoning
        content or is being held back pending resolution of a partial
        tag at the boundary.
        r$   NFr   
T)r'   r&   _find_first_tagr   _max_partial_suffixjoin_find_earliest_closed_pair_find_open_at_boundary_strip_orphan_close_tagsappendendswithr(   r   max)r*   r.   bufout	close_idx	close_lenheldpairopen_idxopen_len	start_idxend_idx	preceding
held_close	emit_texts                  r   feedzStreamingThinkScrubber.feedj   s     	2i$	 Q	$~ P$'+';';)( ($	9 ??  33C9IJJD/3 ;TEFFDI773<<')i/001!& 66s;; &*%@%@& &"(
 #NNd1g&9&9)-&Iw #JYJI  $($A$A)$L$L	$ JJy111 ) 2 24 8 8 !< ghh-Cr>> !$IXII  $($A$A)$L$L	$ JJy111 ) 2 24 8 8 !< &*DNh1223C
 //T_EE!55) 
 4,, # #FdUFI #TEFFDII #I "DI  $ = =i H HI  

9---%..t44 8 wws||#c  Q	$f wws||r   c                    | j         rd| _        d| _         dS | j        }d| _        |sdS |                     |          }|r|                    d          | _        |S )u/  End-of-stream flush.

        If still inside an unterminated block, held-back content is
        discarded — leaking partial reasoning is worse than a
        truncated answer.  Otherwise the held-back partial-tag tail is
        emitted verbatim (it turned out not to be a real tag prefix).
        r$   Fr2   )r&   r'   r8   r:   r(   )r*   tails     r   flushzStreamingThinkScrubber.flush   sq     > 	DI"DN2y	 	2,,T22 	C/3}}T/B/BD,r   r<   tagsTuple[int, int]c                    |                                  }d}d}|D ]L}|                    |                                           }|dk    r|dk    s||k     r|}t          |          }M||fS )zfReturn (earliest_index, tag_length) over *tags*, or (-1, 0).

        Case-insensitive match.
        r1   r   )lowerfindr   )r<   rM   	buf_lowerbest_idxbest_lenr   idxs          r   r3   z&StreamingThinkScrubber._find_first_tag   sx     IIKK	 	$ 	$C..--Cbyyh"nnhs88!!r   c                   |                                 }d}t          | j        | j                  D ]\  }}|                                 }|                                 }|                    |          }|dk    rI|                    ||t          |          z             }	|	dk    rv|	t          |          z   }
|||d         k     r||
f}|S )a  Return (start_idx, end_idx) of the earliest closed pair, else None.

        A closed pair is ``<tag>...</tag>`` of any variant.  Matches are
        case-insensitive and non-greedy (the closest close tag after
        an open tag wins), matching the regex ``<tag>.*?</tag>``
        semantics of ``_strip_think_blocks`` case 1.  When two tag
        variants could both match, the one whose open tag appears
        earlier wins.
        Nr1   r   )rP   zipr   r   rQ   r   )r*   r<   rR   bestopen_tag	close_tag
open_lowerclose_lowerrB   r>   rE   s              r   r6   z1StreamingThinkScrubber._find_earliest_closed_pair   s     IIKK	)-#&t8H#I#I 	+ 	+Hi!))J#//++K ~~j11H2~~!XJ7 I B#k"2"22G|x$q'11 '*r   already_emitted	list[str]c                ,   |                                 }d}d}| j        D ]q}|                                 }d}	 |                    ||          }	|	dk    rn;|                     ||	|          r|dk    s|	|k     r|	}t	          |          }n|	dz   }Xr||fS )zReturn the earliest block-boundary open-tag (idx, len).

        Returns (-1, 0) if no boundary-legal opener is present.
        r1   r   T   )rP   r   rQ   _is_block_boundaryr   )
r*   r<   r]   rR   rS   rT   r   	tag_lowersearch_startrU   s
             r   r7   z-StreamingThinkScrubber._find_open_at_boundary  s     IIKK	? 	' 	'C		IL	'nnY=="99**3_EE 2~~x#&#&s88"Qw	' !!r   rU   boolc                d   |dk    r$|r|d                              d          S | j        S |d|         }|                    d          }|dk    r?|r|d                              d          }n| j        }|o|                                dk    S ||dz   d                                         dk    S )a  True iff position *idx* in *buf* is a block boundary.

        A block boundary is:
          - buf position 0 AND the most recent emission ended with
            a newline (or nothing has been emitted yet)
          - any position whose preceding text on the current line
            (since the last newline in buf) is whitespace-only, AND
            if there is no newline in the preceding buf portion, the
            most recent prior emission ended with a newline
        r   r1   r2   Nr$   r`   )r:   r(   rfindstrip)r*   r<   rU   r]   rF   last_nlprior_newlines          r   ra   z)StreamingThinkScrubber._is_block_boundary*  s     !88  :&r*33D99933I	//$''b==  A / 3 < <T B B $ @ <Y__%6%6"%<< 1&,,.."44r   c                L   |sdS |                                 }t          t          |          | j        dz
            }t	          |dd          D ]T}|| d         }|D ]D}|                                 }t          |          |k    r|                    |          r|c c S EUdS )zReturn the longest buf-suffix that is a prefix of any tag.

        Only prefixes strictly shorter than the tag itself count
        (full-length suffixes are the tag and are handled as matches,
        not held-back partials).  Case-insensitive.
        r   r`   r1   N)rP   minr   r    range
startswith)	clsr<   rM   rR   	max_checkisuffixr   rb   s	            r   r4   z*StreamingThinkScrubber._max_partial_suffixM  s      	1IIKK	I(81(<==	y!R(( 	 	Arss^F  IIKK	y>>A%%)*>*>v*F*F%HHHHH qr   c                .   d|vr|S |                                 }g }d}|t          |          k     rd}|||dz            dk    r| j        D ]}|                                 }t          |          }||||z            |k    rJ||z   }	|	t          |          k     r,||	         dv r"|	dz  }	|	t          |          k     r
||	         dv "|	}d} n|s |                    ||                    |dz  }|t          |          k     d                    |          S )	a  Remove any close tags from *text* (orphan-close handling).

        An orphan close tag has no matching open in the current
        scrubber state; it's always noise, stripped with any trailing
        whitespace so the surrounding prose flows naturally.
        r   r   F   z 	
r`   Tr$   )rP   r   r   r9   r5   )
rn   r.   
text_lowerr=   rp   matchedr   rb   tag_lenjs
             r   r8   z/StreamingThinkScrubber._strip_orphan_close_tagsc  sH    tKZZ\\
#d))mmG!AE'"d**?  C #		I!)nnG!!AK-0I== K#d))mmQ90D0DFA  #d))mmQ90D0D"& >  

47###Q# #d))mm$ wws||r   N)r!   r"   )r.   r/   r!   r/   )r!   r/   )r<   r/   rM   r   r!   rN   )r<   r/   )r<   r/   r]   r^   r!   rN   )r<   r/   rU   r   r]   r^   r!   rd   )r<   r/   rM   r   r!   r   )__name__
__module____qualname____doc__r   __annotations__tupler   r   r;   r    r+   r-   rI   rL   staticmethodr3   r6   r7   ra   classmethodr4   r8   r   r   r   r   r   @   s         (O     #(%'P'P'P'P'P"P"PJPPPP#(5(R(R/(R(R(R#R#RKRRRR II
[0HIIIIILIIII6 6 6 6
0 0 0 0` ` ` `D   . " " " \""   8" " " "2!5 !5 !5 !5F    [*    [  r   N)r{   
__future__r   typingr   __all__r   r   r   r   <module>r      sz   6 6p # " " " " "      #
$B B B B B B B B B Br   