
    i@+                        d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	m
Z
mZ ddlmZ  ej        e          ZdZdZd)d
Zd*dZddddd+dZd,dZd-dZd.dZdZdddd/d Zd0d"Zd1d%Zd2d(ZdS )3a)  Cross-session rate limit guard for Nous Portal.

Writes rate limit state to a shared file so all sessions (CLI, gateway,
cron, auxiliary) can check whether Nous Portal is currently rate-limited
before making requests.  Prevents retry amplification when RPH is tapped.

Each 429 from Nous triggers up to 9 API calls per conversation turn
(3 SDK retries x 3 Hermes retries), and every one of those calls counts
against RPH.  By recording the rate limit state on first 429 and checking
it before subsequent attempts, we eliminate the amplification effect.
    )annotationsN)AnyMappingOptional)atomic_replacerate_limitsz	nous.jsonreturnstrc                    	 ddl m}   |             }nM# t          $ r@ t          j                            t          j                            d          d          }Y nw xY wt          j                            |t          t                    S )z2Return the path to the Nous rate limit state file.r   )get_hermes_home~z.hermes)	hermes_constantsr   ImportErrorospathjoin
expanduser_STATE_SUBDIR_STATE_FILENAME)r   bases     :/home/piyush/.hermes/hermes-agent/agent/nous_rate_guard.py_state_pathr      s    @444444   @ @ @w||BG..s33Y??@7<<m_===s    AAAheadersOptional[Mapping[str, str]]Optional[float]c                    | sdS d |                                  D             }dD ]K}|                    |          }|2	 t          |          }|dk    r|c S 4# t          t          f$ r Y Gw xY wLdS )uf  Extract the best available reset-time estimate from response headers.

    Priority:
      1. x-ratelimit-reset-requests-1h  (hourly RPH window — most useful)
      2. x-ratelimit-reset-requests     (per-minute RPM window)
      3. retry-after                     (generic HTTP header)

    Returns seconds-from-now, or None if no usable header found.
    Nc                >    i | ]\  }}|                                 |S  lower.0kvs      r   
<dictcomp>z(_parse_reset_seconds.<locals>.<dictcomp>4   &    8881qwwyy!888    )zx-ratelimit-reset-requests-1hzx-ratelimit-reset-requestszretry-afterr   )itemsgetfloat	TypeError
ValueError)r   loweredkeyrawvals        r   _parse_reset_secondsr1   '   s      t88888G  
 kk#?Cjj77JJJ z*     4s   AA-,A-g     r@)r   error_contextdefault_cooldownr2   Optional[dict[str, Any]]r3   r*   Nonec                   t          j                     }d}t          |           }|||z   }|[t          |t                    rF|                    d          }t          |t
          t          f          r||k    rt          |          }|||z   }t                      }	 t          j	        
                    |          }t          j        |d           ||||z
  d}	t          j        |d          \  }
}	 t          j        |
d          5 }t          j        |	|           ddd           n# 1 swxY w Y   t#          ||           n5# t$          $ r( 	 t          j        |           n# t(          $ r Y nw xY w w xY wt*                              d	||z
  |           dS # t$          $ r&}t*                              d
|           Y d}~dS d}~ww xY w)a  Record that Nous Portal is rate-limited.

    Parses the reset time from response headers or error context.
    Falls back to ``default_cooldown`` (5 minutes) if no reset info
    is available.  Writes to a shared file that all sessions can read.

    Args:
        headers: HTTP response headers from the 429 error.
        error_context: Structured error context from _extract_api_error_context().
        default_cooldown: Fallback cooldown in seconds when no header data.
    Nreset_atT)exist_ok)r7   recorded_atreset_secondsz.tmp)dirsuffixwz3Nous rate limit recorded: resets in %.0fs (at %.0f)z)Failed to write Nous rate limit state: %s)timer1   
isinstancedictr)   intr*   r   r   r   dirnamemakedirstempfilemkstempfdopenjsondumpr   	ExceptionunlinkOSErrorloggerinfodebug)r   r2   r3   nowr7   header_seconds	ctx_resetr   	state_dirstatefdtmp_pathfexcs                 r   record_nous_rate_limitrX   G   sf   " )++CH *'22N!' J}d;;!%%j11	i#u.. 	(9s??Y''H ))==DGGOOD))	
I---- !%^
 
  'IfEEEH
	2s## $q	%###$ $ $ $ $ $ $ $ $ $ $ $ $ $ $8T**** 	 	 		(####   	 	AsNH	
 	
 	
 	
 	
  G G G@#FFFFFFFFFGsy   AF  7E
 D."E
 .D22E
 5D26E
 	F  

E<E*)E<*
E74E<6E77E<<"F   
G*GGc                    t                      } 	 t          |           5 }t          j        |          }ddd           n# 1 swxY w Y   |                    dd          }|t          j                    z
  }|dk    r|S 	 t          j        |            n# t          $ r Y nw xY wdS # t          t          j
        t          t          f$ r Y dS w xY w)zCheck if Nous Portal is currently rate-limited.

    Returns:
        Seconds remaining until reset, or None if not rate-limited.
    Nr7   r   )r   openrG   loadr)   r>   r   rJ   rK   FileNotFoundErrorJSONDecodeErrorKeyErrorr+   )r   rV   rS   r7   	remainings        r   nous_rate_limit_remainingr`      s    ==D$ZZ 	!1IaLLE	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!99Z++ty{{*	q==	IdOOOO 	 	 	D	tt3XyI   ttsW   B' A B'  AB' A6B'  B B' 
B"B' !B""B' '"CCc                     	 t          j        t                                 dS # t          $ r Y dS t          $ r&} t
                              d|            Y d} ~ dS d} ~ ww xY w)zCClear the rate limit state (e.g., after a successful Nous request).z)Failed to clear Nous rate limit state: %sN)r   rJ   r   r\   rK   rL   rN   )rW   s    r   clear_nous_rate_limitrb      s    G
	+--         G G G@#FFFFFFFFFGs    $ 
A 	A AA secondsc                    t          dt          |                     }|dk     r| dS |dk     r"t          |d          \  }}|r| d| dn| dS t          |d          \  }}|dz  }|r| d| dn| dS )	z6Format seconds remaining into human-readable duration.r   <   si  zm mzh h)maxrA   divmod)rc   rf   rg   secrh   	remainders         r   format_remainingrm      s    As7||A2vvwww4xx23!$1!~~s~~~~Q'''1!T??LAyRA)a<<1<<<<Q''')r'   g      N@)r   last_known_statern   Optional[Any]boolc                l    t          |           }t          |          rdS |t          |          rdS dS )u  Decide whether a 429 from Nous Portal is a real account rate limit.

    Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
    MiMo, Hermes, ...) behind one endpoint.  A 429 can mean either:

      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
          exhausted — a genuine rate limit that will last until the
          bucket resets.
      (b) The upstream provider is out of capacity for a specific model
          — transient, clears in seconds, and has nothing to do with
          the caller's quota on Nous.

    Tripping the cross-session breaker on (b) blocks ALL Nous requests
    (and all models, since Nous is one provider key) for minutes even
    though the caller's account is healthy and a different model would
    have worked.  That's the bug users hit when DeepSeek V4 Pro 429s
    trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.

    We tell the two apart by looking at:

      1. The 429 response's own ``x-ratelimit-*`` headers.  Nous emits
         the full suite on every response including 429s.  An exhausted
         bucket (``remaining == 0`` with a reset window >= 60s) is
         proof of (a).
      2. The last-known-good rate-limit state captured by
         ``_capture_rate_limits()`` on the previous successful
         response.  If any bucket there was already near-exhausted with
         a substantial reset window, the current 429 is almost
         certainly (a) continuing from that condition.

    If neither signal fires, we treat the 429 as (b): fail the single
    request, let the retry loop or model-switch proceed, and do NOT
    write the cross-session breaker file.

    Returns True when the evidence points at (a).
    TNF)_parse_buckets_from_headers_has_exhausted_bucket_has_exhausted_bucket_in_object)r   rn   rS   s      r   is_genuine_nous_rate_limitru      sH    T (00EU## t
 #(GHX(Y(Y#t5r'   0dict[str, tuple[Optional[int], Optional[float]]]c                8   | si S d |                                  D             }t          d |D                       si S dd}dd	}i }d
D ]O} ||                    d|                     } ||                    d|                     }||||f||<   P|S )zExtract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.

    Returns empty dict when no rate-limit headers are present.
    c                >    i | ]\  }}|                                 |S r   r   r!   s      r   r%   z/_parse_buckets_from_headers.<locals>.<dictcomp>  r&   r'   c              3  @   K   | ]}|                     d           V  dS )zx-ratelimit-N)
startswith)r"   r#   s     r   	<genexpr>z._parse_buckets_from_headers.<locals>.<genexpr>  s.      ==q||N++======r'   r/   Optional[str]r	   Optional[int]c                t    | d S 	 t          t          |                     S # t          t          f$ r Y d S w xY wN)rA   r*   r+   r,   r/   s    r   
_maybe_intz/_parse_buckets_from_headers.<locals>._maybe_int  sJ    ;4	uSzz??":& 	 	 	44	s   " 77r   c                Z    | d S 	 t          |           S # t          t          f$ r Y d S w xY wr   )r*   r+   r,   r   s    r   _maybe_floatz1_parse_buckets_from_headers.<locals>._maybe_float  sD    ;4	:::& 	 	 	44	s    **)requestszrequests-1htokensz	tokens-1hzx-ratelimit-remaining-zx-ratelimit-reset-N)r/   r|   r	   r}   )r/   r|   r	   r   )r(   anyr)   )r   r-   r   r   resulttagr_   resets           r   rr   rr      s      	88888G==W===== 	       @BFA - -Jw{{+IC+I+IJJKK	W[[)Cc)C)CDDEE E$5$e,F3KMr'   buckets3Mapping[str, tuple[Optional[int], Optional[float]]]c                n    |                                  D ]\  }}||dk    r||t          k    r dS  dS )zMReturn True when any bucket has remaining == 0 AND a meaningful reset window.Nr   TF)values_MIN_RESET_FOR_BREAKER_SECONDS)r   r_   r   s      r   rs   rs     sX     $NN,,  	5	A=22244 35r'   rS   r   c                   dD ]~}t          | |d          }|t          |dd          pd}t          |dd          pd}t          |dd          }|t          |dd          pd}|dk    ri|dk    rp|t          k    r d	S d
S )a1  Check a RateLimitState-like object for an exhausted bucket.

    Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
    exposed as attributes ``requests_min``, ``requests_hour``,
    ``tokens_min``, ``tokens_hour``) and falls back gracefully for any
    object missing those attributes.
    )requests_minrequests_hour
tokens_mintokens_hourNlimitr   r_   remaining_seconds_nowr:   g        TF)getattrr   )rS   attrbucketr   r_   r   s         r   rt   rt   ,  s     O  d++>++0qFK338q	  7>>=FOS99@SEA::q==22244 35r'   )r	   r
   )r   r   r	   r   )r   r   r2   r4   r3   r*   r	   r5   )r	   r   )r	   r5   )rc   r*   r	   r
   )r   r   rn   ro   r	   rp   )r   r   r	   rv   )r   r   r	   rp   )rS   r   r	   rp   )__doc__
__future__r   rG   loggingr   rD   r>   typingr   r   r   utilsr   	getLogger__name__rL   r   r   r   r1   rX   r`   rb   rm   r   ru   rr   rs   rt   r   r'   r   <module>r      s  
 
 # " " " " "   				   ) ) ) ) ) ) ) ) ) )            		8	$	$> > > >   D ,0.2#	AG AG AG AG AG AGH   0G G G G
* 
* 
* 
*  "& 
 ,0&*4 4 4 4 4 4n$ $ $ $N        r'   