
    i[                    h   U d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	 ddl
mZmZmZmZ  ej        e          ZdZdZdZd	Zd
ZdZdZe G d d                      Zd/dZi Zded<   d0dZd1dZd2dZd3dZ d4d Z! ej"        d!ej#                  Z$d5d$Z%ed%d6d+Z& G d, d-          Z'g d.Z(dS )7un  Persistent session goals — the Ralph loop for Hermes.

A goal is a free-form user objective that stays active across turns. After
each turn completes, a small judge call asks an auxiliary model "is this
goal satisfied by the assistant's last response?". If not, Hermes feeds a
continuation prompt back into the same session and keeps working until the
goal is done, turn budget is exhausted, the user pauses/clears it, or the
user sends a new message (which takes priority and pauses the goal loop).

State is persisted in SessionDB's ``state_meta`` table keyed by
``goal:<session_id>`` so ``/resume`` picks it up.

Design notes / invariants:

- The continuation prompt is just a normal user message appended to the
  session via ``run_conversation``. No system-prompt mutation, no toolset
  swap — prompt caching stays intact.
- Judge failures are fail-OPEN: ``continue``. A broken judge must not wedge
  progress; the turn budget is the backstop.
- When a real user message arrives mid-loop it preempts the continuation
  prompt and also pauses the goal loop for that turn (we still re-judge
  after, so if the user's message happens to complete the goal the judge
  will say ``done``).
- This module has zero hard dependency on ``cli.HermesCLI`` or the gateway
  runner — both wire the same ``GoalManager`` in.

Nothing in this module touches the agent's system prompt or toolset.
    )annotationsN)	dataclassasdict)AnyDictOptionalTuple   g      >@i     a  [Continuing toward your standing goal]
Goal: {goal}

Continue working toward this goal. Take the next concrete step. If you believe the goal is complete, state so explicitly and stop. If you are blocked and need input from the user, say so clearly and stop.u  You are a strict judge evaluating whether an autonomous agent has achieved a user's stated goal. You receive the goal text and the agent's most recent response. Your only job is to decide whether the goal is fully satisfied based on that response.

A goal is DONE only when:
- The response explicitly confirms the goal was completed, OR
- The response clearly shows the final deliverable was produced, OR
- The response explains the goal is unachievable / blocked / needs user input (treat this as DONE with reason describing the block).

Otherwise the goal is NOT done — CONTINUE.

Reply ONLY with a single JSON object on one line:
{"done": <true|false>, "reason": "<one-sentence rationale>"}zNGoal:
{goal}

Agent's most recent response:
{response}

Is the goal satisfied?c                      e Zd ZU dZded<   dZded<   dZded<   eZded	<   d
Z	ded<   d
Z
ded<   dZded<   dZded<   dZded<   dZded<   ddZedd            ZdS )	GoalStatez+Serializable goal state stored per session.strgoalactivestatusr   int
turns_used	max_turns        float
created_atlast_turn_atNOptional[str]last_verdictlast_reasonpaused_reasonconsecutive_parse_failuresreturnc                H    t          j        t          |           d          S )NF)ensure_ascii)jsondumpsr   selfs    5/home/piyush/.hermes/hermes-agent/hermes_cli/goals.pyto_jsonzGoalState.to_jsonp   s    z&,,U;;;;    raw'GoalState'c                   t          j        |          } | |                    dd          |                    dd          t          |                    dd          pd          t          |                    dt                    pt                    t          |                    dd	          pd	          t          |                    d
d	          pd	          |                    d          |                    d          |                    d          t          |                    dd          pd          
  
        S )Nr    r   r   r   r   r   r   r   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r   r   )r!   loadsgetr   DEFAULT_MAX_TURNSr   )clsr(   datas      r%   	from_jsonzGoalState.from_jsons   s   z#s&"%%88Hh//488L!449::$((;0ABBWFWXXTXXlC88?C@@txx<<CDD.11//((?33'*4884PRS+T+T+YXY'Z'Z
 
 
 	
r'   r   r   )r(   r   r   r)   )__name__
__module____qualname____doc____annotations__r   r   r.   r   r   r   r   r   r   r   r&   classmethodr1    r'   r%   r   r   a   s         55IIIFJ&I&&&&JL"&L&&&&!%K%%%%#'M''''&'''''< < < < 
 
 
 [
 
 
r'   r   
session_idr   r   c                    d|  S )Nzgoal:r9   )r:   s    r%   	_meta_keyr<      s    :r'   Dict[str, Any]	_DB_CACHEOptional[Any]c                    	 ddl m}  ddlm} t	           |                       }n3# t
          $ r&}t                              d|           Y d}~dS d}~ww xY wt          	                    |          }||S 	  |            }n3# t
          $ r&}t                              d|           Y d}~dS d}~ww xY w|t          |<   |S )a  Return a SessionDB instance for the current HERMES_HOME.

    SessionDB has no built-in singleton, but opening a new connection per
    /goal call would thrash the file. We cache one instance per
    ``hermes_home`` path so profile switches still pick up the right DB.
    Defensive against import/instantiation failures so tests and
    non-standard launchers can still use the GoalManager.
    r   )get_hermes_home)	SessionDBz,GoalManager: SessionDB bootstrap failed (%s)Nz$GoalManager: SessionDB() raised (%s))
hermes_constantsrA   hermes_staterB   r   	Exceptionloggerdebugr>   r-   )rA   rB   homeexccacheddbs         r%   _get_session_dbrL      s   444444******??$$%%   CSIIIttttt ]]4  FY[[   ;SAAAttttt IdOIs,   #& 
AAA8
B 
B3B..B3Optional[GoalState]c                   | sdS t                      }|dS 	 |                    t          |                     }n3# t          $ r&}t                              d|           Y d}~dS d}~ww xY w|sdS 	 t                              |          S # t          $ r'}t                              d| |           Y d}~dS d}~ww xY w)z4Load the goal for a session, or None if none exists.Nz GoalManager: get_meta failed: %sz3GoalManager: could not parse stored goal for %s: %s)	rL   get_metar<   rE   rF   rG   r   r1   warning)r:   rK   r(   rI   s       r%   	load_goalrQ      s     t			B	ztkk)J//00   7===ttttt  t""3'''   LjZ]^^^ttttts-   "; 
A+A&&A+3B 
B>B99B>stateNonec                   | sdS t                      }|dS 	 |                    t          |           |                                           dS # t          $ r&}t
                              d|           Y d}~dS d}~ww xY w)z5Persist a goal to SessionDB. No-op if DB unavailable.Nz GoalManager: set_meta failed: %s)rL   set_metar<   r&   rE   rF   rG   )r:   rR   rK   rI   s       r%   	save_goalrV      s     			B	z>
Ij))5==??;;;;; > > >7=========>s   5A 
A?A::A?c                Z    t          |           }|dS d|_        t          | |           dS )zDMark a goal cleared in the DB (preserved for audit, status=cleared).Ncleared)rQ   r   rV   )r:   rR   s     r%   
clear_goalrY      s6    j!!E}ELj%     r'   textlimitr   c                N    | sdS t          |           |k    r| S | d |         dz   S )Nr+   u   … [truncated])len)rZ   r[   s     r%   	_truncater^      s9     r
4yyE<+++r'   z\{.*?\}r(   Tuple[bool, str, bool]c                l   | sdS |                                  }|                    d          r=|                     d          }|                    d          }|dk    r||dz   d         }d}	 t          j        |          }ng# t
          $ rZ t                              |          }|r;	 t          j        |                    d                    }n# t
          $ r d}Y nw xY wY nw xY wt          |t                    sd	d
t          | d          dfS |                    d          }t          |t                    r)|                                                                 dv }nt          |          }t          |                    d          pd                                           }|sd}||d	fS )a  Parse the judge's reply. Fail-open to ``(False, "<reason>", parse_failed)``.

    Returns ``(done, reason, parse_failed)``. ``parse_failed`` is True when the
    judge returned output that couldn't be interpreted as the expected JSON
    verdict (empty body, prose, malformed JSON). Callers use that flag to
    auto-pause after N consecutive parse failures so a weak judge model
    doesn't silently burn the turn budget.
    )Fzjudge returned empty responseTz````
   Nr   Fzjudge reply was not JSON:    Tdone)trueyes1rf   reasonr+   zno reason provided)strip
startswithfindr!   r,   rE   _JSON_OBJECT_REsearchgroup
isinstancedictr^   r-   r   lowerbool)r(   rZ   nlr0   matchdone_valrf   rj   s           r%   _parse_judge_responserx      s     <;;99;;D u !zz#YYt__88Q=D &*D	z$   &&t,, 	z%++a..11    dD!! QJ9S#3F3FJJDPPxxH(C   ~~%%''+GGH~~(##)r**0022F &%s6   .B &C'*'CC'C!C' C!!C'&C')timeoutr   last_responsery   r   Tuple[str, str, bool]c                  |                                  sdS |                                 sdS 	 ddlm} n3# t          $ r&}t                              d|           Y d}~dS d}~ww xY w	  |d          \  }}n3# t          $ r&}t                              d	|           Y d}~dS d}~ww xY w||sd
S t                              t          | d          t          |t                              }	 |j
        j                            |dt          dd|dgdd|          }nL# t          $ r?}t                              d|           ddt          |          j         dfcY d}~S d}~ww xY w	 |j        d         j        j        pd}	n# t          $ r d}	Y nw xY wt)          |	          \  }
}}|
rdnd}t                              d|t          |d                     |||fS )u  Ask the auxiliary model whether the goal is satisfied.

    Returns ``(verdict, reason, parse_failed)`` where verdict is ``"done"``,
    ``"continue"``, or ``"skipped"`` (when the judge couldn't be reached).

    ``parse_failed`` is True only when the judge call succeeded but its output
    was unusable (empty or non-JSON). API/transport errors return False — they
    are transient and should fail-open silently. Callers use this flag to
    auto-pause after N consecutive parse failures (see
    ``DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES``).

    This is deliberately fail-open: any error returns ``("continue", "...", False)``
    so a broken judge doesn't wedge progress — the turn budget and the
    consecutive-parse-failures auto-pause are the backstops.
    )skippedz
empty goalF)continuez$empty response (nothing to evaluate)Fr   )get_text_auxiliary_clientz.goal judge: auxiliary client import failed: %sN)r~   zauxiliary client unavailableF
goal_judgez0goal judge: get_text_auxiliary_client failed: %s)r~   zno auxiliary client configuredFi  )r   responsesystem)rolecontentuserre   )modelmessagestemperature
max_tokensry   u@   goal judge: API call failed (%s) — falling through to continuer~   zjudge error: Fr+   rf   z goal judge: verdict=%s reason=%sx   )rk   agent.auxiliary_clientr   rE   rF   rG   JUDGE_USER_PROMPT_TEMPLATEformatr^   _JUDGE_RESPONSE_SNIPPET_CHARSchatcompletionscreateJUDGE_SYSTEM_PROMPTinfotyper3   choicesmessager   rx   )r   rz   ry   r   rI   clientr   promptrespr(   rf   rj   parse_failedverdicts                 r%   
judge_goalr     s   * ::<< .--   IHHADDDDDDD A A AEsKKK@@@@@@AA11,?? A A AGMMM@@@@@@A ~U~BB'..tT""=*GHH /  F
G{&--!.ABBF33  . 	
 	
  G G GVX[\\\?499+=??FFFFFFFGl1o%-3    "7s!;!;D&,,ff*G
KK2GYvs=S=STTTFL((s\   5 
A%A  A%)A8 8
B(B##B(/1D! !
E*+4E%E*%E*.F FFc                      e Zd ZdZedd'dZed(d
            Zd)dZd)dZ	d*dZ
ddd+dZd,d-dZddd.dZd/dZd0dZdd d1d$Zd2d&ZdS )3GoalManageruf  Per-session goal state + continuation decisions.

    The CLI and gateway each hold one ``GoalManager`` per live session.

    Methods:

    - ``set(goal)`` — start a new standing goal.
    - ``clear()`` — remove the active goal.
    - ``pause()`` / ``resume()`` — explicit user controls.
    - ``status()`` — printable one-liner.
    - ``evaluate_after_turn(last_response)`` — call the judge, update state,
      and return a decision dict the caller uses to drive the next turn.
    - ``next_continuation_prompt()`` — the canonical user-role message to
      feed back into ``run_conversation``.
    )default_max_turnsr:   r   r   r   c               r    || _         t          |pt                    | _        t	          |          | _        d S N)r:   r   r.   r   rQ   _state)r$   r:   r   s      r%   __init__zGoalManager.__init__y  s3    $!$%6%K:K!L!L+4Z+@+@r'   r   rM   c                    | j         S r   )r   r#   s    r%   rR   zGoalManager.state  s
    {r'   rt   c                4    | j         d uo| j         j        dk    S )Nr   r   r   r#   s    r%   	is_activezGoalManager.is_active  s    {$&I4;+=+IIr'   c                0    | j         d uo| j         j        dv S )N)r   pausedr   r#   s    r%   has_goalzGoalManager.has_goal  s    {$&U4;+=AU+UUr'   c                2   | j         }|	|j        dv rdS |j         d|j         d}|j        dk    rd| d|j         S |j        dk    r"|j        r
d	|j         nd
}d| | d|j         S |j        dk    rd| d|j         S d|j         d| d|j         S )N)rX   z*No active goal. Set one with /goal <text>./z turnsr   u   ⊙ Goal (active, ): r   u    — r+   u   ⏸ Goal (paused, rf   u   ✓ Goal done (zGoal (z, )r   r   r   r   r   r   )r$   sturnsextras       r%   status_linezGoalManager.status_line  s    K9L00??<55!+5558x:::!&:::8x12H-AO---bEAAuAAAAA8v7U77qv777666E66af666r'   N)r   r   r   Optional[int]r   c                  |pd                                 }|st          d          t          |dd|rt          |          n| j        t          j                    d          }|| _        t          | j        |           |S )Nr+   zgoal text is emptyr   r   r   )r   r   r   r   r   r   )	rk   
ValueErrorr   r   r   timer   rV   r:   )r$   r   r   rR   s       r%   setzGoalManager.set  s    
!!## 	31222(1Mc)nnnt7My{{
 
 
 $/5)))r'   user-pausedrj   c                    | j         sd S d| j         _        || j         _        t          | j        | j                    | j         S )Nr   )r   r   r   rV   r:   r$   rj   s     r%   pausezGoalManager.pause  sA    { 	4%$*!$/4;///{r'   T)reset_budgetr   c                   | j         sd S d| j         _        d | j         _        |rd| j         _        t	          | j        | j                    | j         S )Nr   r   )r   r   r   r   rV   r:   )r$   r   s     r%   resumezGoalManager.resume  sS    { 	4%$(! 	'%&DK"$/4;///{r'   rS   c                r    | j         d S d| j         _        t          | j        | j                    d | _         d S )NrX   )r   r   rV   r:   r#   s    r%   clearzGoalManager.clear  s8    ;F&$/4;///r'   c                    | j         sd S d| j         _        d| j         _        || j         _        t	          | j        | j                    d S )Nrf   )r   r   r   r   rV   r:   r   s     r%   	mark_donezGoalManager.mark_done  sI    { 	F##) "($/4;/////r'   )user_initiatedrz   r   r=   c               Z   | j         }||j        dk    r|r|j        ndddddddS |xj        dz  c_        t          j                    |_        t          |j        |          \  }}}||_        ||_        |r|xj	        dz  c_	        nd	|_	        |d
k    r(d
|_        t          | j        |           d
ddd
|d| dS |j	        t          k    r>d|_        d|j	         d|_        t          | j        |           dddd|d|j	         ddS |j        |j        k    rNd|_        d|j         d|j         d|_        t          | j        |           dddd|d|j         d|j         ddS t          | j        |           dd|                                 d|d|j         d|j         d| dS )uv  Run the judge and update state. Return a decision dict.

        ``user_initiated`` distinguishes a real user prompt (True) from a
        continuation prompt we fed ourselves (False). Both increment
        ``turns_used`` because both consume model budget.

        Decision keys:
          - ``status``: current goal status after update
          - ``should_continue``: bool — caller should fire another turn
          - ``continuation_prompt``: str or None
          - ``verdict``: "done" | "continue" | "skipped" | "inactive"
          - ``reason``: str
          - ``message``: user-visible one-liner to print/send
        Nr   Finactivezno active goalr+   )r   should_continuecontinuation_promptr   rj   r   rd   r   rf   u   ✓ Goal achieved: r   z(judge model returned unparseable output z turns in a rowr~   u%   ⏸ Goal paused — the judge model (z turns) isn't returning the required JSON verdict. Route the judge to a stricter model in ~/.hermes/config.yaml:
  auxiliary:
    goal_judge:
      provider: openrouter
      model: google/gemini-3-flash-preview
Then /goal resume to continue.zturn budget exhausted (r   )u   ⏸ Goal paused — zD turns used. Use /goal resume to keep going, or /goal clear to stop.Tu   ↻ Continuing toward goal (r   )r   r   r   r   r   r   r   r   r   r   rV   r:   &DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURESr   r   next_continuation_prompt)r$   rz   r   rR   r   rj   r   s          r%   evaluate_after_turnzGoalManager.evaluate_after_turn  s{   ( =ELH44*/9%,,T#('+%*   	A!Y[[(25:}(M(M%$"
  	1,,1,,,/0E,f!ELdou--- #('+! 999   +/UUU#ELl5;[lll  dou---"#('+% 5E<\ 5 5 5  $ u..#EL"aE<L"a"au"a"a"aEdou---"#('+% N5+; N Neo N N N
 
 
 	$/5)))##'#@#@#B#B!^u/?^^%/^^V\^^	
 	
 		
r'   r   c                ~    | j         r| j         j        dk    rd S t                              | j         j                  S )Nr   )r   )r   r   CONTINUATION_PROMPT_TEMPLATEr   r   r#   s    r%   r   z$GoalManager.next_continuation_promptB  s<    { 	dk0H<<4+228H2IIIr'   )r:   r   r   r   )r   rM   )r   rt   r2   )r   r   r   r   r   r   )r   )rj   r   r   rM   )r   rt   r   rM   )r   rS   )rj   r   r   rS   )rz   r   r   rt   r   r=   )r   r   )r3   r4   r5   r6   r.   r   propertyrR   r   r   r   r   r   r   r   r   r   r   r9   r'   r%   r   r   h  sy          EV A A A A A A    XJ J J JV V V V7 7 7 7  <@            .2         0 0 0 0  $	s
 s
 s
 s
 s
 s
jJ J J J J Jr'   r   )r   r   r   r.   rQ   rV   rY   r   )r:   r   r   r   )r   r?   )r:   r   r   rM   )r:   r   rR   r   r   rS   )r:   r   r   rS   )rZ   r   r[   r   r   r   )r(   r   r   r_   )r   r   rz   r   ry   r   r   r{   ))r6   
__future__r   r!   loggingrer   dataclassesr   r   typingr   r   r   r	   	getLoggerr3   rF   r.   DEFAULT_JUDGE_TIMEOUTr   r   r   r   r   r   r<   r>   r7   rL   rQ   rV   rY   r^   compileDOTALLrn   rx   r   r   __all__r9   r'   r%   <module>r      s:    : # " " " " "   				  ) ) ) ) ) ) ) ) - - - - - - - - - - - -		8	$	$    $  *+ &P I "  
 
 
 
 
 
 
 
N        	       <   *
> 
> 
> 
>! ! ! !, , , , "*Z33. . . .j +	F) F) F) F) F) F)\]J ]J ]J ]J ]J ]J ]J ]J@	 	 	r'   