
    iS                        d Z ddlmZ ddlZddlmZmZ ddlmZ erddlm	Z	 	 ddl
mZ n# e$ r ddlZY nw xY w G d	 d
          ZdS )u   Hybrid keyword/BM25 retrieval for the memory store.

Ported from KIK memory_agent.py — combines FTS5 full-text search with
Jaccard similarity reranking and trust-weighted scoring.
    )annotationsN)datetimetimezone)TYPE_CHECKING   )MemoryStore)holographicc                      e Zd ZdZ	 	 	 	 	 d0d1dZ	 	 	 d2d3dZ	 	 d4d5dZ	 	 d4d5dZ	 	 d4d6d Z	 	 	 d2d7d"Z		 	 d4d8d%Z
d3d&Zed9d)            Zed:d-            Zd;d/ZdS )<FactRetrieverz:Multi-strategy fact retrieval with trust-weighted scoring.r   皙?333333?   storer   temporal_decay_half_lifeint
fts_weightfloatjaccard_weight
hrr_weighthrr_dimc                    || _         || _        || _        |dk    rt          j        sd}d}d}|| _        || _        || _        d S )Nr   g333333?r           )r   	half_lifer   hrr
_HAS_NUMPYr   r   r   )selfr   r   r   r   r   r   s          I/home/piyush/.hermes/hermes-agent/plugins/memory/holographic/retrieval.py__init__zFactRetriever.__init__   sS     
1 >>#.>J NJ$,$    N
   querystrcategory
str | None	min_trustlimitreturn
list[dict]c                   |                      ||||dz            }|sg S |                     |          }g }|D ]z}|                     |d                   }	|                     |                    dd                    }
|	|
z  }|                     ||          }|                    dd          }| j        dk    re|                    d          rPt          j        |d                   }t          j        || j                  }t          j	        ||          d	z   d
z  }nd}| j
        |z  | j        |z  z   | j        |z  z   }||d         z  }| j        dk    r@||                     |                    d          p|                    d                    z  }||d<   |                    |           ||                    d d           |d|         }|D ]}|                    dd           |S )u  Hybrid search: FTS5 candidates → Jaccard rerank → trust weighting.

        Pipeline:
        1. FTS5 search: Get limit*3 candidates from SQLite full-text search
        2. Jaccard boost: Token overlap between query and fact content
        3. Trust weighting: final_score = relevance * trust_score
        4. Temporal decay (optional): decay = 0.5^(age_days / half_life)

        Returns list of dicts with fact data + 'score' field, sorted by score desc.
           contenttags fts_rankr   r   
hrr_vector      ?       @      ?trust_score
updated_at
created_atscorec                    | d         S Nr6    xs    r   <lambda>z&FactRetriever.search.<locals>.<lambda>k   
    !G* r   TkeyreverseN)_fts_candidates	_tokenizeget_jaccard_similarityr   r   bytes_to_phasesencode_textr   
similarityr   r   r   _temporal_decayappendsortpop)r   r!   r#   r%   r&   
candidatesquery_tokensscoredfactcontent_tokens
tag_tokens
all_tokensjaccard	fts_scorefact_vec	query_vechrr_sim	relevancer6   resultss                       r   searchzFactRetriever.search0   s   $ ))%9eaiPP
 	I ~~e,, 	  	 D!^^DO<<N(<(<==J'*4J..|ZHHGS11I ""txx'='=".tL/ABBOE4<@@	>)X>>DK 94-78/G34I
 ] 33E ~!!--dhh|.D.D.^Q]H^H^___!DMMM$ 	,,d;;;%. 	) 	)DHH\4((((r   entityc                   t           j        s|                     |||          S | j        j        }t          j        d| j                  }t          j        |                                | j                  }t          j        ||          }|rwd| }|	                    d|f          
                                }	|	rGt          j        |	d                   }
t          j        |
|          }|                     |||          S d}g }|r|dz  }|                    |           |	                    d| d	|                                          }|s|                     |||          S g }|D ]}t!          |          }t          j        |                    d
                    }t          j        ||          }t          j        d| j                  }t          j        t          j        |d         | j                  |          }t          j        ||          }|dz   dz  |d         z  |d<   |                    |           |                    d d           |d|         S )u:  Compositional entity query using HRR algebra.

        Unbinds entity from memory bank to extract associated content.
        This is NOT keyword search — it uses algebraic structure to find facts
        where the entity plays a structural role.

        Falls back to FTS5 search if numpy unavailable.
        r#   r&   __hrr_role_entity__zcat:z3SELECT vector FROM memory_banks WHERE bank_name = ?vectorWHERE hrr_vector IS NOT NULL AND category = ?
            SELECT fact_id, content, category, tags, trust_score,
                   retrieval_count, helpful_count, created_at, updated_at,
                   hrr_vector
            FROM facts
            
            r/   __hrr_role_content__r+   r0   r1   r3   r6   c                    | d         S r8   r9   r:   s    r   r<   z%FactRetriever.probe.<locals>.<lambda>   r=   r   Tr>   N)r   r   rZ   r   _connencode_atomr   lowerbindexecutefetchonerE   unbind_score_facts_by_vectorrI   fetchalldictrK   rF   rG   rJ   )r   r[   r#   r&   connrole_entity
entity_vec	probe_key	bank_namebank_rowbank_vec	extractedwhereparamsrowsrN   rowrO   rU   residualrole_contentcontent_vecsims                          r   probezFactRetriever.prober   s    ~ 	G;;v;FFFz o&;T\JJ_V\\^^T\BB
HZ55	  	)x))I||E  hjj   .x/ABBJx;;	22 3   
 / 	$((EMM(###||
    	
 	
 (** 	  	G;;v;FFF 
	  
	 C99D*488L+A+ABBHz(I66H?+A4<PPL(3?4	?DL#Q#QS_``K.;77C 3Y#-]0CCDMMM$,,d;;;fuf~r   c                   t           j        s|                     |||          S | j        j        }t          j        |                                | j                  }d}g }|r|dz  }|                    |           |	                    d| d|          
                                }|s|                     |||          S g }	|D ]}
t          |
          }t          j        |                    d                    }t          j        ||          }t          j        d| j                  }t          j        d| j                  }t          j        ||          }t          j        ||          }t!          ||          }|d	z   d
z  |d         z  |d<   |	                    |           |	                    d d           |	d|         S )uk  Discover facts that share structural connections with an entity.

        Unlike probe (which finds facts *about* an entity), related finds
        facts that are connected through shared context — e.g., other entities
        mentioned alongside this one, or content that overlaps structurally.

        Falls back to FTS5 search if numpy unavailable.
        r]   r`   ra   rb   rc   r/   r^   rd   r0   r1   r3   r6   c                    | d         S r8   r9   r:   s    r   r<   z'FactRetriever.related.<locals>.<lambda>  r=   r   Tr>   N)r   r   rZ   r   rf   rg   rh   r   rI   rj   rn   ro   rE   rK   rl   rG   maxrJ   )r   r[   r#   r&   rp   rr   rx   ry   rz   rN   r{   rO   rU   r|   rq   r}   entity_role_simcontent_role_simbest_sims                      r   relatedzFactRetriever.related   s    ~ 	G;;v;FFFz _V\\^^T\BB
 / 	$((EMM(###||
    	
 	
 (** 	  	G;;v;FFF  	  	 C99D*488L+A+ABBH z(J77H /*?NNK?+A4<PPL!nX{CCO"~hEE?,<==H%^s2T-5HHDMMM$,,d;;;fuf~r   entities	list[str]c                b   t           j        r|s-d                    |          }|                     |||          S | j        j        }t          j        d| j                  }g }|D ]X}t          j        |                                | j                  }	t          j	        |	|          }
|
                    |
           Yd}g }|r|dz  }|
                    |           |                    d| d|                                          }|s-d                    |          }|                     |||          S t          j        d| j                  }g }|D ]}t          |          }t          j        |                    d	                    }g }|D ]A}
t          j        ||
          }t          j        ||          }|
                    |           Bt%          |          }|d
z   dz  |d         z  |d<   |
                    |           |                    d d           |d|         S )u  Multi-entity compositional query — vector-space JOIN.

        Given multiple entities, algebraically intersects their structural
        connections to find facts related to ALL of them simultaneously.
        This is compositional reasoning that no embedding DB can do.

        Example: reason(["peppi", "backend"]) finds facts where peppi AND
        backend both play structural roles — without keyword matching.

        Falls back to FTS5 search if numpy unavailable.
         r]   r^   r`   ra   rb   rc   rd   r/   r0   r1   r3   r6   c                    | d         S r8   r9   r:   s    r   r<   z&FactRetriever.reason.<locals>.<lambda>O  r=   r   Tr>   N)r   r   joinrZ   r   rf   rg   r   rh   ri   rI   rj   rn   ro   rE   rK   rl   rG   minrJ   )r   r   r#   r&   r!   rp   rq   entity_residualsr[   rr   rs   rx   ry   rz   r}   rN   r{   rO   rU   entity_scoresr|   r   min_sims                          r   reasonzFactRetriever.reason  s`   " ~ 	FX 	FHHX&&E;;uxu;EEEzo&;T\JJ  	/ 	/FFFJ[99I##I.... / 	$((EMM(###||
    	
 	
 (** 	  	FHHX&&E;;uxu;EEE
 '=t|LL 	  	 C99D*488L+A+ABBHM- * *	:h	::nX|<<$$S))))-((G$s]c1D4GGDMMM$,,d;;;fuf~r   	thresholdc                   t           j        sg S | j        j        }d}g }|r|dz  }|                    |           |                    d| d|                                          }t          |          dk     rg S d}t          |          |k    rt          |d d	          }|d
|         }i }	|D ]B}
|
d         }|                    d|f                                          }d |D             |	|<   Cd |D             }g }t          t          |                    D ]}t          |dz   t          |                    D ]}||         ||         }}|	
                    |d         t                                }|	
                    |d         t                                }|r|sh||z  r%t          ||z            t          ||z            z  nd}|dk     rt          j        |d                   }t          j        |d                   }t          j        ||          }|d|dz   dz  z
  z  }||k    rd |                                D             }d |                                D             }|                    ||t          |d          t          |d          t          |d          t          ||z            d           |                    d d	           |d
|         S )u  Find potentially contradictory facts via entity overlap + content divergence.

        Two facts contradict when they share entities (same subject) but have
        low content-vector similarity (different claims). This is automated
        memory hygiene — no other memory system does this.

        Returns pairs of facts with a contradiction score.
        Falls back to empty list if numpy unavailable.
        zWHERE f.hrr_vector IS NOT NULLz AND f.category = ?z
            SELECT f.fact_id, f.content, f.category, f.tags, f.trust_score,
                   f.created_at, f.updated_at, f.hrr_vector
            FROM facts f
            rc      i  c                "    | d         p| d         S )Nr4   r5   r9   )rs    r   r<   z*FactRetriever.contradict.<locals>.<lambda>  s    ao.P< r   Tr>   Nfact_idz
                SELECT e.name FROM entities e
                JOIN fact_entities fe ON fe.entity_id = e.entity_id
                WHERE fe.fact_id = ?
                c                B    h | ]}|d                                           S )name)rh   .0r   s     r   	<setcomp>z+FactRetriever.contradict.<locals>.<setcomp>  s&    !I!I!I!F)//"3"3!I!I!Ir   c                ,    g | ]}t          |          S r9   )ro   r   s     r   
<listcomp>z,FactRetriever.contradict.<locals>.<listcomp>  s    '''Qa'''r   r   r   r   r/   r0   r1   c                &    i | ]\  }}|d k    ||S r/   r9   r   kvs      r   
<dictcomp>z,FactRetriever.contradict.<locals>.<dictcomp>  (    QQQAqL?P?P1?P?P?Pr   c                &    i | ]\  }}|d k    ||S r   r9   r   s      r   r   z,FactRetriever.contradict.<locals>.<dictcomp>  r   r   r*   )fact_afact_bentity_overlapcontent_similaritycontradiction_scoreshared_entitiesc                    | d         S )Nr   r9   r:   s    r   r<   z*FactRetriever.contradict.<locals>.<lambda>  s    !,A*B r   )r   r   r   rf   rI   rj   rn   lensortedrangerC   setrE   rG   itemsroundrJ   )r   r#   r   r&   rp   rx   ry   rz   _MAX_CONTRADICT_FACTSfact_entitiesr{   fidentity_rowsfactscontradictionsijf1f2ents1ents2r   v1v2content_simr   f1_cleanf2_cleans                               r   
contradictzFactRetriever.contradictR  s}    ~ 	Iz 1 	$**EMM(###|| 	   
 
 (** 	 t99q==I
 !$t99,,,$$P$PZ^___D.../D .0 
	J 
	JCi.C,,
   hjj  "J!I[!I!I!IM# ('$'''s5zz"" #	 #	A1q5#e**-- " "q58B%))"Y-??%))"Y-?? E  NSUZ]!dUU]!3!3c%%-6H6H!H!Had!C'' (L)9::(L)9::!nR44 '5{S?PTW>W8W&X#&)33QQQQQHQQQQQH"))"*"**/*B*B.3K.C.C/45H!/L/L+1%%-+@+@+ +   7"H 	 B BDQQQfuf%%r   
target_vec'np.ndarray'c                   | j         j        }d}g }|r|dz  }|                    |           |                    d| d|                                          }g }|D ]v}	t          |	          }
t          j        |
                    d                    }t          j	        ||          }|dz   dz  |
d         z  |
d	<   |                    |
           w|
                    d
 d           |d|         S )z-Score facts by similarity to a target vector.r`   ra   rb   rc   r/   r0   r1   r3   r6   c                    | d         S r8   r9   r:   s    r   r<   z6FactRetriever._score_facts_by_vector.<locals>.<lambda>  r=   r   Tr>   N)r   rf   rI   rj   rn   ro   r   rE   rK   rG   rJ   )r   r   r#   r&   rp   rx   ry   rz   rN   r{   rO   rU   r   s                r   rm   z$FactRetriever._score_facts_by_vector  s#    z. 	$((EMM(###||
    	
 	
 (** 	  	  	 C99D*488L+A+ABBH.X66C 3Y#-]0CCDMMM$,,d;;;fuf~r   c                   | j         j        }g }dg}|                    |           |r*|                    d           |                    |           |                    d           |                    |           d                    |          }d| d}	|                    |           	 |                    |	|                                          }
n# t          $ r g cY S w xY w|
sg S d |
D             }|rt          |          nd}t          |d	          }g }t          |
|          D ]G\  }}t          |          }|
                    d
d           ||z  |d<   |                    |           H|S )zGet raw FTS5 candidates from the store.

        Uses the store's database connection directly for FTS5 MATCH
        with rank scoring. Normalizes FTS5 rank to [0, 1] range.
        zfacts_fts MATCH ?zf.category = ?zf.trust_score >= ?z AND z
            SELECT f.*, facts_fts.rank as fts_rank_raw
            FROM facts_fts
            JOIN facts f ON f.fact_id = facts_fts.rowid
            WHERE zA
            ORDER BY facts_fts.rank
            LIMIT ?
        c                8    g | ]}t          |d                    S )fts_rank_raw)abs)r   r{   s     r   r   z1FactRetriever._fts_candidates.<locals>.<listcomp>  s%    >>>#S^,-->>>r   r0   gư>r   Nr.   )r   rf   rI   r   rj   rn   	Exceptionr   zipro   rK   )r   r!   r#   r%   r&   rp   ry   where_clauses	where_sqlsqlrz   	raw_ranksmax_rankrY   r{   raw_rankrO   s                    r   rA   zFactRetriever._fts_candidates  s    z ,-e 	$  !1222MM(###1222i   LL//	 	   	e	<<V,,5577DD 	 	 	III	  	I ?>>>>	%.73y>>>Cx&& y11 	! 	!MC99DHH^T***'(2DNN4    s   .(C C&%C&textset[str]c                    | st                      S t                      }|                                                                 D ].}|                    d          }|r|                    |           /|S )zSimple whitespace tokenization with lowercasing.

        Strips common punctuation. No stemming/lemmatization (Phase 1).
        z.,;:!?"'()[]{}#@<>)r   rh   splitstripadd)r   tokenswordcleaneds       r   rB   zFactRetriever._tokenize   ss      	55LJJLL&&(( 	$ 	$Djj!677G $

7###r   set_ar   set_bc                p    | r|sdS t          | |z            }t          | |z            }|dk    r||z  ndS )u6   Jaccard similarity coefficient: |A ∩ B| / |A ∪ B|.r   r   )r   )r   r   intersectionunions       r   rD   z!FactRetriever._jaccard_similarity0  sR      	E 	355=))EEM""',qyy|e##c9r   timestamp_strc                   | j         r|sdS 	 t          |t                    r)t          j        |                    dd                    }n|}|j         |                    t          j                  }t          j	        t          j                  |z
  
                                dz  }|dk     rdS t          j        d|| j         z            S # t          t          f$ r Y dS w xY w)	zExponential decay: 0.5^(age_days / half_life_days).

        Returns 1.0 if decay is disabled or timestamp is missing.
        r0   Zz+00:00N)tzinfoiQ r   r2   )r   
isinstancer"   r   fromisoformatreplacer   r   utcnowtotal_secondsmathpow
ValueError	TypeError)r   r   tsage_dayss       r   rH   zFactRetriever._temporal_decay9  s    
 ~ 	] 	3	--- #+M,A,A#x,P,PQQ"y ZZx|Z44 X\22R7FFHH5PH!||s8CDN!:;;;I& 	 	 	33	s   B#C 2C C$#C$)r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   )Nr   r    )
r!   r"   r#   r$   r%   r   r&   r   r'   r(   )Nr    )r[   r"   r#   r$   r&   r   r'   r(   )r   r   r#   r$   r&   r   r'   r(   )r#   r$   r   r   r&   r   r'   r(   )r   r   r#   r$   r&   r   r'   r(   )r   r"   r'   r   )r   r   r   r   r'   r   )r   r$   r'   r   )__name__
__module____qualname____doc__r   rZ   r   r   r   r   rm   rA   staticmethodrB   rD   rH   r9   r   r   r   r      s       DD
 )* #% % % % %4  $@ @ @ @ @J  $	L L L L Lb  $	B B B B BN  $	L L L L L`  $	h& h& h& h& h&Z  $	# # # # #J= = = =~    \ : : : \:     r   r   )r   
__future__r   r   r   r   typingr   r   r   r-   r	   r   ImportErrorr   r9   r   r   <module>r      s     # " " " " "  ' ' ' ' ' ' ' '             #""""""$$$$$$$   { { { { { { { { { {s   + 	77