add mini-context-graph skill (#1580)

* add mini-context-graph skill * remove pycache files * filename case update to SKILL.md * update readme
2026-05-05 14:42:12 +00:00 · 2026-05-05 09:34:37 +05:30
parent 1f96bce626
commit 746ba555b6
16 changed files with 2343 additions and 0 deletions
@@ -0,0 +1,90 @@
+"""
+index_store.py — Maintains entity and keyword indexes for fast lookup.
+
+Handles:
+- Entity index: name → [node_ids]
+- Keyword index: token → [node_ids]
+- Persist to index.json
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import config
+
+_DATA_DIR = Path(os.environ.get("MINI_CONTEXT_GRAPH_DATA_DIR", str(config.DATA_DIR)))
+_INDEX_FILE = _DATA_DIR / "index.json"
+
+_STOPWORDS = frozenset(
+    [
+        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
+        "have", "has", "had", "do", "does", "did", "will", "would", "could",
+        "should", "may", "might", "shall", "can", "to", "of", "in", "on",
+        "at", "by", "for", "with", "from", "and", "or", "but", "not", "it",
+        "its", "this", "that", "these", "those", "i", "you", "he", "she",
+        "we", "they", "what", "which", "who", "how", "why", "when", "where",
+    ]
+)
+
+
+def _load() -> dict:
+    if _INDEX_FILE.exists():
+        with open(_INDEX_FILE, "r") as f:
+            return json.load(f)
+    return {"entity_index": {}, "keyword_index": {}}
+
+
+def _save(index: dict) -> None:
+    _DATA_DIR.mkdir(parents=True, exist_ok=True)
+    with open(_INDEX_FILE, "w") as f:
+        json.dump(index, f, indent=2)
+
+
+def _tokenize(text: str) -> list[str]:
+    """Split text into lowercase tokens, removing stopwords and short tokens."""
+    tokens = re.findall(r"[a-z0-9]+", text.lower())
+    return [t for t in tokens if t not in _STOPWORDS and len(t) > 1]
+
+
+def add_entity(name: str, node_id: str) -> None:
+    """Register an entity name → node_id in both entity and keyword indexes."""
+    index = _load()
+    name_lower = name.strip().lower()
+
+    # Entity index
+    if name_lower not in index["entity_index"]:
+        index["entity_index"][name_lower] = []
+    if node_id not in index["entity_index"][name_lower]:
+        index["entity_index"][name_lower].append(node_id)
+
+    # Keyword index
+    for token in _tokenize(name_lower):
+        if token not in index["keyword_index"]:
+            index["keyword_index"][token] = []
+        if node_id not in index["keyword_index"][token]:
+            index["keyword_index"][token].append(node_id)
+
+    _save(index)
+
+
+def search(query: str) -> list[str]:
+    """Search for node_ids matching the query via entity name or keywords."""
+    index = _load()
+    query_lower = query.strip().lower()
+    matched_ids: set[str] = set()
+
+    # Exact entity name match
+    if query_lower in index["entity_index"]:
+        matched_ids.update(index["entity_index"][query_lower])
+
+    # Keyword match
+    for token in _tokenize(query_lower):
+        if token in index["keyword_index"]:
+            matched_ids.update(index["keyword_index"][token])
+
+    return list(matched_ids)