1 change: 0 additions & 1 deletion .gitignore
@@ -42,7 +42,6 @@ MANIFEST
 # ==============================
 # PyInstaller
 # ==============================
-# Usually contains temporary files from pyinstaller builds
 *.manifest
 *.spec

222 changes: 222 additions & 0 deletions nl_parser.py
@@ -0,0 +1,222 @@
import difflib
import re
from difflib import SequenceMatcher
Comment on lines +1 to +3
🛠️ Refactor suggestion | 🟠 Major

Remove redundant import.

difflib is imported on line 1 and SequenceMatcher is imported from difflib on line 3. Only two names from difflib are used (get_close_matches on line 64 and SequenceMatcher on line 82), so the bare import on line 1 is redundant once both are imported directly.

Apply this diff:

-import difflib
 import re
-from difflib import SequenceMatcher
+from difflib import SequenceMatcher, get_close_matches

Then update line 64 to call get_close_matches directly:

-    close = difflib.get_close_matches(token, VOCAB, n=1, cutoff=0.75)
+    close = get_close_matches(token, VOCAB, n=1, cutoff=0.75)

Or keep the difflib import and remove the SequenceMatcher import:

 import difflib
 import re
-from difflib import SequenceMatcher

Then update line 82:

-    return SequenceMatcher(None, a, b).ratio()
+    return difflib.SequenceMatcher(None, a, b).ratio()

from typing import Dict, Any, List, Tuple

Comment on lines +1 to +5
🛠️ Refactor suggestion | 🟠 Major

Add module-level docstring.

Per PEP 257 and coding guidelines, public modules should have a docstring explaining their purpose.

Add a module docstring at the top:

+"""Natural language parser for installation requests.
+
+Provides intent detection, spell correction, semantic matching, and slot extraction
+for parsing user installation requests with confidence scoring and clarification logic.
+"""
+
 import difflib
 import re
 from difflib import SequenceMatcher

# Vocabulary for typo correction
VOCAB = {
    "python", "pip", "venv", "virtualenv", "conda", "anaconda",
    "docker", "kubernetes", "k8s", "kubectl",
    "nginx", "apache", "httpd", "web", "server",
    "flask", "django", "tensorflow", "pytorch", "torch",
    "install", "setup", "development", "env", "environment",
}
Comment on lines +7 to +13
🛠️ Refactor suggestion | 🟠 Major

Add type annotation for VOCAB constant.

As per coding guidelines, type hints are required. Module-level constants should be annotated.

Apply this diff:

 # Vocabulary for typo correction
-VOCAB = {
+VOCAB: set[str] = {
     "python", "pip", "venv", "virtualenv", "conda", "anaconda",


# Canonical examples for lightweight semantic matching
INTENT_EXAMPLES = {
    "install_ml": [
        "install something for machine learning",
        "install pytorch",
        "install tensorflow",
        "i want to run pytorch",
    ],
    "install_web_server": [
        "i need a web server",
        "install nginx",
        "install apache",
        "set up a web server",
    ],
    "setup_python_env": [
        "set up python development environment",
        "install python 3.10",
        "create python venv",
        "setup dev env",
    ],
    "install_docker": [
        "install docker",
        "add docker",
        "deploy containers - docker",
    ],
    "install_docker_k8s": [
        "install docker and kubernetes",
        "docker and k8s",
        "k8s and docker on my mac",
    ],
}
Comment on lines +16 to +45
🛠️ Refactor suggestion | 🟠 Major

Add type annotation for INTENT_EXAMPLES constant.

As per coding guidelines, type hints are required. Module-level constants should be annotated.

Apply this diff:

 # Canonical examples for lightweight semantic matching
-INTENT_EXAMPLES = {
+INTENT_EXAMPLES: Dict[str, List[str]] = {
     "install_ml": [





def normalize(text: str) -> str:
    text = text.lower()
    text = text.replace("-", " ")
    text = re.sub(r"[^a-z0-9.\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text
Comment on lines +48 to +53
🛠️ Refactor suggestion | 🟠 Major

Add docstring to public function.

Per coding guidelines, docstrings are required for all public APIs.

Apply this diff:

 def normalize(text: str) -> str:
+    """Normalize text for parsing: lowercase, remove special chars, collapse whitespace.
+    
+    Preserves dots and digits for version numbers (e.g., 'Python 3.10').
+    """
     text = text.lower()



def tokenize(text: str) -> List[str]:
    return text.split()
Comment on lines +56 to +57
🛠️ Refactor suggestion | 🟠 Major

Add docstring to public function.

Per coding guidelines, docstrings are required for all public APIs.

Apply this diff:

 def tokenize(text: str) -> List[str]:
+    """Split text into tokens by whitespace."""
     return text.split()



def spell_correct_token(token: str) -> Tuple[str, bool]:
    """Return corrected_token, was_corrected"""
    if token in VOCAB:
        return token, False
    close = difflib.get_close_matches(token, VOCAB, n=1, cutoff=0.75)
    if close:
        return close[0], True
    return token, False


def apply_spell_correction(tokens: List[str]) -> Tuple[List[str], List[Tuple[str, str]]]:
    corrections = []
    new_tokens = []
    for t in tokens:
        new, fixed = spell_correct_token(t)
        if fixed:
            corrections.append((t, new))
        new_tokens.append(new)
    return new_tokens, corrections
Comment on lines +70 to +78
🛠️ Refactor suggestion | 🟠 Major

Add docstring to public function.

Per coding guidelines, docstrings are required for all public APIs.

Apply this diff:

 def apply_spell_correction(tokens: List[str]) -> Tuple[List[str], List[Tuple[str, str]]]:
+    """Apply spell correction to all tokens.
+    
+    Returns:
+        Tuple of (corrected_tokens, corrections) where corrections is a list of (original, corrected) pairs.
+    """
     corrections = []
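To make the two helpers concrete, a quick interpreter check (doctest-style sketch; the tokens are chosen to hit both the corrected and the untouched paths against the VOCAB set above):

>>> from nl_parser import apply_spell_correction
>>> apply_spell_correction(["instal", "dockr", "on", "ubuntu"])
(['install', 'docker', 'on', 'ubuntu'], [('instal', 'install'), ('dockr', 'docker')])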



def fuzzy_phrase_score(a: str, b: str) -> float:
    return SequenceMatcher(None, a, b).ratio()
Comment on lines +81 to +82
🛠️ Refactor suggestion | 🟠 Major

Add docstring to public function.

Per coding guidelines, docstrings are required for all public APIs.

Apply this diff:

 def fuzzy_phrase_score(a: str, b: str) -> float:
+    """Compute similarity ratio between two strings (0.0 to 1.0)."""
     return SequenceMatcher(None, a, b).ratio()



def semantic_intent_score(text: str) -> Tuple[str, float]:
    """Compare text with intent examples."""
    best_intent = "unknown"
    best_score = 0.0

    for intent, examples in INTENT_EXAMPLES.items():
        for ex in examples:
            score = fuzzy_phrase_score(text, ex)
            if score > best_score:
                best_score = score
                best_intent = intent

    return best_intent, best_score


def rule_intent(text: str) -> Tuple[str, float]:
    """Simple keyword/rule-based detection."""
    t = text

    if "docker" in t:
        if "kubernetes" in t or "k8s" in t or "kubectl" in t:
            return "install_docker_k8s", 0.95
        return "install_docker", 0.9

    if "kubernetes" in t or "k8s" in t or "kubectl" in t:
        return "install_docker_k8s", 0.9
Comment on lines +109 to +110
⚠️ Potential issue | 🟠 Major


Add test coverage and clarify kubernetes-only intent handling.

Lines 109–110 return install_docker_k8s when the user mentions only kubernetes/k8s keywords (without docker). However:

  • The INTENT_EXAMPLES for install_docker_k8s (lines 40–44) only show examples with both docker AND kubernetes.
  • No test case covers kubernetes-only input (e.g., "install kubernetes").
  • No separate install_kubernetes intent exists.

Either: (1) add an install_kubernetes intent with corresponding examples and test cases, or (2) if kubernetes always requires docker in the cortex install flow, add a test case for kubernetes-only input and a comment explaining this design choice.
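A minimal pytest sketch for the kubernetes-only path (hypothetical test file; the expected intent assumes option (2), i.e. that kubernetes-only input is meant to land on install_docker_k8s, which is what the code currently does):

# test_nl_parser.py (hypothetical)
from nl_parser import parse_request

def test_kubernetes_only_maps_to_docker_k8s():
    # rule_intent() returns ("install_docker_k8s", 0.9) for k8s-only text today;
    # if a separate install_kubernetes intent is added, update this expectation.
    result = parse_request("install kubernetes")
    assert result["intent"] == "install_docker_k8s"
    assert "kubernetes" in result["slots"].get("packages", [])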


if "nginx" in t or "apache" in t or "httpd" in t or "web server" in t:
return "install_web_server", 0.9

if "python" in t or "venv" in t or "conda" in t or "anaconda" in t:
return "setup_python_env", 0.9

if any(word in t for word in ("tensorflow", "pytorch", "torch", "machine learning", "ml")):
return "install_ml", 0.9

return "unknown", 0.0


VERSION_RE = re.compile(r"python\s*([0-9]+(?:\.[0-9]+)?)")

Check warning on line 124 in nl_parser.py (SonarCloud Code Analysis):
Use concise character class syntax '\d' instead of '[0-9]'.
See more: https://sonarcloud.io/project/issues?id=cortexlinux_cortex&issues=AZsOK8Kz3zSehC9xcj3a&open=AZsOK8Kz3zSehC9xcj3a&pullRequest=293

Check warning on line 124 in nl_parser.py (SonarCloud Code Analysis):
Use concise character class syntax '\d' instead of '[0-9]'.
See more: https://sonarcloud.io/project/issues?id=cortexlinux_cortex&issues=AZsOK8Kz3zSehC9xcj3b&open=AZsOK8Kz3zSehC9xcj3b&pullRequest=293
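If the team takes SonarCloud's hint, the concise form would be the sketch below. Behavior is unchanged in this pipeline, since normalize() has already stripped everything outside a-z, 0-9, dots, and spaces before the pattern runs (otherwise \d would also match non-ASCII digits):

VERSION_RE = re.compile(r"python\s*(\d+(?:\.\d+)?)")  # \d replaces [0-9]; same matches after normalize()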
PLATFORM_RE = re.compile(r"\b(mac|macos|windows|linux|ubuntu|debian)\b")
PACKAGE_RE = re.compile(r"\b(nginx|apache|docker|kubernetes|k8s|kubectl|python|pip|venv|conda|tensorflow|pytorch)\b")
Comment on lines +124 to +126
🛠️ Refactor suggestion | 🟠 Major

Add type annotations for regex constants.

As per coding guidelines, type hints are required for module-level constants.

Apply this diff (re is already imported at the top of the module, so only Pattern needs importing):

+from typing import Pattern
+
-VERSION_RE = re.compile(r"python\s*([0-9]+(?:\.[0-9]+)?)")
-PLATFORM_RE = re.compile(r"\b(mac|macos|windows|linux|ubuntu|debian)\b")
-PACKAGE_RE = re.compile(r"\b(nginx|apache|docker|kubernetes|k8s|kubectl|python|pip|venv|conda|tensorflow|pytorch)\b")
+VERSION_RE: Pattern[str] = re.compile(r"python\s*([0-9]+(?:\.[0-9]+)?)")
+PLATFORM_RE: Pattern[str] = re.compile(r"\b(mac|macos|windows|linux|ubuntu|debian)\b")
+PACKAGE_RE: Pattern[str] = re.compile(r"\b(nginx|apache|docker|kubernetes|k8s|kubectl|python|pip|venv|conda|tensorflow|pytorch)\b")



def extract_slots(text: str) -> Dict[str, Any]:
    slots = {}

    v = VERSION_RE.search(text)
    if v:
        slots["python_version"] = v.group(1)

    p = PLATFORM_RE.search(text)
    if p:
        slots["platform"] = p.group(1)

    pkgs = PACKAGE_RE.findall(text)
    if pkgs:
        slots["packages"] = list(dict.fromkeys(pkgs))  # unique preserve order

    return slots
Comment on lines +129 to +144
🛠️ Refactor suggestion | 🟠 Major

Add docstring to public function.

Per coding guidelines, docstrings are required for all public APIs.

Apply this diff:

 def extract_slots(text: str) -> Dict[str, Any]:
+    """Extract structured slots from text: python_version, platform, packages.
+    
+    Returns:
+        Dictionary with extracted slots (only present if found in text).
+    """
     slots = {}



def aggregate_confidence(c_rule, c_sem, num_corrections, c_classifier=0.0):
    penalty = 1 - (num_corrections * 0.1)
    penalty = max(0.0, penalty)

    final = (
        0.4 * c_rule +
        0.4 * c_sem +
        0.2 * c_classifier
    ) * penalty

    return round(max(0.0, min(1.0, final)), 2)
Comment on lines +147 to +157
🛠️ Refactor suggestion | 🟠 Major

Add type hints and docstring to public function.

Per coding guidelines, type hints and docstrings are required for all public APIs. The parameters are missing type annotations.

Apply this diff:

-def aggregate_confidence(c_rule, c_sem, num_corrections, c_classifier=0.0):
+def aggregate_confidence(c_rule: float, c_sem: float, num_corrections: int, c_classifier: float = 0.0) -> float:
+    """Aggregate confidence from multiple sources with spell-correction penalty.
+    
+    Args:
+        c_rule: Confidence from rule-based detection (0.0-1.0)
+        c_sem: Confidence from semantic matching (0.0-1.0)
+        num_corrections: Number of spell corrections applied
+        c_classifier: Confidence from classifier (0.0-1.0), default 0.0
+        
+    Returns:
+        Aggregated confidence score (0.0-1.0), penalized by 0.1 per correction.
+    """
     penalty = 1 - (num_corrections * 0.1)
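As a quick sanity check of the weighting above (illustrative values; the arithmetic follows the code as written):

from nl_parser import aggregate_confidence

# (0.4*0.9 + 0.4*0.7 + 0.2*0.95) = 0.83, one correction gives penalty 0.9:
# 0.83 * 0.9 = 0.747, rounded to 0.75
assert aggregate_confidence(0.9, 0.7, 1, 0.95) == 0.75

# three corrections drop the penalty to 0.7: 0.83 * 0.7 = 0.581 -> 0.58
assert aggregate_confidence(0.9, 0.7, 3, 0.95) == 0.58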



def decide_clarifications(intent, confidence):
    if intent == "unknown" or confidence < 0.6:
        return [
            "Install Docker and Kubernetes",
            "Set up Python development environment",
            "Install a web server (nginx/apache)",
            "Install ML libraries (tensorflow/pytorch)",
        ]
    if intent == "setup_python_env" and confidence < 0.75:
        return ["Use venv", "Use conda", "Install a specific Python version"]
    return []
Comment on lines +160 to +170
🛠️ Refactor suggestion | 🟠 Major

Add type hints and docstring to public function.

Per coding guidelines, type hints and docstrings are required for all public APIs.

Apply this diff:

-def decide_clarifications(intent, confidence):
+def decide_clarifications(intent: str, confidence: float) -> List[str]:
+    """Determine if clarification prompts are needed based on intent and confidence.
+    
+    Args:
+        intent: Detected intent string
+        confidence: Confidence score (0.0-1.0)
+        
+    Returns:
+        List of clarification prompt strings (empty if no clarification needed).
+    """
     if intent == "unknown" or confidence < 0.6:





def parse_request(text: str) -> Dict[str, Any]:
    """Main function used by tests and demo."""
    norm = normalize(text)
    tokens = tokenize(norm)

    tokens_corr, corrections = apply_spell_correction(tokens)
    corrected_text = " ".join(tokens_corr)

    rule_int, c_rule = rule_intent(corrected_text)
    sem_int, c_sem = semantic_intent_score(corrected_text)

    if rule_int != "unknown" and rule_int == sem_int:
        chosen_intent = rule_int
        c_classifier = 0.95
    elif rule_int != "unknown":
        chosen_intent = rule_int
        c_classifier = 0.0
    elif c_sem > 0.6:
        chosen_intent = sem_int
        c_classifier = 0.0
    else:
        chosen_intent = "unknown"
        c_classifier = 0.0

    slots = extract_slots(corrected_text)

    confidence = aggregate_confidence(
        c_rule, c_sem, len(corrections), c_classifier
    )

    clarifications = decide_clarifications(chosen_intent, confidence)

    explanation = []
    if corrections:
        explanation.append(
            "corrected: " + ", ".join(f"{a}->{b}" for a, b in corrections)
        )
    explanation.append(f"rule_intent={rule_int} ({c_rule:.2f})")
    explanation.append(f"semantic_match={sem_int} ({c_sem:.2f})")

    return {
        "intent": chosen_intent,
        "confidence": confidence,
        "explanation": "; ".join(explanation),
        "slots": slots,
        "corrections": corrections,
        "clarifications": clarifications,
    }
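For reviewers who want to poke at the module locally, a minimal usage sketch (the misspellings are deliberate; the outputs shown as comments follow from the rules and regexes above, while the exact confidence depends on the weighting):

from nl_parser import parse_request

result = parse_request("instal pythn 3.10 on ubuntu")
print(result["intent"])       # setup_python_env
print(result["slots"])        # {'python_version': '3.10', 'platform': 'ubuntu', 'packages': ['python']}
print(result["corrections"])  # [('instal', 'install'), ('pythn', 'python')]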


1 change: 1 addition & 0 deletions requirements.txt
@@ -12,3 +12,4 @@ pyyaml>=6.0.0
 
 # Type hints for older Python versions
 typing-extensions>=4.0.0
+PyYAML==6.0.3
Empty file added src/intent/__init__.py
Empty file.
33 changes: 33 additions & 0 deletions src/intent/clarifier.py
@@ -0,0 +1,33 @@
# clarifier.py

from typing import List, Optional
from intent.detector import Intent

class Clarifier:
    """
    Checks if the detected intents have missing information.
    Returns a clarifying question if needed.
    """

    def needs_clarification(self, intents: List[Intent], text: str) -> Optional[str]:
        text = text.lower()

        # 1. If user mentions "gpu" but has not specified which GPU → ask
        if "gpu" in text and not any(i.target in ["cuda", "pytorch", "tensorflow"] for i in intents):
            return "Do you have an NVIDIA GPU? (Needed for CUDA/PyTorch/TensorFlow installation)"

        # 2. If user says "machine learning tools" but nothing specific
        generic_terms = ["ml", "machine learning", "deep learning", "ai tools"]
        if any(term in text for term in generic_terms) and len(intents) == 0:
            return "Which ML frameworks do you need? (PyTorch, TensorFlow, JupyterLab...)"

        # 3. If user asks to install CUDA but no GPU exists in context
        if any(i.target == "cuda" for i in intents) and "gpu" not in text:
            return "Installing CUDA requires an NVIDIA GPU. Do you have one?"

        # 4. If package versions are missing (later we can add real version logic)
        if "torch" in text and "version" not in text:
            return "Do you need the GPU version or CPU version of PyTorch?"

        # 5. Otherwise no clarification needed
        return None
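A short interaction sketch for the new Clarifier (assumes src/ is on PYTHONPATH so the intent package resolves, as the imports above expect):

from intent.clarifier import Clarifier
from intent.detector import IntentDetector

text = "install cuda for my new build"
intents = IntentDetector().detect(text)

# Rule 3 fires: a CUDA intent with no GPU mentioned in the text.
print(Clarifier().needs_clarification(intents, text))
# Installing CUDA requires an NVIDIA GPU. Do you have one?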
69 changes: 69 additions & 0 deletions src/intent/context.py
@@ -0,0 +1,69 @@
# context.py

from typing import List, Optional
from intent.detector import Intent

class SessionContext:
    """
    Stores context from previous user interactions.
    This is needed for Issue #53:
    'Uses context from previous commands'
    """

    def __init__(self):
        self.detected_gpu: Optional[str] = None
        self.previous_intents: List[Intent] = []
        self.installed_packages: List[str] = []
        self.clarifications: List[str] = []

    # -------------------
    # GPU CONTEXT
    # -------------------

    def set_gpu(self, gpu_name: str):
        self.detected_gpu = gpu_name

    def get_gpu(self) -> Optional[str]:
        return self.detected_gpu

    # -------------------
    # INTENT CONTEXT
    # -------------------

    def add_intents(self, intents: List[Intent]):
        self.previous_intents.extend(intents)

    def get_previous_intents(self) -> List[Intent]:
        return self.previous_intents

    # -------------------
    # INSTALLED PACKAGES
    # -------------------

    def add_installed(self, pkg: str):
        if pkg not in self.installed_packages:
            self.installed_packages.append(pkg)

    def is_installed(self, pkg: str) -> bool:
        return pkg in self.installed_packages

    # -------------------
    # CLARIFICATIONS
    # -------------------

    def add_clarification(self, question: str):
        self.clarifications.append(question)

    def get_clarifications(self) -> List[str]:
        return self.clarifications

    # -------------------
    # RESET CONTEXT
    # -------------------

    def reset(self):
        """Reset context (new session)"""
        self.detected_gpu = None
        self.previous_intents = []
        self.installed_packages = []
        self.clarifications = []
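A sketch of how SessionContext might carry state between turns (illustrative; the GPU string and call order are made up):

from intent.context import SessionContext
from intent.detector import IntentDetector

ctx = SessionContext()
ctx.set_gpu("NVIDIA RTX 4090")  # e.g. recorded from an earlier hardware probe

ctx.add_intents(IntentDetector().detect("install pytorch"))
ctx.add_installed("pytorch")

assert ctx.is_installed("pytorch")
assert ctx.get_gpu() == "NVIDIA RTX 4090"

ctx.reset()  # new session wipes everything
assert ctx.get_previous_intents() == []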
49 changes: 49 additions & 0 deletions src/intent/detector.py
@@ -0,0 +1,49 @@
# detector.py

from dataclasses import dataclass
from typing import List, Optional, ClassVar

@dataclass
class Intent:
    action: str
    target: str
    details: Optional[dict] = None

class IntentDetector:
    """
    Extracts high-level installation intents from natural language requests.
    """

    COMMON_PACKAGES: ClassVar[dict[str, List[str]]] = {
        "cuda": ["cuda", "nvidia toolkit"],
        "pytorch": ["pytorch", "torch"],
        "tensorflow": ["tensorflow", "tf"],
        "jupyter": ["jupyter", "jupyterlab", "notebook"],
        "cudnn": ["cudnn"],
        "gpu": ["gpu", "graphics card", "rtx", "nvidia"]
    }

    def detect(self, text: str) -> List[Intent]:
        text = text.lower()
        intents = []

        # 1. Rule-based keyword detection (skip GPU to avoid duplicate install intent)
        for pkg, keywords in self.COMMON_PACKAGES.items():
            if pkg == "gpu":
                continue  # GPU handled separately below
            if any(k in text for k in keywords):
                intents.append(Intent(action="install", target=pkg))

        # 2. Look for verify steps
        if "verify" in text or "check" in text:
            intents.append(Intent(action="verify", target="installation"))

        # 3. GPU configure intent (use all GPU synonyms)
        gpu_keywords = self.COMMON_PACKAGES.get("gpu", ["gpu"])
        if any(k in text for k in gpu_keywords) and not any(
            i.action == "configure" and i.target == "gpu"
            for i in intents
        ):
            intents.append(Intent(action="configure", target="gpu"))

        return intents
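To make the detection rules concrete, here is what detect() returns for a compound request (dataclass reprs shown as comments; ordering follows COMMON_PACKAGES insertion order, then the verify and GPU rules):

from intent.detector import IntentDetector

for intent in IntentDetector().detect("install cuda and cudnn, then verify the gpu setup"):
    print(intent)
# Intent(action='install', target='cuda', details=None)
# Intent(action='install', target='cudnn', details=None)
# Intent(action='verify', target='installation', details=None)
# Intent(action='configure', target='gpu', details=None)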