diff --git a/.gitignore b/.gitignore
index 15b0ec7..fb85ab2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,7 +42,6 @@ MANIFEST
 
 # ==============================
 # PyInstaller
 # ==============================
-# Usually contains temporary files from pyinstaller builds
 *.manifest
 *.spec
diff --git a/nl_parser.py b/nl_parser.py
new file mode 100644
index 0000000..5f93c98
--- /dev/null
+++ b/nl_parser.py
@@ -0,0 +1,222 @@
+import difflib
+import re
+from difflib import SequenceMatcher
+from typing import Dict, Any, List, Tuple
+
+# Vocabulary for typo correction
+VOCAB = {
+    "python", "pip", "venv", "virtualenv", "conda", "anaconda",
+    "docker", "kubernetes", "k8s", "kubectl",
+    "nginx", "apache", "httpd", "web", "server",
+    "flask", "django", "tensorflow", "pytorch", "torch",
+    "install", "setup", "development", "env", "environment",
+}
+
+# Canonical examples for lightweight semantic matching
+INTENT_EXAMPLES = {
+    "install_ml": [
+        "install something for machine learning",
+        "install pytorch",
+        "install tensorflow",
+        "i want to run pytorch",
+    ],
+    "install_web_server": [
+        "i need a web server",
+        "install nginx",
+        "install apache",
+        "set up a web server",
+    ],
+    "setup_python_env": [
+        "set up python development environment",
+        "install python 3.10",
+        "create python venv",
+        "setup dev env",
+    ],
+    "install_docker": [
+        "install docker",
+        "add docker",
+        "deploy containers - docker",
+    ],
+    "install_docker_k8s": [
+        "install docker and kubernetes",
+        "docker and k8s",
+        "k8s and docker on my mac",
+    ],
+}
+
+
+def normalize(text: str) -> str:
+    text = text.lower()
+    text = text.replace("-", " ")
+    text = re.sub(r"[^a-z0-9.\s]", " ", text)
+    text = re.sub(r"\s+", " ", text).strip()
+    return text
+
+
+def tokenize(text: str) -> List[str]:
+    return text.split()
+
+
+def spell_correct_token(token: str) -> Tuple[str, bool]:
+    """Return (corrected_token, was_corrected)."""
+    if token in VOCAB:
+        return token, False
+    close = difflib.get_close_matches(token, VOCAB, n=1, cutoff=0.75)
+    if close:
+        return close[0], True
+    return token, False
+
+
+def apply_spell_correction(tokens: List[str]) -> Tuple[List[str], List[Tuple[str, str]]]:
+    corrections = []
+    new_tokens = []
+    for t in tokens:
+        new, fixed = spell_correct_token(t)
+        if fixed:
+            corrections.append((t, new))
+        new_tokens.append(new)
+    return new_tokens, corrections
+
+
+def fuzzy_phrase_score(a: str, b: str) -> float:
+    return SequenceMatcher(None, a, b).ratio()
+
+
+def semantic_intent_score(text: str) -> Tuple[str, float]:
+    """Compare text against the canonical intent examples."""
+    best_intent = "unknown"
+    best_score = 0.0
+
+    for intent, examples in INTENT_EXAMPLES.items():
+        for ex in examples:
+            score = fuzzy_phrase_score(text, ex)
+            if score > best_score:
+                best_score = score
+                best_intent = intent
+
+    return best_intent, best_score
+
+
+def rule_intent(text: str) -> Tuple[str, float]:
+    """Simple keyword/rule-based detection."""
+    t = text
+
+    if "docker" in t:
+        if "kubernetes" in t or "k8s" in t or "kubectl" in t:
+            return "install_docker_k8s", 0.95
+        return "install_docker", 0.9
+
+    if "kubernetes" in t or "k8s" in t or "kubectl" in t:
+        return "install_docker_k8s", 0.9
+
+    if "nginx" in t or "apache" in t or "httpd" in t or "web server" in t:
+        return "install_web_server", 0.9
+
+    if "python" in t or "venv" in t or "conda" in t or "anaconda" in t:
+        return "setup_python_env", 0.9
+
+    if any(word in t for word in ("tensorflow", "pytorch", "torch", "machine learning", "ml")):
+        return "install_ml", 0.9
+
+    return "unknown", 0.0
+
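+# Illustrative check of the helpers above (doctest-style comments only;
+# the values follow from the code as written):
+#
+#   apply_spell_correction(["install", "dokcer"])
+#   -> (["install", "docker"], [("dokcer", "docker")])
+#
+#   rule_intent("install docker and kubernetes")
+#   -> ("install_docker_k8s", 0.95)
+
+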
+VERSION_RE = re.compile(r"python\s*([0-9]+(?:\.[0-9]+)?)")
+PLATFORM_RE = re.compile(r"\b(mac|macos|windows|linux|ubuntu|debian)\b")
+PACKAGE_RE = re.compile(r"\b(nginx|apache|docker|kubernetes|k8s|kubectl|python|pip|venv|conda|tensorflow|pytorch)\b")
+
+
+def extract_slots(text: str) -> Dict[str, Any]:
+    slots = {}
+
+    v = VERSION_RE.search(text)
+    if v:
+        slots["python_version"] = v.group(1)
+
+    p = PLATFORM_RE.search(text)
+    if p:
+        slots["platform"] = p.group(1)
+
+    pkgs = PACKAGE_RE.findall(text)
+    if pkgs:
+        slots["packages"] = list(dict.fromkeys(pkgs))  # unique, preserving order
+
+    return slots
+
+
+def aggregate_confidence(c_rule, c_sem, num_corrections, c_classifier=0.0):
+    penalty = 1 - (num_corrections * 0.1)
+    penalty = max(0.0, penalty)
+
+    final = (
+        0.4 * c_rule
+        + 0.4 * c_sem
+        + 0.2 * c_classifier
+    ) * penalty
+
+    return round(max(0.0, min(1.0, final)), 2)
+
+
+def decide_clarifications(intent, confidence):
+    if intent == "unknown" or confidence < 0.6:
+        return [
+            "Install Docker and Kubernetes",
+            "Set up Python development environment",
+            "Install a web server (nginx/apache)",
+            "Install ML libraries (tensorflow/pytorch)",
+        ]
+    if intent == "setup_python_env" and confidence < 0.75:
+        return ["Use venv", "Use conda", "Install a specific Python version"]
+    return []
+
+
+def parse_request(text: str) -> Dict[str, Any]:
+    """Main entry point used by the tests and the demo."""
+    norm = normalize(text)
+    tokens = tokenize(norm)
+
+    tokens_corr, corrections = apply_spell_correction(tokens)
+    corrected_text = " ".join(tokens_corr)
+
+    rule_int, c_rule = rule_intent(corrected_text)
+    sem_int, c_sem = semantic_intent_score(corrected_text)
+
+    if rule_int != "unknown" and rule_int == sem_int:
+        chosen_intent = rule_int
+        c_classifier = 0.95
+    elif rule_int != "unknown":
+        chosen_intent = rule_int
+        c_classifier = 0.0
+    elif c_sem > 0.6:
+        chosen_intent = sem_int
+        c_classifier = 0.0
+    else:
+        chosen_intent = "unknown"
+        c_classifier = 0.0
+
+    slots = extract_slots(corrected_text)
+
+    confidence = aggregate_confidence(
+        c_rule, c_sem, len(corrections), c_classifier
+    )
+
+    clarifications = decide_clarifications(chosen_intent, confidence)
+
+    explanation = []
+    if corrections:
+        explanation.append(
+            "corrected: " + ", ".join(f"{a}->{b}" for a, b in corrections)
+        )
+    explanation.append(f"rule_intent={rule_int} ({c_rule:.2f})")
+    explanation.append(f"semantic_match={sem_int} ({c_sem:.2f})")
+
+    return {
+        "intent": chosen_intent,
+        "confidence": confidence,
+        "explanation": "; ".join(explanation),
+        "slots": slots,
+        "corrections": corrections,
+        "clarifications": clarifications,
+    }
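+
+
+if __name__ == "__main__":
+    # Minimal demo sketch (assumes direct invocation: `python nl_parser.py`);
+    # the misspellings are deliberate, to exercise the corrector.
+    from pprint import pprint
+    pprint(parse_request("pls install dokcer and kubernets on ubuntu"))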
diff --git a/requirements.txt b/requirements.txt
index 4077f05..27d29f0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,3 +12,4 @@ pyyaml>=6.0.0
 
 # Type hints for older Python versions
 typing-extensions>=4.0.0
+PyYAML==6.0.3
diff --git a/src/intent/__init__.py b/src/intent/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/intent/clarifier.py b/src/intent/clarifier.py
new file mode 100644
index 0000000..96bd9df
--- /dev/null
+++ b/src/intent/clarifier.py
@@ -0,0 +1,33 @@
+# clarifier.py
+
+from typing import List, Optional
+from intent.detector import Intent
+
+class Clarifier:
+    """
+    Checks whether the detected intents are missing information.
+    Returns a clarifying question if needed.
+    """
+
+    def needs_clarification(self, intents: List[Intent], text: str) -> Optional[str]:
+        text = text.lower()
+
+        # 1. User mentions "gpu" but no GPU framework intent was detected → ask
+        if "gpu" in text and not any(i.target in ["cuda", "pytorch", "tensorflow"] for i in intents):
+            return "Do you have an NVIDIA GPU? (Needed for CUDA/PyTorch/TensorFlow installation)"
+
+        # 2. User says "machine learning tools" but nothing specific
+        generic_terms = ["ml", "machine learning", "deep learning", "ai tools"]
+        if any(term in text for term in generic_terms) and len(intents) == 0:
+            return "Which ML frameworks do you need? (PyTorch, TensorFlow, JupyterLab...)"
+
+        # 3. User asks to install CUDA but no GPU appears in the request
+        if any(i.target == "cuda" for i in intents) and "gpu" not in text:
+            return "Installing CUDA requires an NVIDIA GPU. Do you have one?"
+
+        # 4. Package version is unspecified (real version logic can be added later)
+        if "torch" in text and "version" not in text:
+            return "Do you need the GPU version or CPU version of PyTorch?"
+
+        # 5. Otherwise no clarification needed
+        return None
diff --git a/src/intent/context.py b/src/intent/context.py
new file mode 100644
index 0000000..c2e6dbe
--- /dev/null
+++ b/src/intent/context.py
@@ -0,0 +1,69 @@
+# context.py
+
+from typing import List, Optional
+from intent.detector import Intent
+
+class SessionContext:
+    """
+    Stores context from previous user interactions.
+    This is needed for Issue #53:
+    'Uses context from previous commands'
+    """
+
+    def __init__(self):
+        self.detected_gpu: Optional[str] = None
+        self.previous_intents: List[Intent] = []
+        self.installed_packages: List[str] = []
+        self.clarifications: List[str] = []
+
+    # -------------------
+    # GPU CONTEXT
+    # -------------------
+
+    def set_gpu(self, gpu_name: str):
+        self.detected_gpu = gpu_name
+
+    def get_gpu(self) -> Optional[str]:
+        return self.detected_gpu
+
+    # -------------------
+    # INTENT CONTEXT
+    # -------------------
+
+    def add_intents(self, intents: List[Intent]):
+        self.previous_intents.extend(intents)
+
+    def get_previous_intents(self) -> List[Intent]:
+        return self.previous_intents
+
+    # -------------------
+    # INSTALLED PACKAGES
+    # -------------------
+
+    def add_installed(self, pkg: str):
+        if pkg not in self.installed_packages:
+            self.installed_packages.append(pkg)
+
+    def is_installed(self, pkg: str) -> bool:
+        return pkg in self.installed_packages
+
+    # -------------------
+    # CLARIFICATIONS
+    # -------------------
+
+    def add_clarification(self, question: str):
+        self.clarifications.append(question)
+
+    def get_clarifications(self) -> List[str]:
+        return self.clarifications
+
+    # -------------------
+    # RESET CONTEXT
+    # -------------------
+
+    def reset(self):
+        """Reset context (new session)."""
+        self.detected_gpu = None
+        self.previous_intents = []
+        self.installed_packages = []
+        self.clarifications = []
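+
+# Illustrative usage (comments only; mirrors test_context.py):
+#
+#   ctx = SessionContext()
+#   ctx.set_gpu("NVIDIA RTX 4090")
+#   ctx.add_installed("cuda")
+#   ctx.is_installed("cuda")        # -> True
+#   ctx.reset()                     # clear everything for a new session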
diff --git a/src/intent/detector.py b/src/intent/detector.py
new file mode 100644
index 0000000..336c82f
--- /dev/null
+++ b/src/intent/detector.py
@@ -0,0 +1,49 @@
+# detector.py
+
+from dataclasses import dataclass
+from typing import List, Optional, ClassVar
+
+@dataclass
+class Intent:
+    action: str
+    target: str
+    details: Optional[dict] = None
+
+class IntentDetector:
+    """
+    Extracts high-level installation intents from natural language requests.
+    """
+
+    COMMON_PACKAGES: ClassVar[dict[str, List[str]]] = {
+        "cuda": ["cuda", "nvidia toolkit"],
+        "pytorch": ["pytorch", "torch"],
+        "tensorflow": ["tensorflow", "tf"],
+        "jupyter": ["jupyter", "jupyterlab", "notebook"],
+        "cudnn": ["cudnn"],
+        "gpu": ["gpu", "graphics card", "rtx", "nvidia"]
+    }
+
+    def detect(self, text: str) -> List[Intent]:
+        text = text.lower()
+        intents = []
+
+        # 1. Rule-based keyword detection (skip GPU to avoid a duplicate
+        #    install intent; GPU is handled separately below)
+        for pkg, keywords in self.COMMON_PACKAGES.items():
+            if pkg == "gpu":
+                continue
+            if any(k in text for k in keywords):
+                intents.append(Intent(action="install", target=pkg))
+
+        # 2. Look for verify steps
+        if "verify" in text or "check" in text:
+            intents.append(Intent(action="verify", target="installation"))
+
+        # 3. GPU configure intent (use all GPU synonyms)
+        gpu_keywords = self.COMMON_PACKAGES.get("gpu", ["gpu"])
+        if any(k in text for k in gpu_keywords) and not any(
+            i.action == "configure" and i.target == "gpu"
+            for i in intents
+        ):
+            intents.append(Intent(action="configure", target="gpu"))
+
+        return intents
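+
+# Expected behavior, sketched from the rules above (see test_intent_detection.py):
+#
+#   IntentDetector().detect("Install CUDA and PyTorch for GPU")
+#   -> [Intent(action="install", target="cuda"),
+#       Intent(action="install", target="pytorch"),
+#       Intent(action="configure", target="gpu")]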
diff --git a/src/intent/llm_agent.py b/src/intent/llm_agent.py
new file mode 100644
index 0000000..8a4eb72
--- /dev/null
+++ b/src/intent/llm_agent.py
@@ -0,0 +1,162 @@
+# src/intent/llm_agent.py
+
+# -------------------------------
+# Safe import of anthropic SDK
+# -------------------------------
+try:
+    from anthropic import Anthropic
+except ImportError:
+    Anthropic = None
+
+from intent.detector import IntentDetector, Intent
+from intent.planner import InstallationPlanner
+from intent.clarifier import Clarifier
+from intent.context import SessionContext
+
+
+class LLMIntentAgent:
+    """
+    High-level orchestrator combining:
+    - rule-based intent detection
+    - optional LLM-enhanced interpretation
+    - planning & optimization
+    - clarification handling
+    - session context
+    """
+
+    def __init__(self, api_key: str | None = None,
+                 model: str = "claude-3-5-sonnet-20240620"):
+
+        # The LLM is enabled ONLY if both the SDK and an API key are available
+        if Anthropic is None or api_key is None:
+            self.llm = None
+        else:
+            self.llm = Anthropic(api_key=api_key)
+
+        self.model = model
+
+        self.detector = IntentDetector()
+        self.planner = InstallationPlanner()
+        self.clarifier = Clarifier()
+        self.context = SessionContext()
+
+    # ----------------------------------------------
+    # Main request handler
+    # ----------------------------------------------
+    def process(self, text: str):
+        # 1. Rule-based intent detection
+        intents = self.detector.detect(text)
+
+        # 2. Ask for clarification if needed
+        clarifying_q = self.clarifier.needs_clarification(intents, text)
+        if clarifying_q:
+            self.context.add_clarification(clarifying_q)
+            return {"clarification_needed": clarifying_q}
+
+        # 3. If the LLM is unavailable → fallback mode
+        if self.llm is None:
+            self.context.add_intents(intents)
+            return {
+                "intents": intents,
+                "plan": self.planner.build_plan(intents),
+                "suggestions": [],
+                "gpu": self.context.get_gpu()
+            }
+
+        # 4. Improve intents using the LLM (safe: falls back on any error)
+        try:
+            improved_intents = self.enhance_intents_with_llm(text, intents)
+        except Exception:
+            improved_intents = intents
+
+        # Save them to context
+        self.context.add_intents(improved_intents)
+
+        # 5. Build installation plan
+        plan = self.planner.build_plan(improved_intents)
+
+        # 6. Optional suggestions from the LLM (safe: falls back on any error)
+        try:
+            suggestions = self.suggest_optimizations(text)
+        except Exception:
+            suggestions = []
+
+        return {
+            "intents": improved_intents,
+            "plan": plan,
+            "suggestions": suggestions,
+            "gpu": self.context.get_gpu()
+        }
+
+    # ----------------------------------------------
+    # LLM enhancement of intents
+    # ----------------------------------------------
+    def enhance_intents_with_llm(self, text: str, intents: list[Intent]) -> list[Intent]:
+
+        prompt = f"""
+You are an installation-intent expert. Convert the user request into structured intents.
+
+User request: "{text}"
+
+Initial intents detected:
+{[str(i) for i in intents]}
+
+Return improvements or extra intents.
+Format: "install: package" or "configure: component"
+"""
+
+        # Explicit timeout to avoid long hangs
+        response = self.llm.with_options(timeout=30.0).messages.create(
+            model=self.model,
+            max_tokens=300,
+            messages=[{"role": "user", "content": prompt}]
+        )
+
+        # ---- Safety check ----
+        if not getattr(response, "content", None) or not hasattr(response.content[0], "text"):
+            return intents
+
+        llm_output = response.content[0].text.lower().split("\n")
+
+        new_intents = intents[:]
+
+        for line in llm_output:
+            # split (not replace) so bullet prefixes like "- install: x"
+            # don't leak into the parsed name
+            if "install:" in line:
+                pkg = line.split("install:", 1)[1].strip()
+                if pkg:
+                    new_intents.append(Intent("install", pkg))
+            elif "configure:" in line:
+                target = line.split("configure:", 1)[1].strip()
+                if target:
+                    new_intents.append(Intent("configure", target))
+            elif "verify:" in line:
+                target = line.split("verify:", 1)[1].strip()
+                if target:
+                    new_intents.append(Intent("verify", target))
+
+        return new_intents
+
+    # ----------------------------------------------
+    # LLM optimization suggestions
+    # ----------------------------------------------
+    def suggest_optimizations(self, text: str) -> list[str]:
+
+        prompt = f"""
+User request: "{text}"
+
+Suggest optional tools to improve ML installation.
+Examples: Conda, VSCode extensions, CUDA toolkit managers, Docker, Anaconda.
+Return bullet list only.
+"""
+
+        response = self.llm.with_options(timeout=30.0).messages.create(
+            model=self.model,
+            max_tokens=150,
+            messages=[{"role": "user", "content": prompt}]
+        )
+
+        # ---- Safety check ----
+        if not getattr(response, "content", None) or not hasattr(response.content[0], "text"):
+            return []
+
+        return [line.strip() for line in response.content[0].text.strip().split("\n") if line.strip()]
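+
+# Usage sketch (assumptions: no anthropic SDK or no API key configured —
+# the agent then stays in rule-based fallback mode and never calls the LLM):
+#
+#   agent = LLMIntentAgent()  # llm is None without SDK + key
+#   agent.process("install the gpu version of cuda, pytorch and jupyter")
+#   # -> {"intents": [...], "plan": [...], "suggestions": [], "gpu": None}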
diff --git a/src/intent/planner.py b/src/intent/planner.py
new file mode 100644
index 0000000..5220ecb
--- /dev/null
+++ b/src/intent/planner.py
@@ -0,0 +1,51 @@
+# planner.py
+
+from typing import List
+from intent.detector import Intent
+
+class InstallationPlanner:
+
+    GPU_PACKAGES = ["cuda", "cudnn", "pytorch", "tensorflow"]
+
+    def build_plan(self, intents: List[Intent]) -> List[str]:
+        plan = []
+        installed = set()
+
+        # 1. If GPU-related intents exist → add GPU detection.
+        #    GPU_PACKAGES is consulted here so that installing CUDA/cuDNN/
+        #    PyTorch/TensorFlow also triggers detection (previously the
+        #    constant was defined but never used).
+        has_gpu = any(
+            i.target == "gpu" or i.target in self.GPU_PACKAGES
+            for i in intents
+        )
+        if has_gpu:
+            plan.append("Detect GPU: Run `nvidia-smi` or PCI scan")
+
+        # 2. Add installation steps based on intent order
+        for intent in intents:
+            if intent.action == "install" and intent.target not in installed:
+
+                if intent.target == "cuda":
+                    plan.append("Install CUDA 12.3 + drivers")
+
+                elif intent.target == "cudnn":
+                    plan.append("Install cuDNN (matching CUDA version)")
+
+                elif intent.target == "pytorch":
+                    plan.append("Install PyTorch (GPU support)")
+
+                elif intent.target == "tensorflow":
+                    plan.append("Install TensorFlow (GPU support)")
+
+                elif intent.target == "jupyter":
+                    plan.append("Install JupyterLab")
+
+                elif intent.target == "gpu":
+                    # GPU setup is handled by CUDA/cuDNN
+                    pass
+
+                installed.add(intent.target)
+
+        # 3. Add GPU configuration if needed
+        if has_gpu:
+            plan.append("Configure GPU acceleration environment")
+
+        # 4. Add verification step
+        plan.append("Verify installation and GPU acceleration")
+
+        return plan
diff --git a/src/test_clarifier.py b/src/test_clarifier.py
new file mode 100644
index 0000000..a16d3f8
--- /dev/null
+++ b/src/test_clarifier.py
@@ -0,0 +1,12 @@
+from intent.detector import IntentDetector
+from intent.clarifier import Clarifier
+
+def test_clarifier_gpu_missing():
+    d = IntentDetector()
+    c = Clarifier()
+
+    text = "I want to run ML models"
+    intents = d.detect(text)
+
+    question = c.needs_clarification(intents, text)
+    assert question is not None
diff --git a/src/test_context.py b/src/test_context.py
new file mode 100644
index 0000000..c4e521f
--- /dev/null
+++ b/src/test_context.py
@@ -0,0 +1,13 @@
+from intent.context import SessionContext
+from intent.detector import Intent
+
+def test_context_storage():
+    ctx = SessionContext()
+    ctx.set_gpu("NVIDIA RTX 4090")
+
+    ctx.add_intents([Intent("install", "cuda")])
+    ctx.add_installed("cuda")
+
+    assert ctx.get_gpu() == "NVIDIA RTX 4090"
+    assert ctx.is_installed("cuda") is True
+    assert len(ctx.get_previous_intents()) == 1
diff --git a/src/test_intent_detection.py b/src/test_intent_detection.py
new file mode 100644
index 0000000..5dbecaa
--- /dev/null
+++ b/src/test_intent_detection.py
@@ -0,0 +1,15 @@
+from intent.detector import IntentDetector, Intent
+
+def test_detector_basic():
+    d = IntentDetector()
+    intents = d.detect("Install CUDA and PyTorch for GPU")
+
+    targets = {i.target for i in intents}
+    assert "cuda" in targets
+    assert "pytorch" in targets
+    assert "gpu" in targets
+
+def test_detector_empty():
+    d = IntentDetector()
+    intents = d.detect("Hello world, nothing here")
+    assert intents == []
diff --git a/src/test_llm_agent.py b/src/test_llm_agent.py
new file mode 100644
index 0000000..f1be07a
--- /dev/null
+++ b/src/test_llm_agent.py
@@ -0,0 +1,28 @@
+from intent.llm_agent import LLMIntentAgent
+
+class MockMessages:
+    def create(self, **kwargs):
+        class Response:
+            class Content:
+                text = "install: tensorflow\ninstall: jupyter"
+            content = [Content()]
+        return Response()
+
+class MockLLM:
+    def __init__(self):
+        self.messages = MockMessages()
+
+    def with_options(self, **kwargs):
+        # The agent calls `llm.with_options(timeout=...)`; returning self
+        # keeps the mocked `messages.create` on the call path (without this,
+        # the agent's try/except silently skipped the mock entirely).
+        return self
+
+def test_llm_agent_mocked():
+    agent = LLMIntentAgent(api_key="fake-key")
+
+    # Replace the real LLM with the mock
+    agent.llm = MockLLM()
+
+    # Disable clarification during testing
+    agent.clarifier.needs_clarification = lambda *a, **k: None
+
+    result = agent.process("Install ML tools on GPU")
+
+    assert "plan" in result
+    assert len(result["plan"]) > 0
+    assert "suggestions" in result
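+
+def test_llm_agent_fallback_without_key():
+    # Sketch of the no-key path: without an API key the agent runs in
+    # rule-based fallback mode (llm is None) and still returns a plan.
+    agent = LLMIntentAgent()
+    agent.clarifier.needs_clarification = lambda *a, **k: None
+    result = agent.process("install cuda for my gpu")
+    assert agent.llm is None
+    assert result["plan"]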
diff --git a/src/test_planner.py b/src/test_planner.py
new file mode 100644
index 0000000..fde7b15
--- /dev/null
+++ b/src/test_planner.py
@@ -0,0 +1,16 @@
+from intent.detector import Intent
+from intent.planner import InstallationPlanner
+
+def test_planner_cuda_pipeline():
+    planner = InstallationPlanner()
+    intents = [
+        Intent("install", "cuda"),
+        Intent("install", "pytorch"),
+        Intent("configure", "gpu")
+    ]
+    plan = planner.build_plan(intents)
+
+    assert "Install CUDA 12.3 + drivers" in plan
+    assert "Install PyTorch (GPU support)" in plan
+    assert "Configure GPU acceleration environment" in plan
+    assert plan[-1] == "Verify installation and GPU acceleration"
diff --git a/test/test_installation_history.py b/test/test_installation_history.py
index 7ab2720..3df1170 100644
--- a/test/test_installation_history.py
+++ b/test/test_installation_history.py
@@ -7,6 +7,7 @@ import tempfile
 import os
 from datetime import datetime
 
+
 from cortex.installation_history import (
     InstallationHistory,
     InstallationType,
diff --git a/tests/test_nl_parser.py b/tests/test_nl_parser.py
new file mode 100644
index 0000000..21de9f1
--- /dev/null
+++ b/tests/test_nl_parser.py
@@ -0,0 +1,37 @@
+import pytest
+from nl_parser import parse_request
+
+@pytest.mark.parametrize("text,expected", [
+    ("install something for machine learning", "install_ml"),
+    ("I need a web server", "install_web_server"),
+    ("set up python development environment", "setup_python_env"),
+    ("install docker and kubernets", "install_docker_k8s"),
+    ("Can you provision a python env with pip, venv and flake8?", "setup_python_env"),
+    ("need nginx or apache for a website", "install_web_server"),
+    ("deploy containers - docker", "install_docker"),
+    ("k8s and docker on my mac", "install_docker_k8s"),
+    ("i want to run pytorch", "install_ml"),
+    ("setup dev env", "ambiguous"),
+    ("add docker", "install_docker"),
+    ("pls install pyhton 3.10", "setup_python_env"),
+])
+def test_intent(text, expected):
+    result = parse_request(text)
+    intent = result["intent"]
+    confidence = result["confidence"]
+
+    if expected == "ambiguous":
+        assert result["clarifications"], f"Expected clarifications for: {text}"
+    else:
+        assert intent == expected
+        assert confidence >= 0.5
+
+def test_corrections():
+    r = parse_request("install docker and kubernets")
+    assert r["intent"] == "install_docker_k8s"
+    assert any(orig == "kubernets" for orig, _ in r["corrections"])
+
+def test_slot_extraction():
+    r = parse_request("pls install python 3.10 on mac")
+    assert r["slots"].get("python_version") == "3.10"
+    assert r["slots"].get("platform") in ("mac", "macos")
\ No newline at end of file