added object detection

winzamark123 · winzamark123 · commit 6ead14939a80 · 2025-06-20T17:27:16.000-07:00
diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py
@@ -15,6 +15,7 @@
 from .embedding import Embedding, AsyncEmbedding
 from .exceptions import JigsawStackError
 from .image_generation import ImageGeneration, AsyncImageGeneration
+from .object_detection import ObjectDetection, AsyncObjectDetection
 
 
 class JigsawStack:
@@ -118,6 +119,12 @@ def __init__(
             disable_request_logging=disable_request_logging,
         ).image_generation
 
+        self.object_detection = ObjectDetection(
+            api_key=api_key,
+            api_url=api_url,
+            disable_request_logging=disable_request_logging,
+        )
+
 
 class AsyncJigsawStack:
     validate: AsyncValidate
@@ -228,6 +235,12 @@ def __init__(
             disable_request_logging=disable_request_logging,
         ).image_generation
 
+        self.object_detection = AsyncObjectDetection(
+            api_key=api_key,
+            api_url=api_url,
+            disable_request_logging=disable_request_logging,
+        )
+
 
 # Create a global instance of the Web class
 __all__ = ["JigsawStack", "Search", "JigsawStackError", "AsyncJigsawStack"]
diff --git a/jigsawstack/_client.py b/jigsawstack/_client.py
diff --git a/jigsawstack/object_detection.py b/jigsawstack/object_detection.py
@@ -0,0 +1,206 @@
+from typing import Any, Dict, List, Union, cast, Literal
+from typing_extensions import NotRequired, TypedDict
+from .request import Request, RequestConfig
+from .async_request import AsyncRequest, AsyncRequestConfig
+from ._config import ClientConfig
+
+
+class Point(TypedDict):
+    x: int
+    """
+    X coordinate of the point
+    """
+    
+    y: int
+    """
+    Y coordinate of the point
+    """
+
+
+class BoundingBox(TypedDict):
+    top_left: Point
+    """
+    Top-left corner of the bounding box
+    """
+    
+    top_right: Point
+    """
+    Top-right corner of the bounding box
+    """
+    
+    bottom_left: Point
+    """
+    Bottom-left corner of the bounding box
+    """
+    
+    bottom_right: Point
+    """
+    Bottom-right corner of the bounding box
+    """
+    
+    width: int
+    """
+    Width of the bounding box
+    """
+    
+    height: int
+    """
+    Height of the bounding box
+    """
+
+
+class GuiElement(TypedDict):
+    bounds: BoundingBox
+    """
+    Bounding box coordinates of the GUI element
+    """
+    
+    content: Union[str, None]
+    """
+    Content of the GUI element, can be null if no object detected
+    """
+
+
+class DetectedObject(TypedDict):
+    bounds: BoundingBox
+    """
+    Bounding box coordinates of the detected object
+    """
+    
+    mask: NotRequired[str]
+    """
+    URL or base64 string depending on return_type - only present for some objects
+    """
+
+
+class UsageStats(TypedDict):
+    """
+    Usage statistics - structure depends on the RunPod response
+    """
+    pass  # Flexible structure for usage stats
+
+
+class ObjectDetectionParams(TypedDict):
+    url: NotRequired[str]
+    """
+    URL of the image to process
+    """
+    
+    file_store_key: NotRequired[str]
+    """
+    File store key of the image to process
+    """
+    
+    prompts: NotRequired[List[str]]
+    """
+    List of prompts for object detection
+    """
+    
+    features: NotRequired[List[Literal["object_detection", "gui"]]]
+    """
+    List of features to enable: object_detection, gui
+    """
+    
+    annotated_image: NotRequired[bool]
+    """
+    Whether to return an annotated image
+    """
+    
+    return_type: NotRequired[Literal["url", "base64"]]
+    """
+    Format for returned images: url or base64
+    """
+
+
+class ObjectDetectionResponse(TypedDict):
+    annotated_image: NotRequired[str]
+    """
+    URL or base64 string of annotated image (included only if annotated_image=true and objects/gui_elements exist)
+    """
+    
+    gui_elements: NotRequired[List[GuiElement]]
+    """
+    List of detected GUI elements (included only if features includes "gui")
+    """
+    
+    objects: NotRequired[List[DetectedObject]]
+    """
+    List of detected objects (included only if features includes "object_detection")
+    """
+    
+    _usage: NotRequired[UsageStats]
+    """
+    Optional usage statistics
+    """
+
+
+class ObjectDetection(ClientConfig):
+    config: RequestConfig
+
+    def __init__(
+        self,
+        api_key: str,
+        api_url: str,
+        disable_request_logging: Union[bool, None] = False,
+    ):
+        super().__init__(api_key, api_url, disable_request_logging)
+        self.config = RequestConfig(
+            api_url=api_url,
+            api_key=api_key,
+            disable_request_logging=disable_request_logging,
+        )
+
+    def detect(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:
+        """
+        Detect objects and/or GUI elements in an image
+        
+        Args:
+            params: Object detection parameters
+            
+        Returns:
+            Object detection response with detected objects, GUI elements, and optional annotated image
+        """
+        resp = Request(
+            config=self.config,
+            path="/ai/object_detection",
+            params=cast(Dict[Any, Any], params),
+            verb="POST",
+        ).perform_with_content()
+        
+        return resp
+
+
+class AsyncObjectDetection(ClientConfig):
+    config: AsyncRequestConfig
+
+    def __init__(
+        self,
+        api_key: str,
+        api_url: str,
+        disable_request_logging: Union[bool, None] = False,
+    ):
+        super().__init__(api_key, api_url, disable_request_logging)
+        self.config = AsyncRequestConfig(
+            api_url=api_url,
+            api_key=api_key,
+            disable_request_logging=disable_request_logging,
+        )
+
+    async def detect(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:
+        """
+        Detect objects and/or GUI elements in an image (async)
+        
+        Args:
+            params: Object detection parameters
+            
+        Returns:
+            Object detection response with detected objects, GUI elements, and optional annotated image
+        """
+        resp = await AsyncRequest(
+            config=self.config,
+            path="/ai/object_detection",
+            params=cast(Dict[Any, Any], params),
+            verb="POST",
+        ).perform_with_content()
+        
+        return resp
diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py
@@ -0,0 +1,36 @@
+from unittest.mock import MagicMock
+import unittest
+from jigsawstack.exceptions import JigsawStackError
+import jigsawstack
+import pytest
+import asyncio
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+jigsaw = jigsawstack.JigsawStack()
+async_jigsaw = jigsawstack.AsyncJigsawStack()
+
+
+def test_object_detection_response():
+    try:
+        result = jigsaw.object_detection.detect({"url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg"})
+        print(result)
+        assert result["success"] == True
+    except JigsawStackError as e:
+        pytest.fail(f"Unexpected JigsawStackError: {e}")
+
+
+def test_object_detection_response_async():
+    async def _test():
+        client = jigsawstack.AsyncJigsawStack()
+        try:
+            result = await client.object_detection.detect({"url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg"})
+            print(result)
+            assert result["success"] == True
+        except JigsawStackError as e:
+            pytest.fail(f"Unexpected JigsawStackError: {e}")
+
+    asyncio.run(_test())
+
diff --git a/tests/test_search.py b/tests/test_search.py
@@ -14,6 +14,22 @@
 
 
 def test_search_suggestion_response():
+    try:
+        result = jigsaw.web.search({"query": "Where is San Francisco"})
+        assert result["success"] == True
+    except JigsawStackError as e:
+        pytest.fail(f"Unexpected JigsawStackError: {e}")
+
+
+def test_ai_search_response():
+    try:
+        result = jigsaw.web.search({"query": "Where is San Francisco"})
+        assert result["success"] == True  
+    except JigsawStackError as e:
+        pytest.fail(f"Unexpected JigsawStackError: {e}")
+
+
+def test_search_suggestion_response_async():
     async def _test():
         client = jigsawstack.AsyncJigsawStack()
         try:
@@ -25,7 +41,7 @@ async def _test():
     asyncio.run(_test())
 
 
-def test_ai_search_response():
+def test_ai_search_response_async():
     async def _test():
         client = jigsawstack.AsyncJigsawStack()
         try: