Skip to content

Commit ba56ab1

Browse files
feat: separating test cases for deepresearch and ai_scrape.
1 parent c3c2541 commit ba56ab1

File tree

4 files changed

+242
-235
lines changed

4 files changed

+242
-235
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,8 @@ on:
88

99
jobs:
1010
ruff-format-check:
11-
name: Ruff Format Check - ${{ matrix.file }}
11+
name: Ruff Format Check
1212
runs-on: ubuntu-latest
13-
strategy:
14-
fail-fast: false
15-
matrix:
16-
file:
17-
- __init__.py
18-
- _config.py
19-
- _types.py
20-
- async_request.py
21-
- audio.py
22-
- classification.py
23-
- embedding_v2.py
24-
- embedding.py
25-
- exceptions.py
26-
- helpers.py
27-
- image_generation.py
28-
- prediction.py
29-
- prompt_engine.py
30-
- request.py
31-
- search.py
32-
- sentiment.py
33-
- sql.py
34-
- store.py
35-
- summary.py
36-
- translate.py
37-
- validate.py
38-
- vision.py
39-
- web.py
4013
steps:
4114
- uses: actions/checkout@v4
4215

@@ -48,9 +21,10 @@ jobs:
4821
- name: Install ruff
4922
run: pip install ruff
5023

51-
- name: Check formatting for ${{ matrix.file }}
24+
- name: Check all files with ruff
5225
run: |
53-
ruff check jigsawstack/${{ matrix.file }} --config .github/ruff.toml
26+
ruff check jigsawstack/ --config .github/ruff.toml
27+
ruff format --check jigsawstack/ --config .github/ruff.toml
5428
5529
test:
5630
name: Test - ${{ matrix.test-file }}
@@ -72,6 +46,8 @@ jobs:
7246
- test_translate.py
7347
- test_validate.py
7448
- test_web.py
49+
- test_deep_research.py
50+
- test_ai_scrape.py
7551
steps:
7652
- uses: actions/checkout@v4
7753

tests/test_ai_scrape.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import logging
2+
import os
3+
4+
import pytest
5+
from dotenv import load_dotenv
6+
7+
import jigsawstack
8+
from jigsawstack.exceptions import JigsawStackError
9+
10+
# Load environment variables via python-dotenv before the clients below
# look up the API key.
load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The sync and async clients are built from the same key, read once from
# the JIGSAWSTACK_API_KEY environment variable (None when it is unset).
_API_KEY = os.getenv("JIGSAWSTACK_API_KEY")
jigsaw = jigsawstack.JigsawStack(api_key=_API_KEY)
async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=_API_KEY)
17+
18+
# Page that every AI scrape scenario below targets.
URL = "https://jigsawstack.com"

# AI Scrape Test Cases
# Each entry pairs a pytest id ("name") with the request payload ("params")
# that the test classes in this module pass to ``web.ai_scrape``.
AI_SCRAPE_TEST_CASES = [
    {
        "name": "scrape_with_element_prompts",
        "params": {
            "url": URL,
            "element_prompts": ["title", "main content", "navigation links"],
        },
    },
    {
        "name": "scrape_with_selectors",
        "params": {
            "url": URL,
            "selectors": ["h1", "p", "a"],
        },
    },
    {
        # The tests assert that "meta" and "link" appear in the response
        # whenever they are requested through "features".
        "name": "scrape_with_features",
        "params": {
            "url": URL,
            "element_prompts": ["title"],
            "features": ["meta", "link"],
        },
    },
    {
        "name": "scrape_with_root_element",
        "params": {
            "url": URL,
            "element_prompts": ["content"],
            "root_element_selector": "main",
        },
    },
    {
        "name": "scrape_with_wait_for_timeout",
        "params": {
            "url": URL,
            "element_prompts": ["content"],
            "wait_for": {"mode": "timeout", "value": 3000},
        },
    },
    {
        "name": "scrape_mobile_view",
        "params": {
            "url": URL,
            "element_prompts": ["mobile menu"],
            "is_mobile": True,
        },
    },
    {
        "name": "scrape_with_cookies",
        "params": {
            "url": URL,
            "element_prompts": ["user data"],
            # The cookie domain must match the host in URL: a cookie scoped
            # to an unrelated domain would presumably never be attached to
            # the request for this page, leaving the cookie path untested.
            "cookies": [
                {"name": "session", "value": "test123", "domain": "jigsawstack.com"}
            ],
        },
    },
    {
        "name": "scrape_with_advance_config",
        "params": {
            "url": URL,
            "element_prompts": ["content"],
            "advance_config": {"console": True, "network": True, "cookies": True},
        },
    },
]
85+
86+
87+
class TestAIScrapeSync:
    """Run every AI scrape scenario through the synchronous client."""

    @pytest.mark.parametrize(
        "test_case",
        AI_SCRAPE_TEST_CASES,
        ids=[case["name"] for case in AI_SCRAPE_TEST_CASES],
    )
    def test_ai_scrape(self, test_case):
        """Call ``jigsaw.web.ai_scrape`` with the scenario params and check the response.

        A ``JigsawStackError`` raised anywhere in the body is reported as a
        test failure that names the scenario.
        """
        try:
            response = jigsaw.web.ai_scrape(test_case["params"])

            # Core response fields expected for every scenario.
            assert response["success"]
            assert "data" in response
            assert isinstance(response["data"], list)

            # Sections that must be present only when the scenario asked
            # for them through "features".
            requested = test_case["params"].get("features", [])
            if "meta" in requested:
                assert "meta" in response
            if "link" in requested:
                assert "link" in response
                assert isinstance(response["link"], list)

        except JigsawStackError as e:
            pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
113+
114+
115+
class TestAIScrapeAsync:
    """Run every AI scrape scenario through the asynchronous client."""

    @pytest.mark.parametrize(
        "test_case",
        AI_SCRAPE_TEST_CASES,
        ids=[case["name"] for case in AI_SCRAPE_TEST_CASES],
    )
    @pytest.mark.asyncio
    async def test_ai_scrape_async(self, test_case):
        """Await ``async_jigsaw.web.ai_scrape`` with the scenario params and check the response.

        A ``JigsawStackError`` raised anywhere in the body is reported as a
        test failure that names the scenario.
        """
        try:
            response = await async_jigsaw.web.ai_scrape(test_case["params"])

            # Core response fields expected for every scenario.
            assert response["success"]
            assert "data" in response
            assert isinstance(response["data"], list)

            # Sections that must be present only when the scenario asked
            # for them through "features".
            requested = test_case["params"].get("features", [])
            if "meta" in requested:
                assert "meta" in response
            if "link" in requested:
                assert "link" in response
                assert isinstance(response["link"], list)

        except JigsawStackError as e:
            pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")

tests/test_deep_research.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import logging
2+
import os
3+
4+
import pytest
5+
from dotenv import load_dotenv
6+
7+
import jigsawstack
8+
from jigsawstack.exceptions import JigsawStackError
9+
10+
# Load environment variables via python-dotenv before the clients below
# look up the API key.
load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The sync and async clients are built from the same key, read once from
# the JIGSAWSTACK_API_KEY environment variable (None when it is unset).
_API_KEY = os.getenv("JIGSAWSTACK_API_KEY")
jigsaw = jigsawstack.JigsawStack(api_key=_API_KEY)
async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=_API_KEY)
17+
18+
# NOTE(review): URL is not referenced by any test case or test in this
# module as shown; it is kept so the module namespace stays unchanged.
URL = "https://jigsawstack.com"


# Deep Research Test Cases
# Each entry pairs a pytest id ("name") with the request payload ("params")
# that the test classes in this module pass to ``web.deep_research``.
DEEP_RESEARCH_TEST_CASES = [
    {
        "name": "basic_deep_research",
        "params": {
            "query": "climate change effects",
        },
    },
    {
        "name": "technical_deep_research",
        "params": {
            "query": "quantum computing applications in cryptography",
        },
    },
    {
        "name": "deep_research_with_depth",
        "params": {
            "query": "renewable energy sources",
            # The depth control is sent as ``max_depth``; a bare ``depth``
            # key is presumably ignored by the endpoint, which would leave
            # this scenario identical to the basic one.
            # NOTE(review): confirm the parameter name against the Deep
            # Research API reference.
            "max_depth": 2,
        },
    },
]
43+
44+
45+
class TestDeepResearchSync:
    """Run every deep research scenario through the synchronous client."""

    @pytest.mark.parametrize(
        "test_case",
        DEEP_RESEARCH_TEST_CASES,
        ids=[case["name"] for case in DEEP_RESEARCH_TEST_CASES],
    )
    def test_deep_research(self, test_case):
        """Call ``jigsaw.web.deep_research`` with the scenario params and check the response.

        A ``JigsawStackError`` raised anywhere in the body is reported as a
        test failure that names the scenario.
        """
        try:
            response = jigsaw.web.deep_research(test_case["params"])

            # The response must report success and carry a non-empty
            # string under "results".
            assert response["success"]
            assert "results" in response
            report = response["results"]
            assert isinstance(report, str)
            assert len(report) > 0

            # "sources" is optional; when present it must be a list.
            if "sources" in response:
                assert isinstance(response["sources"], list)

        except JigsawStackError as e:
            pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
69+
70+
71+
class TestDeepResearchAsync:
    """Run every deep research scenario through the asynchronous client."""

    @pytest.mark.parametrize(
        "test_case",
        DEEP_RESEARCH_TEST_CASES,
        ids=[case["name"] for case in DEEP_RESEARCH_TEST_CASES],
    )
    @pytest.mark.asyncio
    async def test_deep_research_async(self, test_case):
        """Await ``async_jigsaw.web.deep_research`` with the scenario params and check the response.

        A ``JigsawStackError`` raised anywhere in the body is reported as a
        test failure that names the scenario.
        """
        try:
            response = await async_jigsaw.web.deep_research(test_case["params"])

            # The response must report success and carry a non-empty
            # string under "results".
            assert response["success"]
            assert "results" in response
            report = response["results"]
            assert isinstance(report, str)
            assert len(report) > 0

            # "sources" is optional; when present it must be a list.
            if "sources" in response:
                assert isinstance(response["sources"], list)

        except JigsawStackError as e:
            pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")

0 commit comments

Comments
 (0)