Skip to content

Commit 451ccb3

Browse files
authored
Use new estimate endpoint (take 2) (#1025)
* Reapply "Fix estimation endpoint take 3" This reverts commit 6ea56b5. * Reapply "Use new estimate endpoint on Aura API" This reverts commit 5f1fbf1. * Allow list of str for algo categories as well * Improve size parsing testing + reduce time for unit test
1 parent ae3c021 commit 451ccb3

File tree

9 files changed

+141
-35
lines changed

9 files changed

+141
-35
lines changed

changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
## Improvements
1010

1111
- `GdsSessions.get_or_create` now allows specifying the `aura_instance_id` instead of `uri` as part of the `db_connection`. This is required if the instance id cannot be derived from the provided database connection URI, such as for Multi-Database instances.
12+
- `GdsSessions.estimate` now recommends smaller sizes such as `2GB`. It also allows specifying node label, node property, and relationship property counts for better estimates.
1213

1314
## Other changes
1415

doc/modules/ROOT/pages/graph-analytics-serverless.adoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,9 @@ memory = sessions.estimate(
219219
node_count=20,
220220
relationship_count=50,
221221
algorithm_categories=[AlgorithmCategory.CENTRALITY, AlgorithmCategory.NODE_EMBEDDING],
222+
node_label_count=1,
223+
node_property_count=1,
224+
relationship_property_count=1
222225
)
223226
----
224227

justfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ style skip_notebooks="false":
44
convert-notebooks:
55
./scripts/nb2doc/convert.sh
66

7-
unit-tests:
8-
pytest tests/unit
7+
unit-tests extra_options="":
8+
pytest tests/unit {{extra_options}}
99

1010
# just it test true "--durations=20"
1111
it filter="" enterprise="true" extra_options="":

src/graphdatascience/session/aura_api.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,16 +305,26 @@ def wait_for_instance_running(
305305
return WaitResult.from_error(f"Instance is not running after waiting for {waited_time} seconds")
306306

307307
def estimate_size(
308-
self, node_count: int, relationship_count: int, algorithm_categories: list[AlgorithmCategory]
308+
self,
309+
node_count: int,
310+
node_label_count: int,
311+
node_property_count: int,
312+
relationship_count: int,
313+
relationship_property_count: int,
314+
algorithm_categories: list[AlgorithmCategory],
309315
) -> EstimationDetails:
310316
data = {
311317
"node_count": node_count,
318+
"node_label_count": node_label_count,
319+
"node_property_count": node_property_count,
312320
"relationship_count": relationship_count,
321+
"relationship_property_count": relationship_property_count,
313322
"algorithm_categories": [i.value for i in algorithm_categories],
314-
"instance_type": "dsenterprise",
315323
}
316324

317-
response = self._request_session.post(f"{self._base_uri}/{AuraApi.API_VERSION}/instances/sizing", json=data)
325+
response = self._request_session.post(
326+
f"{self._base_uri}/{AuraApi.API_VERSION}/graph-analytics/sessions/sizing", json=data
327+
)
318328
self._check_resp(response)
319329

320330
return EstimationDetails.from_json(response.json()["data"])

src/graphdatascience/session/aura_api_responses.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,8 @@ def from_json(cls, json: dict[str, Any]) -> InstanceCreateDetails:
169169

170170
@dataclass(repr=True, frozen=True)
171171
class EstimationDetails:
172-
min_required_memory: str
172+
estimated_memory: str
173173
recommended_size: str
174-
did_exceed_maximum: bool
175174

176175
@classmethod
177176
def from_json(cls, json: dict[str, Any]) -> EstimationDetails:
@@ -181,6 +180,26 @@ def from_json(cls, json: dict[str, Any]) -> EstimationDetails:
181180

182181
return cls(**{f.name: json[f.name] for f in fields})
183182

183+
def exceeds_recommended(self) -> bool:
184+
return EstimationDetails._memory_in_bytes(self.estimated_memory) > EstimationDetails._memory_in_bytes(
185+
self.recommended_size
186+
)
187+
188+
@staticmethod
189+
def _memory_in_bytes(size: str) -> float:
190+
size_str = size.upper().strip()
191+
# treat GB, Gi and G the same, as the result is only used for internal comparison
192+
size_str = size_str.removesuffix("B").removesuffix("I")
193+
194+
if size_str.endswith("G"):
195+
return float(size_str[:-1]) * 1024**3 # 1GB = 1024^3 bytes
196+
elif size_str.endswith("M"):
197+
return float(size_str[:-1]) * 1024**2 # 1MB = 1024^2 bytes
198+
elif size_str.endswith("K"):
199+
return float(size_str[:-1]) * 1024 # 1KB = 1024 bytes
200+
else:
201+
return float(size_str) # assume bytes
202+
184203

185204
class WaitResult(NamedTuple):
186205
connection_url: str

src/graphdatascience/session/dedicated_sessions.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,29 @@ def estimate(
3030
self,
3131
node_count: int,
3232
relationship_count: int,
33-
algorithm_categories: list[AlgorithmCategory] | None = None,
33+
algorithm_categories: list[AlgorithmCategory] | list[str] | None = None,
34+
node_label_count: int = 0,
35+
node_property_count: int = 0,
36+
relationship_property_count: int = 0,
3437
) -> SessionMemory:
3538
if algorithm_categories is None:
3639
algorithm_categories = []
37-
estimation = self._aura_api.estimate_size(node_count, relationship_count, algorithm_categories)
40+
else:
41+
algorithm_categories = [
42+
AlgorithmCategory(cat) if isinstance(cat, str) else cat for cat in algorithm_categories
43+
]
44+
estimation = self._aura_api.estimate_size(
45+
node_count=node_count,
46+
node_label_count=node_label_count,
47+
node_property_count=node_property_count,
48+
relationship_count=relationship_count,
49+
relationship_property_count=relationship_property_count,
50+
algorithm_categories=algorithm_categories,
51+
)
3852

39-
if estimation.did_exceed_maximum:
53+
if estimation.exceeds_recommended():
4054
warnings.warn(
41-
f"The estimated memory `{estimation.min_required_memory}` exceeds the maximum size"
55+
f"The estimated memory `{estimation.estimated_memory}` exceeds the maximum size"
4256
f" supported by your Aura project (`{estimation.recommended_size}`).",
4357
ResourceWarning,
4458
)

src/graphdatascience/session/gds_sessions.py

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,10 @@ def __init__(self, api_credentials: AuraAPICredentials) -> None:
5555
"""
5656
Initializes a new instance of the GdsSessions class.
5757
58-
Args:
59-
api_credentials (AuraAPICredentials): The Aura API credentials used for establishing a connection.
58+
Parameters
59+
----------
60+
api_credentials
61+
The Aura API credentials used for establishing a connection.
6062
"""
6163
aura_env = os.environ.get("AURA_ENV")
6264
aura_api = AuraApi(
@@ -71,22 +73,45 @@ def estimate(
7173
self,
7274
node_count: int,
7375
relationship_count: int,
74-
algorithm_categories: list[AlgorithmCategory] | None = None,
76+
algorithm_categories: list[AlgorithmCategory] | list[str] | None = None,
77+
node_label_count: int = 0,
78+
node_property_count: int = 0,
79+
relationship_property_count: int = 0,
7580
) -> SessionMemory:
7681
"""
7782
Estimates the memory required for a session with the given node and relationship counts.
7883
79-
Args:
80-
node_count (int): The number of nodes.
81-
relationship_count (int): The number of relationships.
82-
algorithm_categories (list[AlgorithmCategory] | None): The algorithm categories to consider.
83-
84-
Returns:
85-
SessionMemory: The estimated memory required for the session.
84+
Parameters
85+
----------
86+
node_count
87+
Number of nodes.
88+
relationship_count
89+
Number of relationships.
90+
algorithm_categories
91+
The algorithm categories to consider.
92+
node_label_count
93+
Number of node labels.
94+
node_property_count
95+
Number of node properties.
96+
relationship_property_count
97+
Number of relationship properties.
98+
99+
100+
Returns
101+
-------
102+
SessionMemory
103+
The estimated memory required for the session.
86104
"""
87105
if algorithm_categories is None:
88106
algorithm_categories = []
89-
return self._impl.estimate(node_count, relationship_count, algorithm_categories)
107+
return self._impl.estimate(
108+
node_count=node_count,
109+
relationship_count=relationship_count,
110+
algorithm_categories=algorithm_categories,
111+
node_label_count=node_label_count,
112+
node_property_count=node_property_count,
113+
relationship_property_count=relationship_property_count,
114+
)
90115

91116
def available_cloud_locations(self) -> list[CloudLocation]:
92117
"""

tests/unit/session/test_dedicated_sessions.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def __init__(
4949
self.id_counter = 0
5050
self.time = 0
5151
self._status_after_creating = status_after_creating
52-
self._size_estimation = size_estimation or EstimationDetails("1GB", "8GB", False)
52+
self._size_estimation = size_estimation or EstimationDetails("1GB", "8GB")
5353
self._console_user = console_user
5454
self._admin_user = admin_user
5555

@@ -225,7 +225,13 @@ def project_details(self) -> ProjectDetails:
225225
return ProjectDetails(id=self._project_id, cloud_locations={CloudLocation("aws", "leipzig-1")})
226226

227227
def estimate_size(
228-
self, node_count: int, relationship_count: int, algorithm_categories: list[AlgorithmCategory]
228+
self,
229+
node_count: int,
230+
node_label_count: int,
231+
node_property_count: int,
232+
relationship_count: int,
233+
relationship_property_count: int,
234+
algorithm_categories: list[AlgorithmCategory],
229235
) -> EstimationDetails:
230236
return self._size_estimation
231237

@@ -893,14 +899,21 @@ def test_create_waiting_forever(
893899

894900

895901
def test_estimate_size() -> None:
896-
aura_api = FakeAuraApi(size_estimation=EstimationDetails("1GB", "8GB", False))
902+
aura_api = FakeAuraApi(size_estimation=EstimationDetails("1GB", "8GB"))
897903
sessions = DedicatedSessions(aura_api)
898904

899905
assert sessions.estimate(1, 1, [AlgorithmCategory.CENTRALITY]) == SessionMemory.m_8GB
900906

901907

908+
def test_estimate_str_categories_size() -> None:
909+
aura_api = FakeAuraApi(size_estimation=EstimationDetails("1GB", "8GB"))
910+
sessions = DedicatedSessions(aura_api)
911+
912+
assert sessions.estimate(1, 1, ["centrality"]) == SessionMemory.m_8GB
913+
914+
902915
def test_estimate_size_exceeds() -> None:
903-
aura_api = FakeAuraApi(size_estimation=EstimationDetails("16GB", "8GB", True))
916+
aura_api = FakeAuraApi(size_estimation=EstimationDetails("16GB", "8GB"))
904917
sessions = DedicatedSessions(aura_api)
905918

906919
with pytest.warns(

tests/unit/test_aura_api.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -586,11 +586,11 @@ def test_dont_wait_forever_for_session(requests_mock: Mocker, caplog: LogCapture
586586

587587
with caplog.at_level(logging.DEBUG):
588588
assert (
589-
"Session `id0` is not running after 0.2 seconds"
590-
in api.wait_for_session_running("id0", sleep_time=0.05, max_wait_time=0.2).error
589+
"Session `id0` is not running after 0.01 seconds"
590+
in api.wait_for_session_running("id0", sleep_time=0.001, max_wait_time=0.01).error
591591
)
592592

593-
assert "Session `id0` is not yet running. Current status: Creating Host: foo.bar. Retrying in 0.1" in caplog.text
593+
assert "Session `id0` is not yet running. Current status: Creating Host: foo.bar. Retrying in 0.001" in caplog.text
594594

595595

596596
def test_wait_for_session_running(requests_mock: Mocker) -> None:
@@ -1024,11 +1024,11 @@ def test_dont_wait_forever(requests_mock: Mocker, caplog: LogCaptureFixture) ->
10241024

10251025
with caplog.at_level(logging.DEBUG):
10261026
assert (
1027-
"Instance is not running after waiting for 0.7"
1028-
in api.wait_for_instance_running("id0", max_wait_time=0.7).error
1027+
"Instance is not running after waiting for 0.01"
1028+
in api.wait_for_instance_running("id0", max_wait_time=0.01, sleep_time=0.001).error
10291029
)
10301030

1031-
assert "Instance `id0` is not yet running. Current status: creating. Retrying in 0.2 seconds..." in caplog.text
1031+
assert "Instance `id0` is not yet running. Current status: creating. Retrying in 0.001 seconds..." in caplog.text
10321032

10331033

10341034
def test_wait_for_instance_running(requests_mock: Mocker) -> None:
@@ -1099,12 +1099,14 @@ def test_wait_for_instance_deleting(requests_mock: Mocker) -> None:
10991099
def test_estimate_size(requests_mock: Mocker) -> None:
11001100
mock_auth_token(requests_mock)
11011101
requests_mock.post(
1102-
"https://api.neo4j.io/v1/instances/sizing",
1103-
json={"data": {"did_exceed_maximum": True, "min_required_memory": "307GB", "recommended_size": "96GB"}},
1102+
"https://api.neo4j.io/v1/graph-analytics/sessions/sizing",
1103+
json={"data": {"estimated_memory": "3070GB", "recommended_size": "512GB"}},
11041104
)
11051105

11061106
api = AuraApi("", "", project_id="some-tenant")
1107-
assert api.estimate_size(100, 10, [AlgorithmCategory.NODE_EMBEDDING]) == EstimationDetails("307GB", "96GB", True)
1107+
assert api.estimate_size(100, 1, 1, 10, 1, [AlgorithmCategory.NODE_EMBEDDING]) == EstimationDetails(
1108+
estimated_memory="3070GB", recommended_size="512GB"
1109+
)
11081110

11091111

11101112
def test_extract_id() -> None:
@@ -1215,3 +1217,22 @@ def test_parse_session_info_without_optionals() -> None:
12151217
project_id="tenant-1",
12161218
user_id="user-1",
12171219
)
1220+
1221+
1222+
def test_estimate_size_parsing() -> None:
1223+
assert EstimationDetails._memory_in_bytes("8GB") == 8589934592
1224+
assert EstimationDetails._memory_in_bytes("8G") == 8589934592
1225+
assert EstimationDetails._memory_in_bytes("512MB") == 536870912
1226+
assert EstimationDetails._memory_in_bytes("256KB") == 262144
1227+
assert EstimationDetails._memory_in_bytes("1024B") == 1024
1228+
assert EstimationDetails._memory_in_bytes("12345") == 12345
1229+
assert EstimationDetails._memory_in_bytes("8Gi") == 8589934592
1230+
assert EstimationDetails._memory_in_bytes("8gb") == 8589934592
1231+
1232+
1233+
def test_estimate_exceeds_maximum() -> None:
1234+
estimation = EstimationDetails(estimated_memory="16Gi", recommended_size="8Gi")
1235+
assert estimation.exceeds_recommended() is True
1236+
1237+
estimation = EstimationDetails(estimated_memory="8Gi", recommended_size="16Gi")
1238+
assert estimation.exceeds_recommended() is False

0 commit comments

Comments
 (0)