Skip to content

Commit 1753960

Browse files
dmccrystals0h3yl
authored and committed
feat: enable retrieval of existing transcripts
GitOrigin-RevId: 369957adcacc25eaf6191b60d0cf06176a7ffbbb
1 parent 45d8483 commit 1753960

File tree

4 files changed

+170
-0
lines changed

4 files changed

+170
-0
lines changed

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,3 +678,38 @@ import assemblyai as aai
678678

679679
aai.settings.polling_interval = 1.0
680680
```
681+
682+
## Retrieving Existing Transcripts
683+
684+
### Retrieving a Single Transcript
685+
686+
If you previously created a transcript, you can use its ID to retrieve it later.
687+
688+
```python
689+
import assemblyai as aai
690+
691+
transcript = aai.Transcript.get_by_id("<TRANSCRIPT_ID>")
692+
693+
print(transcript.id)
694+
print(transcript.text)
695+
```
696+
697+
### Retrieving Multiple Transcripts as a Group
698+
699+
You can also retrieve multiple existing transcripts and combine them into a single `TranscriptGroup` object. This allows you to perform operations on the transcript group as a single unit, such as querying the combined transcripts with LeMUR.
700+
701+
```python
702+
import assemblyai as aai
703+
704+
transcript_group = aai.TranscriptGroup.get_by_ids(["<TRANSCRIPT_ID_1>", "<TRANSCRIPT_ID_2>"])
705+
706+
summary = transcript_group.lemur.summarize(context="Customers asking for cars", answer_format="TLDR")
707+
708+
print(summary)
709+
```
710+
711+
### Retrieving Transcripts Asynchronously
712+
713+
Both `Transcript.get_by_id` and `TranscriptGroup.get_by_ids` have asynchronous counterparts, `Transcript.get_by_id_async` and `TranscriptGroup.get_by_ids_async`, respectively. These methods return a `concurrent.futures.Future` immediately instead of blocking until the transcript(s) are retrieved; call `.result()` on the future to obtain the `Transcript` or `TranscriptGroup`.
714+
715+
See the above section on [Synchronous vs Asynchronous](#synchronous-vs-asynchronous) for more information.

assemblyai/transcriber.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,30 @@ def from_response(
236236

237237
return self
238238

239+
@classmethod
def get_by_id(cls, transcript_id: str) -> Self:
    """Fetch an existing transcript. Blocks until the transcript is completed.

    Builds an instance bound to ``transcript_id`` and returns the result of
    its ``wait_for_completion()`` call.

    Args:
        transcript_id: the id of the transcript to fetch

    Returns:
        The transcript object identified by the given id.
    """
    return cls(transcript_id=transcript_id).wait_for_completion()
250+
251+
@classmethod
def get_by_id_async(cls, transcript_id: str) -> concurrent.futures.Future[Self]:
    """Fetch an existing transcript asynchronously.

    Builds an instance bound to ``transcript_id`` and returns the future
    produced by its ``wait_for_completion_async()`` call.

    Args:
        transcript_id: the id of the transcript to fetch

    Returns:
        A future that will resolve to the transcript object identified by the
        given id. Call ``.result()`` on it to obtain the transcript.
    """
    return cls(transcript_id=transcript_id).wait_for_completion_async()
262+
239263
@property
240264
def id(self) -> Optional[str]:
241265
"The unique identifier of your transcription"
@@ -526,6 +550,7 @@ def __init__(
526550
transcript_ids=transcript_ids,
527551
client=self._client,
528552
)
553+
self._executor = concurrent.futures.ThreadPoolExecutor()
529554

530555
@property
531556
def transcripts(self) -> List[Transcript]:
@@ -542,6 +567,16 @@ def __iter__(self) -> Iterator[Transcript]:
542567

543568
return iter(self.transcripts)
544569

570+
@classmethod
def get_by_ids(cls, transcript_ids: List[str]) -> Self:
    """Fetch a group of existing transcripts. Blocks until the group is completed.

    Builds a group from ``transcript_ids`` and returns the result of its
    ``wait_for_completion()`` call.

    Args:
        transcript_ids: the ids of the transcripts to fetch

    Returns:
        The transcript group containing the transcripts identified by the
        given ids.
    """
    return cls(transcript_ids=transcript_ids).wait_for_completion()
573+
574+
@classmethod
def get_by_ids_async(
    cls, transcript_ids: List[str]
) -> concurrent.futures.Future[Self]:
    """Fetch a group of existing transcripts asynchronously.

    Builds a group from ``transcript_ids`` and returns the future produced by
    its ``wait_for_completion_async()`` call.

    Args:
        transcript_ids: the ids of the transcripts to fetch

    Returns:
        A future that will resolve to the transcript group containing the
        transcripts identified by the given ids.
    """
    return cls(transcript_ids=transcript_ids).wait_for_completion_async()
579+
545580
@property
546581
def status(self) -> types.TranscriptStatus:
547582
"""
@@ -595,6 +630,11 @@ def wait_for_completion(self) -> Self:
595630

596631
return self
597632

633+
def wait_for_completion_async(
634+
self,
635+
) -> concurrent.futures.Future[Self]:
636+
return self._executor.submit(self.wait_for_completion)
637+
598638

599639
class _TranscriberImpl:
600640
"""

tests/unit/test_transcript.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,44 @@ def test_get_sentences_and_paragraphs_fails(httpx_mock: HTTPXMock):
304304

305305
# check whether we mocked everything
306306
assert len(httpx_mock.get_requests()) == 2
307+
308+
309+
def test_get_by_id(httpx_mock: HTTPXMock):
    """`Transcript.get_by_id` returns a completed transcript for the given id."""
    transcript_id = "123"
    mock_transcript_response = factories.generate_dict_factory(
        factories.TranscriptCompletedResponseFactory
    )()
    # Register the completed payload for the GET on this transcript's URL.
    httpx_mock.add_response(
        url=f"{aai.settings.base_url}/transcript/{transcript_id}",
        status_code=httpx.codes.OK,
        method="GET",
        json=mock_transcript_response,
    )

    transcript = aai.Transcript.get_by_id(transcript_id)

    assert isinstance(transcript, aai.Transcript)
    assert transcript.status == aai.TranscriptStatus.completed
    assert transcript.id == transcript_id
    assert transcript.error is None
327+
328+
329+
def test_get_by_id_async(httpx_mock: HTTPXMock):
    """`Transcript.get_by_id_async` returns a future resolving to the transcript."""
    transcript_id = "123"
    mock_transcript_response = factories.generate_dict_factory(
        factories.TranscriptCompletedResponseFactory
    )()
    # Register the completed payload for the GET on this transcript's URL.
    httpx_mock.add_response(
        url=f"{aai.settings.base_url}/transcript/{transcript_id}",
        status_code=httpx.codes.OK,
        method="GET",
        json=mock_transcript_response,
    )

    transcript_future = aai.Transcript.get_by_id_async(transcript_id)
    # Block on the future so the assertions run against the resolved transcript.
    transcript = transcript_future.result()

    assert isinstance(transcript, aai.Transcript)
    assert transcript.status == aai.TranscriptStatus.completed
    assert transcript.id == transcript_id
    assert transcript.error is None

tests/unit/test_transcript_group.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import uuid
22

3+
import httpx
4+
from pytest_httpx import HTTPXMock
5+
36
import assemblyai as aai
47
import assemblyai as aai
58
from tests.unit import factories
@@ -71,3 +74,54 @@ def test_transcript_group_check_status():
7174

7275
transcript_group.add_transcript(transcript_error)
7376
assert transcript_group.status == aai.TranscriptStatus.error
77+
78+
79+
def test_get_by_ids(httpx_mock: HTTPXMock):
    """`TranscriptGroup.get_by_ids` returns a completed group with exactly the requested ids."""
    transcript_ids = ["123", "456"]
    mock_transcript_response = factories.generate_dict_factory(
        factories.TranscriptCompletedResponseFactory
    )()
    # Every id is served the same completed payload on its own URL.
    for transcript_id in transcript_ids:
        httpx_mock.add_response(
            url=f"{aai.settings.base_url}/transcript/{transcript_id}",
            status_code=httpx.codes.OK,
            method="GET",
            json=mock_transcript_response,
        )

    transcript_group = aai.TranscriptGroup.get_by_ids(transcript_ids)

    assert isinstance(transcript_group, aai.TranscriptGroup)
    assert transcript_group.status == aai.TranscriptStatus.completed

    # Tick ids off a copy so the list handed to the SDK is never mutated by
    # the checks; an empty remainder means each requested id appeared once.
    remaining_ids = list(transcript_ids)
    for transcript in transcript_group:
        assert transcript.id in remaining_ids
        remaining_ids.remove(transcript.id)
        assert transcript.error is None
    assert len(remaining_ids) == 0
102+
103+
104+
def test_get_by_id_async(httpx_mock: HTTPXMock):
    """`TranscriptGroup.get_by_ids_async` returns a future resolving to the group.

    NOTE(review): despite its name, this test exercises
    `TranscriptGroup.get_by_ids_async`; consider renaming it to
    `test_get_by_ids_async`.
    """
    transcript_ids = ["123", "456"]
    mock_transcript_response = factories.generate_dict_factory(
        factories.TranscriptCompletedResponseFactory
    )()
    # Every id is served the same completed payload on its own URL.
    for transcript_id in transcript_ids:
        httpx_mock.add_response(
            url=f"{aai.settings.base_url}/transcript/{transcript_id}",
            status_code=httpx.codes.OK,
            method="GET",
            json=mock_transcript_response,
        )

    transcript_group_future = aai.TranscriptGroup.get_by_ids_async(transcript_ids)
    # Block on the future so the assertions run against the resolved group.
    transcript_group = transcript_group_future.result()

    assert isinstance(transcript_group, aai.TranscriptGroup)
    assert transcript_group.status == aai.TranscriptStatus.completed

    # Tick ids off a copy so the list handed to the SDK is never mutated by
    # the checks; an empty remainder means each requested id appeared once.
    remaining_ids = list(transcript_ids)
    for transcript in transcript_group:
        assert transcript.id in remaining_ids
        remaining_ids.remove(transcript.id)
        assert transcript.error is None
    assert len(remaining_ids) == 0

0 commit comments

Comments
 (0)