@@ -550,19 +550,17 @@ class RawTranscriptionConfig(BaseModel):
550550 """
551551 Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
552552
553- Automatic Language Detection is supported for the following languages:
553+ See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
554+ """
554555
555- - English
556- - Spanish
557- - French
558- - German
559- - Italian
560- - Portuguese
561- - Dutch
556+ language_confidence_threshold : Optional [float ]
557+ """
558+ The confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
559+ if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
562560 """
563561
564562 speech_threshold : Optional [float ]
565- "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
563+ "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
566564
567565 speech_model : Optional [SpeechModel ]
568566 """
@@ -608,6 +606,7 @@ def __init__(
608606 summary_type : Optional [SummarizationType ] = None ,
609607 auto_highlights : Optional [bool ] = None ,
610608 language_detection : Optional [bool ] = None ,
609+ language_confidence_threshold : Optional [float ] = None ,
611610 speech_threshold : Optional [float ] = None ,
612611 raw_transcription_config : Optional [RawTranscriptionConfig ] = None ,
613612 speech_model : Optional [SpeechModel ] = None ,
@@ -644,8 +643,10 @@ def __init__(
644643 summary_model: The summarization model to use in case `summarization` is enabled
645644 summary_type: The summarization type to use in case `summarization` is enabled
646645 auto_highlights: Detect important phrases and words in your transcription text.
647- language_detection: Identify the dominant language that’s spoken in an audio file, and route the file to the appropriate model for the detected language.
648- speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive
646+ language_detection: Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
647+ language_confidence_threshold: The confidence threshold that must be reached if `language_detection` is enabled.
648+ An error will be returned if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
649+ speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive.
649650 raw_transcription_config: Create the config from a `RawTranscriptionConfig`
650651 """
651652 self ._raw_transcription_config = raw_transcription_config
@@ -691,6 +692,7 @@ def __init__(
691692 )
692693 self .auto_highlights = auto_highlights
693694 self .language_detection = language_detection
695+ self .language_confidence_threshold = language_confidence_threshold
694696 self .speech_threshold = speech_threshold
695697 self .speech_model = speech_model
696698
@@ -1021,19 +1023,26 @@ def language_detection(self, enable: Optional[bool]) -> None:
10211023 """
10221024 Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
10231025
1024- Automatic Language Detection is supported for the following languages:
1025-
1026- - English
1027- - Spanish
1028- - French
1029- - German
1030- - Italian
1031- - Portuguese
1032- - Dutch
1026+ See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
10331027 """
10341028
10351029 self ._raw_transcription_config .language_detection = enable
10361030
1031+ @property
1032+ def language_confidence_threshold (self ) -> Optional [float ]:
1033+ "Returns the confidence threshold that must be reached for automatic language detection."
1034+
1035+ return self ._raw_transcription_config .language_confidence_threshold
1036+
1037+ @language_confidence_threshold .setter
1038+ def language_confidence_threshold (self , threshold : Optional [float ]) -> None :
1039+ """
1040+ Set the confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
1041+ if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
1042+ """
1043+
1044+ self ._raw_transcription_config .language_confidence_threshold = threshold
1045+
10371046 @property
10381047 def speech_threshold (self ) -> Optional [float ]:
10391048 "Returns the current speech threshold."
@@ -1042,10 +1051,7 @@ def speech_threshold(self) -> Optional[float]:
10421051
10431052 @speech_threshold .setter
10441053 def speech_threshold (self , threshold : Optional [float ]) -> None :
1045- "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
1046-
1047- if threshold is not None and (threshold < 0 or threshold > 1 ):
1048- raise ValueError ("speech_threshold must be between 0 and 1 (inclusive)." )
1054+ "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
10491055
10501056 self ._raw_transcription_config .speech_threshold = threshold
10511057
@@ -1638,17 +1644,15 @@ class BaseTranscript(BaseModel):
16381644 """
16391645 Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
16401646
1641- Automatic Language Detection is supported for the following languages:
1642-
1643- - English
1644- - Spanish
1645- - French
1646- - German
1647- - Italian
1648- - Portuguese
1649- - Dutch
1647+ See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
16501648 """
16511649
1650+ language_confidence_threshold : Optional [float ]
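1651+ "The confidence threshold that must be reached if `language_detection` is enabled. An error will be returned if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive."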
1652+
1653+ language_confidence : Optional [float ]
1654+ "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)."
1655+
16521656 speech_threshold : Optional [float ]
16531657 "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
16541658
0 commit comments