diff --git a/libs/community/langchain_community/document_loaders/__init__.py b/libs/community/langchain_community/document_loaders/__init__.py index 220734ad008..10190996e38 100644 --- a/libs/community/langchain_community/document_loaders/__init__.py +++ b/libs/community/langchain_community/document_loaders/__init__.py @@ -41,7 +41,6 @@ from langchain_community.document_loaders.azlyrics import AZLyricsLoader from langchain_community.document_loaders.azure_ai_data import ( AzureAIDataLoader, ) -from langchain_community.document_loaders.azure_ai_speech import AzureAISpeechLoader from langchain_community.document_loaders.azure_blob_storage_container import ( AzureBlobStorageContainerLoader, ) @@ -266,7 +265,6 @@ __all__ = [ "AsyncHtmlLoader", "AthenaLoader", "AzureAIDataLoader", - "AzureAISpeechLoader", "AzureAIDocumentIntelligenceLoader", "AzureBlobStorageContainerLoader", "AzureBlobStorageFileLoader", diff --git a/libs/community/langchain_community/document_loaders/azure_ai_speech.py b/libs/community/langchain_community/document_loaders/azure_ai_speech.py deleted file mode 100644 index 7c2c2eecd96..00000000000 --- a/libs/community/langchain_community/document_loaders/azure_ai_speech.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -from typing import List, Optional - -from langchain_core.documents import Document - -from langchain_community.document_loaders.base import BaseLoader -from langchain_community.document_loaders.blob_loaders import Blob -from langchain_community.document_loaders.parsers.audio import AzureAISpeechParser - - -class AzureAISpeechLoader(BaseLoader): - """Azure AI Speech Service Document Loader. - - A document loader that can load an audio file from the local file system - and transcribe it using Azure AI Speech Service. - - Examples: - - .. 
code-block:: python - - from langchain_community.document_loaders import AzureAISpeechLoader - - loader = AzureAISpeechParser( - file_path="path/to/directory/example.wav", - api_key="speech-api-key-from-azure", - region="speech-api-region-from-azure" - ) - - loader.lazy_load() - """ - - def load(self) -> List[Document]: - blob = Blob.from_path(self.file_path) - return self.parser.parse(blob) - - def lazy_load(self) -> List[Document]: - return self.load() - - def __init__(self, file_path: str, **kwargs: Optional[list[str] | str]) -> None: - """ - Args: - file_path: The path to the audio file. - """ - self.file_path = file_path - self.parser = AzureAISpeechParser(**kwargs) diff --git a/libs/community/langchain_community/document_loaders/parsers/audio.py b/libs/community/langchain_community/document_loaders/parsers/audio.py index 9b5ff87692e..5ddcf12be9a 100644 --- a/libs/community/langchain_community/document_loaders/parsers/audio.py +++ b/libs/community/langchain_community/document_loaders/parsers/audio.py @@ -80,7 +80,7 @@ class OpenAIWhisperParser(BaseBlobParser): file_obj.name = f"part_{split_number}.mp3" # Transcribe - print(f"Transcribing part {split_number + 1}!") # noqa: T201 + logger.info(f"Transcribing part {split_number + 1}!") # noqa: T201 attempts = 0 while attempts < 3: try: @@ -93,10 +93,10 @@ class OpenAIWhisperParser(BaseBlobParser): break except Exception as e: attempts += 1 - print(f"Attempt {attempts} failed. Exception: {str(e)}") # noqa: T201 + logger.error(f"Attempt {attempts} failed. 
Exception: {str(e)}")
                    time.sleep(5)
            else:
-                print("Failed to transcribe after 3 attempts.")  # noqa: T201
+                logger.error("Failed to transcribe after 3 attempts.")
                 continue
 
             yield Document(
@@ -185,7 +185,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
             rec_model = "openai/whisper-large"
 
         self.lang_model = lang_model if lang_model else rec_model
-        print("Using the following model: ", self.lang_model)  # noqa: T201
+        logger.info("Using the following model: %s", self.lang_model)
 
         self.batch_size = batch_size
 
@@ -232,7 +232,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
         file_obj = io.BytesIO(audio.export(format="mp3").read())
 
         # Transcribe
-        print(f"Transcribing part {blob.path}!")  # noqa: T201
+        logger.info(f"Transcribing part {blob.path}!")
 
         y, sr = librosa.load(file_obj, sr=16000)
 
@@ -467,11 +467,11 @@ class AzureAISpeechParser(BaseBlobParser):
                         "speaker_id": speaker_id,
                     },
                 )
-                print(f"TRANSCRIBED:{evt_dict}")
+                logger.info(f"TRANSCRIBED:{evt_dict}")
                 raw_json_list.append(evt_dict)
                 document_list.append(_doc)
             elif evt.result.reason == speechsdk.ResultReason.NoMatch:
-                print(
+                logger.warning(
                     "\tNOMATCH: Speech could not be TRANSCRIBED: {}".format(
                         evt.result.no_match_details
                     )
@@ -523,7 +523,7 @@ class AzureAISpeechParser(BaseBlobParser):
         def stop_cb(evt: speechsdk.SessionEventArgs) -> None:
             # callback that signals to stop continuous recognition
             # upon receiving an event `evt`
-            print("CLOSING on {}".format(evt))
+            logger.info("CLOSING on {}".format(evt))
             nonlocal transcribing_stop
             transcribing_stop = True
 
@@ -556,5 +556,5 @@ class AzureAISpeechParser(BaseBlobParser):
         try:
             return recognize_from_file()
         except Exception as err:
-            print("Encountered exception. {}".format(err))
+            logger.error("Encountered exception. 
{}".format(err))
             raise err
diff --git a/libs/community/tests/integration_tests/document_loaders/parsers/test_azure_ai_speech_parser.py b/libs/community/tests/integration_tests/document_loaders/parsers/test_azure_ai_speech_parser.py
new file mode 100644
index 00000000000..393c91eca47
--- /dev/null
+++ b/libs/community/tests/integration_tests/document_loaders/parsers/test_azure_ai_speech_parser.py
@@ -0,0 +1,91 @@
+from __future__ import annotations
+
+from typing import List, Optional
+
+from langchain_core.documents import Document
+
+from langchain_community.document_loaders.base import BaseLoader
+from langchain_community.document_loaders.blob_loaders import Blob
+from langchain_community.document_loaders.parsers.audio import AzureAISpeechParser
+
+SPEECH_SERVICE_REGION = "eastasia"
+SPEECH_SERVICE_KEY = "someservicekey"
+
+
+# Loader for testing purposes only
+class _AzureAISpeechLoader(BaseLoader):
+    """Azure AI Speech Service Document Loader.
+    A document loader that can load an audio file from the local file system
+    and transcribe it using Azure AI Speech Service.
+
+
+    Examples:
+        .. code-block:: python
+            from langchain_community.document_loaders import AzureAISpeechLoader
+            loader = _AzureAISpeechLoader(
+                file_path="path/to/directory/example.wav",
+                api_key="speech-api-key-from-azure",
+                region="speech-api-region-from-azure"
+            )
+            loader.lazy_load()
+    """
+
+    def load(self) -> List[Document]:
+        blob = Blob.from_path(self.file_path)
+        return self.parser.parse(blob)
+
+    def lazy_load(self) -> List[Document]:
+        return self.load()
+
+    def __init__(self, file_path: str, **kwargs: Optional[list[str] | str]) -> None:
+        """
+        Args:
+            file_path: The path to the audio file. 
+ """ + self.file_path = file_path + self.parser = AzureAISpeechParser(**kwargs) + + +def _get_audio_file_path() -> str: + return "../test_audio/whatstheweatherlike.wav" + + +def test_azure_speech_load_key_region_auto_detect_languages() -> None: + loader = _AzureAISpeechLoader( + _get_audio_file_path(), + api_key=SPEECH_SERVICE_KEY, + region=SPEECH_SERVICE_REGION, + auto_detect_languages=["zh-CN", "en-US"], + ) + documents = loader.lazy_load() + assert "what" in documents[0].page_content.lower() + + +def test_azure_speech_load_key_region_language() -> None: + loader = _AzureAISpeechLoader( + _get_audio_file_path(), + api_key=SPEECH_SERVICE_KEY, + region=SPEECH_SERVICE_REGION, + speech_recognition_language="en-US", + ) + documents = loader.lazy_load() + assert "what" in documents[0].page_content.lower() + + +def test_azure_speech_load_key_region() -> None: + loader = _AzureAISpeechLoader( + _get_audio_file_path(), api_key=SPEECH_SERVICE_KEY, region=SPEECH_SERVICE_REGION + ) + documents = loader.lazy_load() + assert "what" in documents[0].page_content.lower() + + +def test_azure_speech_load_key_endpoint() -> None: + loader = _AzureAISpeechLoader( + _get_audio_file_path(), + api_key=SPEECH_SERVICE_KEY, + endpoint=f"wss://{SPEECH_SERVICE_REGION}.stt.speech.microsoft.com/speech/recognition" + "/conversation/cognitiveservices/v1", + ) + documents = loader.lazy_load() + assert "what" in documents[0].page_content.lower() diff --git a/libs/community/tests/unit_tests/document_loaders/test_audio.py b/libs/community/tests/unit_tests/document_loaders/test_audio.py deleted file mode 100644 index a66481934e5..00000000000 --- a/libs/community/tests/unit_tests/document_loaders/test_audio.py +++ /dev/null @@ -1,49 +0,0 @@ -from langchain_community.document_loaders import AzureAISpeechLoader - -SPEECH_SERVICE_REGION = "eastasia" -SPEECH_SERVICE_KEY = "c77dcf2aa5c04dd6b6613f77d9d9161d" - - -def _get_audio_file_path() -> str: - return "../test_audio/whatstheweatherlike.wav" - - -def 
test_azure_speech_load_key_region_auto_detect_languages() -> None: - loader = AzureAISpeechLoader( - _get_audio_file_path(), - api_key=SPEECH_SERVICE_KEY, - region=SPEECH_SERVICE_REGION, - auto_detect_languages=["zh-CN", "en-US"], - ) - documents = loader.lazy_load() - assert "what" in documents[0].page_content.lower() - - -def test_azure_speech_load_key_region_language() -> None: - loader = AzureAISpeechLoader( - _get_audio_file_path(), - api_key=SPEECH_SERVICE_KEY, - region=SPEECH_SERVICE_REGION, - speech_recognition_language="en-US", - ) - documents = loader.lazy_load() - assert "what" in documents[0].page_content.lower() - - -def test_azure_speech_load_key_region() -> None: - loader = AzureAISpeechLoader( - _get_audio_file_path(), api_key=SPEECH_SERVICE_KEY, region=SPEECH_SERVICE_REGION - ) - documents = loader.lazy_load() - assert "what" in documents[0].page_content.lower() - - -def test_azure_speech_load_key_endpoint() -> None: - loader = AzureAISpeechLoader( - _get_audio_file_path(), - api_key=SPEECH_SERVICE_KEY, - endpoint=f"wss://{SPEECH_SERVICE_REGION}.stt.speech.microsoft.com/speech/recognition" - "/conversation/cognitiveservices/v1", - ) - documents = loader.lazy_load() - assert "what" in documents[0].page_content.lower()