Mirror of https://github.com/hwchase17/langchain.git, synced 2026-02-21 06:33:41 +00:00.
This commit is contained in:
@@ -41,7 +41,6 @@ from langchain_community.document_loaders.azlyrics import AZLyricsLoader
|
||||
from langchain_community.document_loaders.azure_ai_data import (
|
||||
AzureAIDataLoader,
|
||||
)
|
||||
from langchain_community.document_loaders.azure_ai_speech import AzureAISpeechLoader
|
||||
from langchain_community.document_loaders.azure_blob_storage_container import (
|
||||
AzureBlobStorageContainerLoader,
|
||||
)
|
||||
@@ -266,7 +265,6 @@ __all__ = [
|
||||
"AsyncHtmlLoader",
|
||||
"AthenaLoader",
|
||||
"AzureAIDataLoader",
|
||||
"AzureAISpeechLoader",
|
||||
"AzureAIDocumentIntelligenceLoader",
|
||||
"AzureBlobStorageContainerLoader",
|
||||
"AzureBlobStorageFileLoader",
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.document_loaders.base import BaseLoader
|
||||
from langchain_community.document_loaders.blob_loaders import Blob
|
||||
from langchain_community.document_loaders.parsers.audio import AzureAISpeechParser
|
||||
|
||||
|
||||
class AzureAISpeechLoader(BaseLoader):
    """Azure AI Speech Service document loader.

    Loads an audio file from the local file system and transcribes it
    using the Azure AI Speech Service.

    Examples:

        .. code-block:: python

            from langchain_community.document_loaders import AzureAISpeechLoader

            loader = AzureAISpeechLoader(
                file_path="path/to/directory/example.wav",
                api_key="speech-api-key-from-azure",
                region="speech-api-region-from-azure",
            )

            loader.lazy_load()
    """

    def __init__(self, file_path: str, **kwargs: Optional[list[str] | str]) -> None:
        """Initialize the loader.

        Args:
            file_path: The path to the audio file.
            **kwargs: Forwarded verbatim to ``AzureAISpeechParser``
                (e.g. ``api_key``, ``region``, ``endpoint``,
                ``auto_detect_languages``, ``speech_recognition_language``).
        """
        self.file_path = file_path
        self.parser = AzureAISpeechParser(**kwargs)

    def load(self) -> List[Document]:
        """Transcribe the audio file and return the resulting documents."""
        blob = Blob.from_path(self.file_path)
        return self.parser.parse(blob)

    def lazy_load(self) -> List[Document]:
        """Alias for :meth:`load`.

        NOTE(review): despite the name this is not lazy — it returns the
        fully materialized list (existing callers index the result), rather
        than the iterator that ``BaseLoader.lazy_load`` normally yields.
        """
        return self.load()
|
||||
@@ -80,7 +80,7 @@ class OpenAIWhisperParser(BaseBlobParser):
|
||||
file_obj.name = f"part_{split_number}.mp3"
|
||||
|
||||
# Transcribe
|
||||
print(f"Transcribing part {split_number + 1}!") # noqa: T201
|
||||
logger.info(f"Transcribing part {split_number + 1}!") # noqa: T201
|
||||
attempts = 0
|
||||
while attempts < 3:
|
||||
try:
|
||||
@@ -93,10 +93,10 @@ class OpenAIWhisperParser(BaseBlobParser):
|
||||
break
|
||||
except Exception as e:
|
||||
attempts += 1
|
||||
print(f"Attempt {attempts} failed. Exception: {str(e)}") # noqa: T201
|
||||
logger.error(f"Attempt {attempts} failed. Exception: {str(e)}") # noqa: T201
|
||||
time.sleep(5)
|
||||
else:
|
||||
print("Failed to transcribe after 3 attempts.") # noqa: T201
|
||||
logger.info("Failed to transcribe after 3 attempts.") # noqa: T201
|
||||
continue
|
||||
|
||||
yield Document(
|
||||
@@ -185,7 +185,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
||||
rec_model = "openai/whisper-large"
|
||||
self.lang_model = lang_model if lang_model else rec_model
|
||||
|
||||
print("Using the following model: ", self.lang_model) # noqa: T201
|
||||
logger.info("Using the following model: ", self.lang_model) # noqa: T201
|
||||
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -232,7 +232,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
||||
file_obj = io.BytesIO(audio.export(format="mp3").read())
|
||||
|
||||
# Transcribe
|
||||
print(f"Transcribing part {blob.path}!") # noqa: T201
|
||||
logger.info(f"Transcribing part {blob.path}!") # noqa: T201
|
||||
|
||||
y, sr = librosa.load(file_obj, sr=16000)
|
||||
|
||||
@@ -467,11 +467,11 @@ class AzureAISpeechParser(BaseBlobParser):
|
||||
"speaker_id": speaker_id,
|
||||
},
|
||||
)
|
||||
print(f"TRANSCRIBED:{evt_dict}")
|
||||
logger.info(f"TRANSCRIBED:{evt_dict}")
|
||||
raw_json_list.append(evt_dict)
|
||||
document_list.append(_doc)
|
||||
elif evt.result.reason == speechsdk.ResultReason.NoMatch:
|
||||
print(
|
||||
logger.warning(
|
||||
"\tNOMATCH: Speech could not be TRANSCRIBED: {}".format(
|
||||
evt.result.no_match_details
|
||||
)
|
||||
@@ -523,7 +523,7 @@ class AzureAISpeechParser(BaseBlobParser):
|
||||
def stop_cb(evt: speechsdk.SessionEventArgs) -> None:
|
||||
# callback that signals to stop continuous recognition
|
||||
# upon receiving an event `evt`
|
||||
print("CLOSING on {}".format(evt))
|
||||
logger.info("CLOSING on {}".format(evt))
|
||||
nonlocal transcribing_stop
|
||||
transcribing_stop = True
|
||||
|
||||
@@ -556,5 +556,5 @@ class AzureAISpeechParser(BaseBlobParser):
|
||||
try:
|
||||
return recognize_from_file()
|
||||
except Exception as err:
|
||||
print("Encountered exception. {}".format(err))
|
||||
logger.error("Encountered exception. {}".format(err))
|
||||
raise err
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.document_loaders.base import BaseLoader
|
||||
from langchain_community.document_loaders.blob_loaders import Blob
|
||||
from langchain_community.document_loaders.parsers.audio import AzureAISpeechParser
|
||||
|
||||
import os

# Azure Speech credentials for the integration tests below.
# Prefer environment variables so real keys never land in source control;
# the literals are inert placeholders used only when the variables are unset.
SPEECH_SERVICE_REGION = os.environ.get("AZURE_SPEECH_REGION", "eastasia")
SPEECH_SERVICE_KEY = os.environ.get("AZURE_SPEECH_KEY", "someservicekey")
|
||||
|
||||
|
||||
# Loader for testing purposes only
|
||||
# Loader defined inline for testing purposes only.
class _AzureAISpeechLoader(BaseLoader):
    """Azure AI Speech Service document loader (test-local copy).

    Loads an audio file from the local file system and transcribes it
    using the Azure AI Speech Service.

    Examples:

        .. code-block:: python

            from langchain_community.document_loaders import AzureAISpeechLoader

            loader = AzureAISpeechLoader(
                file_path="path/to/directory/example.wav",
                api_key="speech-api-key-from-azure",
                region="speech-api-region-from-azure",
            )

            loader.lazy_load()
    """

    def __init__(self, file_path: str, **kwargs: Optional[list[str] | str]) -> None:
        """Initialize the loader.

        Args:
            file_path: The path to the audio file.
            **kwargs: Forwarded verbatim to ``AzureAISpeechParser``
                (e.g. ``api_key``, ``region``, ``endpoint``,
                ``auto_detect_languages``, ``speech_recognition_language``).
        """
        self.file_path = file_path
        self.parser = AzureAISpeechParser(**kwargs)

    def load(self) -> List[Document]:
        """Transcribe the audio file and return the resulting documents."""
        blob = Blob.from_path(self.file_path)
        return self.parser.parse(blob)

    def lazy_load(self) -> List[Document]:
        """Alias for :meth:`load`.

        NOTE(review): not actually lazy — returns the materialized list,
        which the tests below rely on (they index ``documents[0]``).
        """
        return self.load()
|
||||
|
||||
|
||||
def _get_audio_file_path() -> str:
|
||||
return "../test_audio/whatstheweatherlike.wav"
|
||||
|
||||
|
||||
def test_azure_speech_load_key_region_auto_detect_languages() -> None:
    """Transcribe with key/region auth and automatic language detection."""
    speech_loader = _AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
        auto_detect_languages=["zh-CN", "en-US"],
    )
    docs = speech_loader.lazy_load()
    transcript = docs[0].page_content
    assert "what" in transcript.lower()
|
||||
|
||||
|
||||
def test_azure_speech_load_key_region_language() -> None:
    """Transcribe with key/region auth and an explicit recognition language."""
    speech_loader = _AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
        speech_recognition_language="en-US",
    )
    docs = speech_loader.lazy_load()
    transcript = docs[0].page_content
    assert "what" in transcript.lower()
|
||||
|
||||
|
||||
def test_azure_speech_load_key_region() -> None:
    """Transcribe using only key/region authentication (service defaults)."""
    speech_loader = _AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
    )
    docs = speech_loader.lazy_load()
    transcript = docs[0].page_content
    assert "what" in transcript.lower()
|
||||
|
||||
|
||||
def test_azure_speech_load_key_endpoint() -> None:
    """Transcribe via an explicit websocket endpoint instead of a region."""
    stt_endpoint = (
        f"wss://{SPEECH_SERVICE_REGION}.stt.speech.microsoft.com"
        "/speech/recognition/conversation/cognitiveservices/v1"
    )
    speech_loader = _AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        endpoint=stt_endpoint,
    )
    docs = speech_loader.lazy_load()
    transcript = docs[0].page_content
    assert "what" in transcript.lower()
|
||||
@@ -1,49 +0,0 @@
|
||||
from langchain_community.document_loaders import AzureAISpeechLoader
|
||||
|
||||
import os

# Azure Speech credentials for the integration tests below.
# SECURITY: the original revision committed a real-looking subscription key
# here ("c77dcf2a..."). Credentials must come from the environment, never
# from source control; that key should be rotated/revoked.
SPEECH_SERVICE_REGION = os.environ.get("AZURE_SPEECH_REGION", "eastasia")
SPEECH_SERVICE_KEY = os.environ.get("AZURE_SPEECH_KEY", "")
|
||||
|
||||
|
||||
def _get_audio_file_path() -> str:
|
||||
return "../test_audio/whatstheweatherlike.wav"
|
||||
|
||||
|
||||
def test_azure_speech_load_key_region_auto_detect_languages() -> None:
    """Transcribe with key/region auth and automatic language detection."""
    audio_loader = AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
        auto_detect_languages=["zh-CN", "en-US"],
    )
    result_docs = audio_loader.lazy_load()
    assert "what" in result_docs[0].page_content.lower()
|
||||
|
||||
|
||||
def test_azure_speech_load_key_region_language() -> None:
    """Transcribe with key/region auth and an explicit recognition language."""
    audio_loader = AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
        speech_recognition_language="en-US",
    )
    result_docs = audio_loader.lazy_load()
    assert "what" in result_docs[0].page_content.lower()
|
||||
|
||||
|
||||
def test_azure_speech_load_key_region() -> None:
    """Transcribe using only key/region authentication (service defaults)."""
    audio_loader = AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
    )
    result_docs = audio_loader.lazy_load()
    assert "what" in result_docs[0].page_content.lower()
|
||||
|
||||
|
||||
def test_azure_speech_load_key_endpoint() -> None:
    """Transcribe via an explicit websocket endpoint instead of a region."""
    stt_endpoint = (
        f"wss://{SPEECH_SERVICE_REGION}.stt.speech.microsoft.com"
        "/speech/recognition/conversation/cognitiveservices/v1"
    )
    audio_loader = AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        endpoint=stt_endpoint,
    )
    result_docs = audio_loader.lazy_load()
    assert "what" in result_docs[0].page_content.lower()
|
||||
Reference in New Issue
Block a user