This commit is contained in:
Eugene Yurtsev
2024-02-28 16:07:42 -05:00
parent fd3a3a0ad1
commit f56f7e1895
5 changed files with 100 additions and 106 deletions

View File

@@ -41,7 +41,6 @@ from langchain_community.document_loaders.azlyrics import AZLyricsLoader
from langchain_community.document_loaders.azure_ai_data import (
AzureAIDataLoader,
)
from langchain_community.document_loaders.azure_ai_speech import AzureAISpeechLoader
from langchain_community.document_loaders.azure_blob_storage_container import (
AzureBlobStorageContainerLoader,
)
@@ -266,7 +265,6 @@ __all__ = [
"AsyncHtmlLoader",
"AthenaLoader",
"AzureAIDataLoader",
"AzureAISpeechLoader",
"AzureAIDocumentIntelligenceLoader",
"AzureBlobStorageContainerLoader",
"AzureBlobStorageFileLoader",

View File

@@ -1,46 +0,0 @@
from __future__ import annotations
from typing import List, Optional
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.audio import AzureAISpeechParser
class AzureAISpeechLoader(BaseLoader):
    """Azure AI Speech Service Document Loader.

    A document loader that loads an audio file from the local file system
    and transcribes it using Azure AI Speech Service.

    Examples:

        .. code-block:: python

            from langchain_community.document_loaders import AzureAISpeechLoader

            loader = AzureAISpeechLoader(
                file_path="path/to/directory/example.wav",
                api_key="speech-api-key-from-azure",
                region="speech-api-region-from-azure",
            )
            loader.load()
    """

    def __init__(self, file_path: str, **kwargs) -> None:
        """Initialize the loader.

        Args:
            file_path: The path to the audio file.
            **kwargs: Forwarded verbatim to ``AzureAISpeechParser``
                (e.g. ``api_key``, ``region``, ``endpoint``,
                ``speech_recognition_language``, ``auto_detect_languages``).
        """
        self.file_path = file_path
        # Build the parser eagerly so configuration errors surface at
        # construction time rather than on the first load() call.
        self.parser = AzureAISpeechParser(**kwargs)

    def load(self) -> List[Document]:
        """Read the audio file into a Blob and transcribe it."""
        blob = Blob.from_path(self.file_path)
        return self.parser.parse(blob)

    def lazy_load(self) -> List[Document]:
        """Alias for :meth:`load`.

        NOTE(review): not actually lazy — the parser transcribes the whole
        file eagerly; the returned list can still be iterated by callers.
        """
        return self.load()

View File

@@ -80,7 +80,7 @@ class OpenAIWhisperParser(BaseBlobParser):
file_obj.name = f"part_{split_number}.mp3"
# Transcribe
print(f"Transcribing part {split_number + 1}!") # noqa: T201
logger.info("Transcribing part %s!", split_number + 1)
attempts = 0
while attempts < 3:
try:
@@ -93,10 +93,10 @@ class OpenAIWhisperParser(BaseBlobParser):
break
except Exception as e:
attempts += 1
print(f"Attempt {attempts} failed. Exception: {str(e)}") # noqa: T201
logger.error("Attempt %s failed. Exception: %s", attempts, e)
time.sleep(5)
else:
print("Failed to transcribe after 3 attempts.") # noqa: T201
logger.error("Failed to transcribe after 3 attempts.")
continue
yield Document(
@@ -185,7 +185,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
rec_model = "openai/whisper-large"
self.lang_model = lang_model if lang_model else rec_model
print("Using the following model: ", self.lang_model) # noqa: T201
logger.info("Using the following model: %s", self.lang_model)
self.batch_size = batch_size
@@ -232,7 +232,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
file_obj = io.BytesIO(audio.export(format="mp3").read())
# Transcribe
print(f"Transcribing part {blob.path}!") # noqa: T201
logger.info("Transcribing part %s!", blob.path)
y, sr = librosa.load(file_obj, sr=16000)
@@ -467,11 +467,11 @@ class AzureAISpeechParser(BaseBlobParser):
"speaker_id": speaker_id,
},
)
print(f"TRANSCRIBED:{evt_dict}")
logger.info(f"TRANSCRIBED:{evt_dict}")
raw_json_list.append(evt_dict)
document_list.append(_doc)
elif evt.result.reason == speechsdk.ResultReason.NoMatch:
print(
logger.warning(
"\tNOMATCH: Speech could not be TRANSCRIBED: {}".format(
evt.result.no_match_details
)
@@ -523,7 +523,7 @@ class AzureAISpeechParser(BaseBlobParser):
def stop_cb(evt: speechsdk.SessionEventArgs) -> None:
# callback that signals to stop continuous recognition
# upon receiving an event `evt`
print("CLOSING on {}".format(evt))
logger.info("CLOSING on {}".format(evt))
nonlocal transcribing_stop
transcribing_stop = True
@@ -556,5 +556,5 @@ class AzureAISpeechParser(BaseBlobParser):
try:
return recognize_from_file()
except Exception as err:
print("Encountered exception. {}".format(err))
logger.error("Encountered exception. {}".format(err))
raise err

View File

@@ -0,0 +1,91 @@
from __future__ import annotations
from typing import List, Optional
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.audio import AzureAISpeechParser
# Region and placeholder key of the Azure AI Speech resource these live
# integration tests target; substitute real values to run them.
SPEECH_SERVICE_REGION = "eastasia"
SPEECH_SERVICE_KEY = "someservicekey"
# Loader for testing purposes only
class _AzureAISpeechLoader(BaseLoader):
    """Test-only copy of the Azure AI Speech Service document loader.

    Loads an audio file from the local file system and transcribes it
    using Azure AI Speech Service.

    Examples:

        .. code-block:: python

            loader = _AzureAISpeechLoader(
                file_path="path/to/directory/example.wav",
                api_key="speech-api-key-from-azure",
                region="speech-api-region-from-azure",
            )
            loader.load()
    """

    def __init__(self, file_path: str, **kwargs) -> None:
        """Initialize the loader.

        Args:
            file_path: The path to the audio file.
            **kwargs: Forwarded verbatim to ``AzureAISpeechParser``
                (e.g. ``api_key``, ``region``, ``endpoint``,
                ``speech_recognition_language``, ``auto_detect_languages``).
        """
        self.file_path = file_path
        # Build the parser eagerly so configuration errors surface at
        # construction time rather than on the first load() call.
        self.parser = AzureAISpeechParser(**kwargs)

    def load(self) -> List[Document]:
        """Read the audio file into a Blob and transcribe it."""
        blob = Blob.from_path(self.file_path)
        return self.parser.parse(blob)

    def lazy_load(self) -> List[Document]:
        """Alias for :meth:`load`.

        NOTE(review): not actually lazy — the parser transcribes the whole
        file eagerly; the returned list can still be iterated by callers.
        """
        return self.load()
def _get_audio_file_path() -> str:
return "../test_audio/whatstheweatherlike.wav"
def test_azure_speech_load_key_region_auto_detect_languages() -> None:
    """Transcribe with key/region auth and automatic language detection."""
    settings = {
        "api_key": SPEECH_SERVICE_KEY,
        "region": SPEECH_SERVICE_REGION,
        "auto_detect_languages": ["zh-CN", "en-US"],
    }
    docs = _AzureAISpeechLoader(_get_audio_file_path(), **settings).lazy_load()
    transcript = docs[0].page_content
    assert "what" in transcript.lower()
def test_azure_speech_load_key_region_language() -> None:
    """Transcribe with key/region auth and an explicit recognition language."""
    audio = _get_audio_file_path()
    loader = _AzureAISpeechLoader(
        audio,
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
        speech_recognition_language="en-US",
    )
    content = loader.lazy_load()[0].page_content
    assert "what" in content.lower()
def test_azure_speech_load_key_region() -> None:
    """Transcribe with plain key/region auth and default settings."""
    loader = _AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
    )
    first_doc = loader.lazy_load()[0]
    assert "what" in first_doc.page_content.lower()
def test_azure_speech_load_key_endpoint() -> None:
    """Transcribe against an explicit websocket endpoint instead of a region."""
    ws_endpoint = (
        f"wss://{SPEECH_SERVICE_REGION}.stt.speech.microsoft.com/speech/recognition"
        "/conversation/cognitiveservices/v1"
    )
    loader = _AzureAISpeechLoader(
        _get_audio_file_path(), api_key=SPEECH_SERVICE_KEY, endpoint=ws_endpoint
    )
    docs = loader.lazy_load()
    assert "what" in docs[0].page_content.lower()

View File

@@ -1,49 +0,0 @@
from langchain_community.document_loaders import AzureAISpeechLoader
import os

# Region of the Azure AI Speech resource used by these live tests.
SPEECH_SERVICE_REGION = "eastasia"
# SECURITY: never commit a real subscription key. The previous revision
# hard-coded one in source control; read it from the environment instead,
# and rotate the exposed key in the Azure portal.
SPEECH_SERVICE_KEY = os.environ.get("AZURE_SPEECH_SERVICE_KEY", "")
def _get_audio_file_path() -> str:
return "../test_audio/whatstheweatherlike.wav"
def test_azure_speech_load_key_region_auto_detect_languages() -> None:
    """Key/region auth with automatic language detection across two locales."""
    loader = AzureAISpeechLoader(
        _get_audio_file_path(),
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
        auto_detect_languages=["zh-CN", "en-US"],
    )
    page = loader.lazy_load()[0].page_content
    assert "what" in page.lower()
def test_azure_speech_load_key_region_language() -> None:
    """Key/region auth with an explicit recognition language."""
    settings = {
        "api_key": SPEECH_SERVICE_KEY,
        "region": SPEECH_SERVICE_REGION,
        "speech_recognition_language": "en-US",
    }
    docs = AzureAISpeechLoader(_get_audio_file_path(), **settings).lazy_load()
    assert "what" in docs[0].page_content.lower()
def test_azure_speech_load_key_region() -> None:
    """Plain key/region auth with default recognition settings."""
    audio_path = _get_audio_file_path()
    loader = AzureAISpeechLoader(
        audio_path,
        api_key=SPEECH_SERVICE_KEY,
        region=SPEECH_SERVICE_REGION,
    )
    documents = loader.lazy_load()
    assert "what" in documents[0].page_content.lower()
def test_azure_speech_load_key_endpoint() -> None:
    """Auth via an explicit websocket endpoint rather than a region name."""
    endpoint = (
        f"wss://{SPEECH_SERVICE_REGION}.stt.speech.microsoft.com/speech/recognition"
        "/conversation/cognitiveservices/v1"
    )
    loader = AzureAISpeechLoader(
        _get_audio_file_path(), api_key=SPEECH_SERVICE_KEY, endpoint=endpoint
    )
    results = loader.lazy_load()
    assert "what" in results[0].page_content.lower()