From 1d7414a3718ef6025db79a428ad664122b7c439e Mon Sep 17 00:00:00 2001 From: Aiden Le <37786342+aidendle94@users.noreply.github.com> Date: Thu, 20 Jul 2023 09:27:56 -0500 Subject: [PATCH] Feature: Add openai_api_model attribute to Doctran models (#7868) - Description: Added the ability to define the OpenAI model. - Issue: Currently the Doctran instance uses gpt-4 by default; this does not work if the user has no access to gpt-4. - @rlancemartin, @eyurtsev, @baskaryan --------- Co-authored-by: Bagatur --- .../document_transformers/doctran_text_extract.py | 8 +++++++- langchain/document_transformers/doctran_text_qa.py | 13 +++++++++++-- .../document_transformers/doctran_text_translate.py | 12 ++++++++++-- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/langchain/document_transformers/doctran_text_extract.py b/langchain/document_transformers/doctran_text_extract.py index 6ff8b727e5d..fc618dab332 100644 --- a/langchain/document_transformers/doctran_text_extract.py +++ b/langchain/document_transformers/doctran_text_extract.py @@ -53,11 +53,15 @@ class DoctranPropertyExtractor(BaseDocumentTransformer): self, properties: List[dict], openai_api_key: Optional[str] = None, + openai_api_model: Optional[str] = None, ) -> None: self.properties = properties self.openai_api_key = openai_api_key or get_from_env( "openai_api_key", "OPENAI_API_KEY" ) + self.openai_api_model = openai_api_model or get_from_env( + "openai_api_model", "OPENAI_API_MODEL" + ) def transform_documents( self, documents: Sequence[Document], **kwargs: Any @@ -71,7 +75,9 @@ class DoctranPropertyExtractor(BaseDocumentTransformer): try: from doctran import Doctran, ExtractProperty - doctran = Doctran(openai_api_key=self.openai_api_key) + doctran = Doctran( + openai_api_key=self.openai_api_key, openai_model=self.openai_api_model + ) except ImportError: raise ImportError( "Install doctran to use this parser. 
(pip install doctran)" diff --git a/langchain/document_transformers/doctran_text_qa.py b/langchain/document_transformers/doctran_text_qa.py index 3031097b19d..c77c9dfe8a0 100644 --- a/langchain/document_transformers/doctran_text_qa.py +++ b/langchain/document_transformers/doctran_text_qa.py @@ -21,10 +21,17 @@ class DoctranQATransformer(BaseDocumentTransformer): transformed_document = await qa_transformer.atransform_documents(documents) """ - def __init__(self, openai_api_key: Optional[str] = None) -> None: + def __init__( + self, + openai_api_key: Optional[str] = None, + openai_api_model: Optional[str] = None, + ) -> None: self.openai_api_key = openai_api_key or get_from_env( "openai_api_key", "OPENAI_API_KEY" ) + self.openai_api_model = openai_api_model or get_from_env( + "openai_api_model", "OPENAI_API_MODEL" + ) def transform_documents( self, documents: Sequence[Document], **kwargs: Any @@ -38,7 +45,9 @@ class DoctranQATransformer(BaseDocumentTransformer): try: from doctran import Doctran - doctran = Doctran(openai_api_key=self.openai_api_key) + doctran = Doctran( + openai_api_key=self.openai_api_key, openai_model=self.openai_api_model + ) except ImportError: raise ImportError( "Install doctran to use this parser. 
(pip install doctran)" diff --git a/langchain/document_transformers/doctran_text_translate.py b/langchain/document_transformers/doctran_text_translate.py index dc26f0aaf42..219c8fd4d22 100644 --- a/langchain/document_transformers/doctran_text_translate.py +++ b/langchain/document_transformers/doctran_text_translate.py @@ -23,11 +23,17 @@ class DoctranTextTranslator(BaseDocumentTransformer): """ def __init__( - self, openai_api_key: Optional[str] = None, language: str = "english" + self, + openai_api_key: Optional[str] = None, + language: str = "english", + openai_api_model: Optional[str] = None, ) -> None: self.openai_api_key = openai_api_key or get_from_env( "openai_api_key", "OPENAI_API_KEY" ) + self.openai_api_model = openai_api_model or get_from_env( + "openai_api_model", "OPENAI_API_MODEL" + ) self.language = language def transform_documents( @@ -42,7 +48,9 @@ class DoctranTextTranslator(BaseDocumentTransformer): try: from doctran import Doctran - doctran = Doctran(openai_api_key=self.openai_api_key) + doctran = Doctran( + openai_api_key=self.openai_api_key, openai_model=self.openai_api_model + ) except ImportError: raise ImportError( "Install doctran to use this parser. (pip install doctran)"