mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-05 14:43:08 +00:00
community: Make doctran synchronous (#15264)
### Description
I found that the methods in [the doctran
library](https://github.com/psychic-api/doctran) have been restructured
into [synchronized
versions](14944a59f7
),
And [the example
ipynb](https://github.com/psychic-api/doctran/blob/main/examples.ipynb)
also shows that the code is synchronized, but the README has not been
updated yet.
so we need to modify the code and update the documentation.
### Issue
https://github.com/langchain-ai/langchain/issues/14645
This commit is contained in:
parent
9a16590aa9
commit
a464eb4394
@ -202,7 +202,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"extracted_document = await property_extractor.atransform_documents(\n",
|
||||
"extracted_document = property_extractor.transform_documents(\n",
|
||||
" documents, properties=properties\n",
|
||||
")"
|
||||
]
|
||||
@ -224,10 +224,9 @@
|
||||
" \"Jane Smith\",\n",
|
||||
" \"Michael Johnson\",\n",
|
||||
" \"Sarah Thompson\",\n",
|
||||
" \"David Rodriguez\",\n",
|
||||
" \"Jason Fan\"\n",
|
||||
" \"David Rodriguez\"\n",
|
||||
" ],\n",
|
||||
" \"eli5\": \"This is an email from the CEO, Jason Fan, giving updates about different areas in the company. He talks about new security measures and praises John Doe for his work. He also mentions new hires and praises Jane Smith for her work in customer service. The CEO reminds everyone about the upcoming benefits enrollment and says to contact Michael Johnson with any questions. He talks about the marketing team's work and praises Sarah Thompson for increasing their social media followers. There's also a product launch event on July 15th. Lastly, he talks about the research and development projects and praises David Rodriguez for his work. There's a brainstorming session on July 10th.\"\n",
|
||||
" \"eli5\": \"This email provides important updates and discussions on various topics. It mentions the implementation of security and privacy measures, HR updates and employee benefits, marketing initiatives and campaigns, and research and development projects. It recognizes the contributions of John Doe, Jane Smith, Michael Johnson, Sarah Thompson, and David Rodriguez. It also reminds everyone to adhere to data protection policies, enroll in the employee benefits program, attend the upcoming product launch event, and share ideas for new projects during the R&D brainstorming session.\"\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
@ -261,7 +260,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -158,7 +158,7 @@
|
||||
"source": [
|
||||
"documents = [Document(page_content=sample_text)]\n",
|
||||
"qa_transformer = DoctranQATransformer()\n",
|
||||
"transformed_document = await qa_transformer.atransform_documents(documents)"
|
||||
"transformed_document = qa_transformer.transform_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -185,44 +185,40 @@
|
||||
" \"answer\": \"The purpose of this document is to provide important updates and discuss various topics that require the team's attention.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who is responsible for enhancing the network security?\",\n",
|
||||
" \"answer\": \"John Doe from the IT department is responsible for enhancing the network security.\"\n",
|
||||
" \"question\": \"What should be done if someone comes across potential security risks or incidents?\",\n",
|
||||
" \"answer\": \"If someone comes across potential security risks or incidents, they should report them immediately to the dedicated team at security@example.com.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Where should potential security risks or incidents be reported?\",\n",
|
||||
" \"answer\": \"Potential security risks or incidents should be reported to the dedicated team at security@example.com.\"\n",
|
||||
" \"question\": \"Who is commended for enhancing network security?\",\n",
|
||||
" \"answer\": \"John Doe from the IT department is commended for enhancing network security.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who has been recognized for outstanding performance in customer service?\",\n",
|
||||
" \"answer\": \"Jane Smith has been recognized for her outstanding performance in customer service.\"\n",
|
||||
" \"question\": \"Who should be contacted for assistance with employee benefits?\",\n",
|
||||
" \"answer\": \"For assistance with employee benefits, HR representative Michael Johnson should be contacted. His phone number is 418-492-3850, and his email is michael.johnson@example.com.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"When is the open enrollment period for the employee benefits program?\",\n",
|
||||
" \"answer\": \"The document does not specify the exact dates for the open enrollment period for the employee benefits program, but it mentions that it is fast approaching.\"\n",
|
||||
" \"question\": \"Who has made significant contributions to their respective departments?\",\n",
|
||||
" \"answer\": \"Several new team members have made significant contributions to their respective departments.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who should be contacted for questions or assistance regarding the employee benefits program?\",\n",
|
||||
" \"answer\": \"For questions or assistance regarding the employee benefits program, the HR representative, Michael Johnson, should be contacted.\"\n",
|
||||
" \"question\": \"Who is recognized for outstanding performance in customer service?\",\n",
|
||||
" \"answer\": \"Jane Smith is recognized for outstanding performance in customer service.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who has been acknowledged for managing the company's social media platforms?\",\n",
|
||||
" \"answer\": \"Sarah Thompson has been acknowledged for managing the company's social media platforms.\"\n",
|
||||
" \"question\": \"Who has successfully increased the follower base on social media?\",\n",
|
||||
" \"answer\": \"Sarah Thompson has successfully increased the follower base on social media.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"When is the upcoming product launch event?\",\n",
|
||||
" \"answer\": \"The upcoming product launch event is on July 15th.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who has been recognized for their contributions to the development of the company's technology?\",\n",
|
||||
" \"answer\": \"David Rodriguez has been recognized for his contributions to the development of the company's technology.\"\n",
|
||||
" \"question\": \"Who is acknowledged for their exceptional work as project lead?\",\n",
|
||||
" \"answer\": \"David Rodriguez is acknowledged for his exceptional work as project lead.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"When is the monthly R&D brainstorming session?\",\n",
|
||||
" \"question\": \"When is the monthly R&D brainstorming session scheduled?\",\n",
|
||||
" \"answer\": \"The monthly R&D brainstorming session is scheduled for July 10th.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": \"Who should be contacted for questions or concerns regarding the topics discussed in the document?\",\n",
|
||||
" \"answer\": \"For questions or concerns regarding the topics discussed in the document, Jason Fan, the Cofounder & CEO, should be contacted.\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
@ -230,16 +226,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"transformed_document = await qa_transformer.atransform_documents(documents)\n",
|
||||
"transformed_document = qa_transformer.transform_documents(documents)\n",
|
||||
"print(json.dumps(transformed_document[0].metadata, indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -258,7 +247,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -34,7 +34,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -125,51 +125,49 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"translated_document = await qa_translator.atransform_documents(documents)"
|
||||
"translated_document = qa_translator.transform_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Generado con ChatGPT]\n",
|
||||
"Documento Confidencial - Solo para Uso Interno\n",
|
||||
"\n",
|
||||
"Documento confidencial - Solo para uso interno\n",
|
||||
"Fecha: 1 de Julio de 2023\n",
|
||||
"\n",
|
||||
"Fecha: 1 de julio de 2023\n",
|
||||
"Asunto: Actualizaciones y Discusiones sobre Varios Temas\n",
|
||||
"\n",
|
||||
"Asunto: Actualizaciones y discusiones sobre varios temas\n",
|
||||
"\n",
|
||||
"Estimado equipo,\n",
|
||||
"Estimado Equipo,\n",
|
||||
"\n",
|
||||
"Espero que este correo electrónico les encuentre bien. En este documento, me gustaría proporcionarles algunas actualizaciones importantes y discutir varios temas que requieren nuestra atención. Por favor, traten la información contenida aquí como altamente confidencial.\n",
|
||||
"\n",
|
||||
"Medidas de seguridad y privacidad\n",
|
||||
"Como parte de nuestro compromiso continuo para garantizar la seguridad y privacidad de los datos de nuestros clientes, hemos implementado medidas robustas en todos nuestros sistemas. Nos gustaría elogiar a John Doe (correo electrónico: john.doe@example.com) del departamento de TI por su diligente trabajo en mejorar nuestra seguridad de red. En adelante, recordamos amablemente a todos que se adhieran estrictamente a nuestras políticas y directrices de protección de datos. Además, si se encuentran con cualquier riesgo de seguridad o incidente potencial, por favor repórtelo inmediatamente a nuestro equipo dedicado en security@example.com.\n",
|
||||
"Medidas de Seguridad y Privacidad\n",
|
||||
"Como parte de nuestro compromiso continuo de garantizar la seguridad y privacidad de los datos de nuestros clientes, hemos implementado medidas sólidas en todos nuestros sistemas. Nos gustaría elogiar a John Doe (correo electrónico: john.doe@example.com) del departamento de TI por su diligente trabajo en mejorar nuestra seguridad de red. En el futuro, recordamos amablemente a todos que se adhieran estrictamente a nuestras políticas y pautas de protección de datos. Además, si encuentran algún riesgo o incidente de seguridad potencial, por favor, repórtelo de inmediato a nuestro equipo dedicado en security@example.com.\n",
|
||||
"\n",
|
||||
"Actualizaciones de RRHH y beneficios para empleados\n",
|
||||
"Recientemente, dimos la bienvenida a varios nuevos miembros del equipo que han hecho contribuciones significativas a sus respectivos departamentos. Me gustaría reconocer a Jane Smith (SSN: 049-45-5928) por su sobresaliente rendimiento en el servicio al cliente. Jane ha recibido constantemente comentarios positivos de nuestros clientes. Además, recuerden que el período de inscripción abierta para nuestro programa de beneficios para empleados se acerca rápidamente. Si tienen alguna pregunta o necesitan asistencia, por favor contacten a nuestro representante de RRHH, Michael Johnson (teléfono: 418-492-3850, correo electrónico: michael.johnson@example.com).\n",
|
||||
"Actualizaciones de Recursos Humanos y Beneficios para Empleados\n",
|
||||
"Recientemente, dimos la bienvenida a varios nuevos miembros del equipo que han realizado contribuciones significativas en sus respectivos departamentos. Me gustaría reconocer a Jane Smith (SSN: 049-45-5928) por su destacado desempeño en servicio al cliente. Jane ha recibido consistentemente comentarios positivos de nuestros clientes. Además, recuerden que el período de inscripción abierta para nuestro programa de beneficios para empleados se acerca rápidamente. Si tienen alguna pregunta o necesitan ayuda, por favor, contacten a nuestro representante de Recursos Humanos, Michael Johnson (teléfono: 418-492-3850, correo electrónico: michael.johnson@example.com).\n",
|
||||
"\n",
|
||||
"Iniciativas y campañas de marketing\n",
|
||||
"Nuestro equipo de marketing ha estado trabajando activamente en el desarrollo de nuevas estrategias para aumentar la conciencia de marca y fomentar la participación del cliente. Nos gustaría agradecer a Sarah Thompson (teléfono: 415-555-1234) por sus excepcionales esfuerzos en la gestión de nuestras plataformas de redes sociales. Sarah ha aumentado con éxito nuestra base de seguidores en un 20% solo en el último mes. Además, por favor marquen sus calendarios para el próximo evento de lanzamiento de producto el 15 de julio. Animamos a todos los miembros del equipo a asistir y apoyar este emocionante hito para nuestra empresa.\n",
|
||||
"Iniciativas y Campañas de Marketing\n",
|
||||
"Nuestro equipo de marketing ha estado trabajando activamente en el desarrollo de nuevas estrategias para aumentar el conocimiento de nuestra marca y fomentar la participación de los clientes. Nos gustaría agradecer a Sarah Thompson (teléfono: 415-555-1234) por sus esfuerzos excepcionales en la gestión de nuestras plataformas de redes sociales. Sarah ha logrado aumentar nuestra base de seguidores en un 20% solo en el último mes. Además, marquen sus calendarios para el próximo evento de lanzamiento de productos el 15 de Julio. Animamos a todos los miembros del equipo a asistir y apoyar este emocionante hito para nuestra empresa.\n",
|
||||
"\n",
|
||||
"Proyectos de investigación y desarrollo\n",
|
||||
"En nuestra búsqueda de la innovación, nuestro departamento de investigación y desarrollo ha estado trabajando incansablemente en varios proyectos. Me gustaría reconocer el excepcional trabajo de David Rodríguez (correo electrónico: david.rodriguez@example.com) en su papel de líder de proyecto. Las contribuciones de David al desarrollo de nuestra tecnología de vanguardia han sido fundamentales. Además, nos gustaría recordar a todos que compartan sus ideas y sugerencias para posibles nuevos proyectos durante nuestra sesión de lluvia de ideas de I+D mensual, programada para el 10 de julio.\n",
|
||||
"Proyectos de Investigación y Desarrollo\n",
|
||||
"En nuestra búsqueda de la innovación, nuestro departamento de investigación y desarrollo ha estado trabajando incansablemente en varios proyectos. Me gustaría reconocer el trabajo excepcional de David Rodriguez (correo electrónico: david.rodriguez@example.com) en su papel de líder de proyecto. Las contribuciones de David al desarrollo de nuestra tecnología de vanguardia han sido fundamentales. Además, nos gustaría recordar a todos que compartan sus ideas y sugerencias para posibles nuevos proyectos durante nuestra sesión mensual de lluvia de ideas de I+D, programada para el 10 de Julio.\n",
|
||||
"\n",
|
||||
"Por favor, traten la información de este documento con la máxima confidencialidad y asegúrense de que no se comparte con personas no autorizadas. Si tienen alguna pregunta o inquietud sobre los temas discutidos, no duden en ponerse en contacto conmigo directamente.\n",
|
||||
"Por favor, traten la información de este documento con la máxima confidencialidad y asegúrense de no compartirla con personas no autorizadas. Si tienen alguna pregunta o inquietud sobre los temas discutidos, por favor, no duden en comunicarse directamente conmigo.\n",
|
||||
"\n",
|
||||
"Gracias por su atención, y sigamos trabajando juntos para alcanzar nuestros objetivos.\n",
|
||||
"Gracias por su atención y sigamos trabajando juntos para alcanzar nuestros objetivos.\n",
|
||||
"\n",
|
||||
"Saludos cordiales,\n",
|
||||
"Atentamente,\n",
|
||||
"\n",
|
||||
"Jason Fan\n",
|
||||
"Cofundador y CEO\n",
|
||||
@ -199,7 +197,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -63,12 +63,12 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
|
||||
"openai_api_model", "OPENAI_API_MODEL"
|
||||
)
|
||||
|
||||
def transform_documents(
|
||||
async def atransform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
raise NotImplementedError
|
||||
|
||||
async def atransform_documents(
|
||||
def transform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
"""Extracts properties from text documents using doctran."""
|
||||
@ -85,7 +85,7 @@ class DoctranPropertyExtractor(BaseDocumentTransformer):
|
||||
properties = [ExtractProperty(**property) for property in self.properties]
|
||||
for d in documents:
|
||||
doctran_doc = (
|
||||
await doctran.parse(content=d.page_content)
|
||||
doctran.parse(content=d.page_content)
|
||||
.extract(properties=properties)
|
||||
.execute()
|
||||
)
|
||||
|
@ -33,12 +33,12 @@ class DoctranQATransformer(BaseDocumentTransformer):
|
||||
"openai_api_model", "OPENAI_API_MODEL"
|
||||
)
|
||||
|
||||
def transform_documents(
|
||||
async def atransform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
raise NotImplementedError
|
||||
|
||||
async def atransform_documents(
|
||||
def transform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
"""Extracts QA from text documents using doctran."""
|
||||
@ -53,9 +53,7 @@ class DoctranQATransformer(BaseDocumentTransformer):
|
||||
"Install doctran to use this parser. (pip install doctran)"
|
||||
)
|
||||
for d in documents:
|
||||
doctran_doc = (
|
||||
await doctran.parse(content=d.page_content).interrogate().execute()
|
||||
)
|
||||
doctran_doc = doctran.parse(content=d.page_content).interrogate().execute()
|
||||
questions_and_answers = doctran_doc.extracted_properties.get(
|
||||
"questions_and_answers"
|
||||
)
|
||||
|
@ -36,12 +36,12 @@ class DoctranTextTranslator(BaseDocumentTransformer):
|
||||
)
|
||||
self.language = language
|
||||
|
||||
def transform_documents(
|
||||
async def atransform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
raise NotImplementedError
|
||||
|
||||
async def atransform_documents(
|
||||
def transform_documents(
|
||||
self, documents: Sequence[Document], **kwargs: Any
|
||||
) -> Sequence[Document]:
|
||||
"""Translates text documents using doctran."""
|
||||
@ -60,7 +60,7 @@ class DoctranTextTranslator(BaseDocumentTransformer):
|
||||
for doc in documents
|
||||
]
|
||||
for i, doc in enumerate(doctran_docs):
|
||||
doctran_docs[i] = await doc.translate(language=self.language).execute()
|
||||
doctran_docs[i] = doc.translate(language=self.language).execute()
|
||||
return [
|
||||
Document(page_content=doc.transformed_content, metadata=doc.metadata)
|
||||
for doc in doctran_docs
|
||||
|
Loading…
Reference in New Issue
Block a user