mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 13:36:15 +00:00
Add new types of document transformers (#7379)
- Description: Add two new document transformers that translate documents into different languages and convert documents into Q&A format to improve vector search results. Uses OpenAI function calling via the [doctran](https://github.com/psychic-api/doctran/tree/main) library. - Issue: N/A - Dependencies: `doctran = "^0.0.5"` - Tag maintainer: @rlancemartin @eyurtsev @hwchase17 - Twitter handle: @psychicapi or @jfan001 Notes - Adheres to the `DocumentTransformer` abstraction set by @dev2049 in #3182 - Refactored `EmbeddingsRedundantFilter` to put it in a file under a new `document_transformers` module - Added basic docs for `DocumentInterrogator`, `DocumentTransformer` as well as the existing `EmbeddingsRedundantFilter` --------- Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
"""Integration test for embedding-based relevant doc filtering."""
|
||||
import numpy as np
|
||||
|
||||
from langchain.document_transformers import _DocumentWithState
|
||||
from langchain.document_transformers.embeddings_redundant_filter import (
|
||||
_DocumentWithState,
|
||||
)
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.retrievers.document_compressors import EmbeddingsFilter
|
||||
from langchain.schema import Document
|
||||
|
@@ -1,5 +1,5 @@
|
||||
"""Integration test for embedding-based redundant doc filtering."""
|
||||
from langchain.document_transformers import (
|
||||
from langchain.document_transformers.embeddings_redundant_filter import (
|
||||
EmbeddingsClusteringFilter,
|
||||
EmbeddingsRedundantFilter,
|
||||
_DocumentWithState,
|
||||
|
@@ -1,5 +1,7 @@
|
||||
"""Unit tests for document transformers."""
|
||||
from langchain.document_transformers import _filter_similar_embeddings
|
||||
from langchain.document_transformers.embeddings_redundant_filter import (
|
||||
_filter_similar_embeddings,
|
||||
)
|
||||
from langchain.math_utils import cosine_similarity
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user