mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-20 11:31:58 +00:00
[fix] create redis vector index before adding docs, add prefix to doc… (#11257)
Fix Description: For Redis Vector integration in add_texts method, there were two issues that lead to this bug. 1. Vector index is not being created leading to no such_index error 2. `doc:index` prefix was also missing for Redis Keys. resolves #11197 Maintainer: @baskaryan --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
079d1f3b8e
commit
d5c2ce7c2e
@ -506,7 +506,7 @@ class RedisSemanticCache(BaseCache):
|
||||
index_schema=cast(Dict, self.DEFAULT_SCHEMA),
|
||||
)
|
||||
_embedding = self.embedding.embed_query(text="test")
|
||||
redis._create_index(dim=len(_embedding))
|
||||
redis._create_index_if_not_exist(dim=len(_embedding))
|
||||
self._cache_dict[index_name] = redis
|
||||
|
||||
return self._cache_dict[index_name]
|
||||
|
@ -54,16 +54,6 @@ if TYPE_CHECKING:
|
||||
from langchain.vectorstores.redis.schema import RedisModel
|
||||
|
||||
|
||||
def _redis_key(prefix: str) -> str:
|
||||
"""Redis key schema for a given prefix."""
|
||||
return f"{prefix}:{uuid.uuid4().hex}"
|
||||
|
||||
|
||||
def _redis_prefix(index_name: str) -> str:
|
||||
"""Redis key prefix for a given index."""
|
||||
return f"doc:{index_name}"
|
||||
|
||||
|
||||
def _default_relevance_score(val: float) -> float:
|
||||
return 1 - val
|
||||
|
||||
@ -94,6 +84,7 @@ class Redis(VectorStore):
|
||||
search API available.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# to run redis stack in docker locally
|
||||
docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
|
||||
|
||||
@ -258,6 +249,7 @@ class Redis(VectorStore):
|
||||
index_schema: Optional[Union[Dict[str, str], str, os.PathLike]] = None,
|
||||
vector_schema: Optional[Dict[str, Union[str, int]]] = None,
|
||||
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
||||
key_prefix: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
):
|
||||
"""Initialize with necessary components."""
|
||||
@ -284,6 +276,7 @@ class Redis(VectorStore):
|
||||
self.client = redis_client
|
||||
self.relevance_score_fn = relevance_score_fn
|
||||
self._schema = self._get_schema_with_defaults(index_schema, vector_schema)
|
||||
self.key_prefix = key_prefix if key_prefix is not None else f"doc:{index_name}"
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Optional[Embeddings]:
|
||||
@ -420,14 +413,8 @@ class Redis(VectorStore):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Create embeddings over documents
|
||||
embeddings = embedding.embed_documents(texts)
|
||||
|
||||
# Create the search index
|
||||
instance._create_index(dim=len(embeddings[0]))
|
||||
|
||||
# Add data to Redis
|
||||
keys = instance.add_texts(texts, metadatas, embeddings, keys=keys)
|
||||
keys = instance.add_texts(texts, metadatas, keys=keys)
|
||||
return instance, keys
|
||||
|
||||
@classmethod
|
||||
@ -692,7 +679,6 @@ class Redis(VectorStore):
|
||||
List[str]: List of ids added to the vectorstore
|
||||
"""
|
||||
ids = []
|
||||
prefix = _redis_prefix(self.index_name)
|
||||
|
||||
# Get keys or ids from kwargs
|
||||
# Other vectorstores use ids
|
||||
@ -705,22 +691,24 @@ class Redis(VectorStore):
|
||||
if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)):
|
||||
raise ValueError("Metadatas must be a list of dicts")
|
||||
|
||||
embeddings = embeddings or self._embeddings.embed_documents(list(texts))
|
||||
self._create_index_if_not_exist(dim=len(embeddings[0]))
|
||||
|
||||
# Write data to redis
|
||||
pipeline = self.client.pipeline(transaction=False)
|
||||
for i, text in enumerate(texts):
|
||||
# Use provided values by default or fallback
|
||||
key = keys_or_ids[i] if keys_or_ids else _redis_key(prefix)
|
||||
key = keys_or_ids[i] if keys_or_ids else str(uuid.uuid4().hex)
|
||||
if not key.startswith(self.key_prefix + ":"):
|
||||
key = self.key_prefix + ":" + key
|
||||
metadata = metadatas[i] if metadatas else {}
|
||||
metadata = _prepare_metadata(metadata) if clean_metadata else metadata
|
||||
embedding = (
|
||||
embeddings[i] if embeddings else self._embeddings.embed_query(text)
|
||||
)
|
||||
pipeline.hset(
|
||||
key,
|
||||
mapping={
|
||||
self._schema.content_key: text,
|
||||
self._schema.content_vector_key: _array_to_buffer(
|
||||
embedding, self._schema.vector_dtype
|
||||
embeddings[i], self._schema.vector_dtype
|
||||
),
|
||||
**metadata,
|
||||
},
|
||||
@ -1212,7 +1200,7 @@ class Redis(VectorStore):
|
||||
schema.add_vector_field(vector_field)
|
||||
return schema
|
||||
|
||||
def _create_index(self, dim: int = 1536) -> None:
|
||||
def _create_index_if_not_exist(self, dim: int = 1536) -> None:
|
||||
try:
|
||||
from redis.commands.search.indexDefinition import ( # type: ignore
|
||||
IndexDefinition,
|
||||
@ -1232,12 +1220,12 @@ class Redis(VectorStore):
|
||||
|
||||
# Check if index exists
|
||||
if not check_index_exists(self.client, self.index_name):
|
||||
prefix = _redis_prefix(self.index_name)
|
||||
|
||||
# Create Redis Index
|
||||
self.client.ft(self.index_name).create_index(
|
||||
fields=self._schema.get_fields(),
|
||||
definition=IndexDefinition(prefix=[prefix], index_type=IndexType.HASH),
|
||||
definition=IndexDefinition(
|
||||
prefix=[self.key_prefix], index_type=IndexType.HASH
|
||||
),
|
||||
)
|
||||
|
||||
def _calculate_fp_distance(self, distance: str) -> float:
|
||||
|
Loading…
Reference in New Issue
Block a user