[fix] create redis vector index before adding docs, add prefix to doc… (#11257)

Fix Description: 
In the Redis vector store integration, two issues in the `add_texts` method
led to this bug:
1. The vector index was not being created before documents were added, leading to a "no such index" error.
2. The `doc:<index_name>` prefix was also missing from the Redis keys.

resolves #11197 
Maintainer: @baskaryan

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Anurag Wagh 2023-10-24 23:21:25 +05:30 committed by GitHub
parent 079d1f3b8e
commit d5c2ce7c2e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 28 deletions

View File

@ -506,7 +506,7 @@ class RedisSemanticCache(BaseCache):
index_schema=cast(Dict, self.DEFAULT_SCHEMA), index_schema=cast(Dict, self.DEFAULT_SCHEMA),
) )
_embedding = self.embedding.embed_query(text="test") _embedding = self.embedding.embed_query(text="test")
redis._create_index(dim=len(_embedding)) redis._create_index_if_not_exist(dim=len(_embedding))
self._cache_dict[index_name] = redis self._cache_dict[index_name] = redis
return self._cache_dict[index_name] return self._cache_dict[index_name]

View File

@ -54,16 +54,6 @@ if TYPE_CHECKING:
from langchain.vectorstores.redis.schema import RedisModel from langchain.vectorstores.redis.schema import RedisModel
def _redis_key(prefix: str) -> str:
"""Redis key schema for a given prefix."""
return f"{prefix}:{uuid.uuid4().hex}"
def _redis_prefix(index_name: str) -> str:
"""Redis key prefix for a given index."""
return f"doc:{index_name}"
def _default_relevance_score(val: float) -> float: def _default_relevance_score(val: float) -> float:
return 1 - val return 1 - val
@ -94,6 +84,7 @@ class Redis(VectorStore):
search API available. search API available.
.. code-block:: bash .. code-block:: bash
# to run redis stack in docker locally # to run redis stack in docker locally
docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
@ -258,6 +249,7 @@ class Redis(VectorStore):
index_schema: Optional[Union[Dict[str, str], str, os.PathLike]] = None, index_schema: Optional[Union[Dict[str, str], str, os.PathLike]] = None,
vector_schema: Optional[Dict[str, Union[str, int]]] = None, vector_schema: Optional[Dict[str, Union[str, int]]] = None,
relevance_score_fn: Optional[Callable[[float], float]] = None, relevance_score_fn: Optional[Callable[[float], float]] = None,
key_prefix: Optional[str] = None,
**kwargs: Any, **kwargs: Any,
): ):
"""Initialize with necessary components.""" """Initialize with necessary components."""
@ -284,6 +276,7 @@ class Redis(VectorStore):
self.client = redis_client self.client = redis_client
self.relevance_score_fn = relevance_score_fn self.relevance_score_fn = relevance_score_fn
self._schema = self._get_schema_with_defaults(index_schema, vector_schema) self._schema = self._get_schema_with_defaults(index_schema, vector_schema)
self.key_prefix = key_prefix if key_prefix is not None else f"doc:{index_name}"
@property @property
def embeddings(self) -> Optional[Embeddings]: def embeddings(self) -> Optional[Embeddings]:
@ -420,14 +413,8 @@ class Redis(VectorStore):
**kwargs, **kwargs,
) )
# Create embeddings over documents
embeddings = embedding.embed_documents(texts)
# Create the search index
instance._create_index(dim=len(embeddings[0]))
# Add data to Redis # Add data to Redis
keys = instance.add_texts(texts, metadatas, embeddings, keys=keys) keys = instance.add_texts(texts, metadatas, keys=keys)
return instance, keys return instance, keys
@classmethod @classmethod
@ -692,7 +679,6 @@ class Redis(VectorStore):
List[str]: List of ids added to the vectorstore List[str]: List of ids added to the vectorstore
""" """
ids = [] ids = []
prefix = _redis_prefix(self.index_name)
# Get keys or ids from kwargs # Get keys or ids from kwargs
# Other vectorstores use ids # Other vectorstores use ids
@ -705,22 +691,24 @@ class Redis(VectorStore):
if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)): if not (isinstance(metadatas, list) and isinstance(metadatas[0], dict)):
raise ValueError("Metadatas must be a list of dicts") raise ValueError("Metadatas must be a list of dicts")
embeddings = embeddings or self._embeddings.embed_documents(list(texts))
self._create_index_if_not_exist(dim=len(embeddings[0]))
# Write data to redis # Write data to redis
pipeline = self.client.pipeline(transaction=False) pipeline = self.client.pipeline(transaction=False)
for i, text in enumerate(texts): for i, text in enumerate(texts):
# Use provided values by default or fallback # Use provided values by default or fallback
key = keys_or_ids[i] if keys_or_ids else _redis_key(prefix) key = keys_or_ids[i] if keys_or_ids else str(uuid.uuid4().hex)
if not key.startswith(self.key_prefix + ":"):
key = self.key_prefix + ":" + key
metadata = metadatas[i] if metadatas else {} metadata = metadatas[i] if metadatas else {}
metadata = _prepare_metadata(metadata) if clean_metadata else metadata metadata = _prepare_metadata(metadata) if clean_metadata else metadata
embedding = (
embeddings[i] if embeddings else self._embeddings.embed_query(text)
)
pipeline.hset( pipeline.hset(
key, key,
mapping={ mapping={
self._schema.content_key: text, self._schema.content_key: text,
self._schema.content_vector_key: _array_to_buffer( self._schema.content_vector_key: _array_to_buffer(
embedding, self._schema.vector_dtype embeddings[i], self._schema.vector_dtype
), ),
**metadata, **metadata,
}, },
@ -1212,7 +1200,7 @@ class Redis(VectorStore):
schema.add_vector_field(vector_field) schema.add_vector_field(vector_field)
return schema return schema
def _create_index(self, dim: int = 1536) -> None: def _create_index_if_not_exist(self, dim: int = 1536) -> None:
try: try:
from redis.commands.search.indexDefinition import ( # type: ignore from redis.commands.search.indexDefinition import ( # type: ignore
IndexDefinition, IndexDefinition,
@ -1232,12 +1220,12 @@ class Redis(VectorStore):
# Check if index exists # Check if index exists
if not check_index_exists(self.client, self.index_name): if not check_index_exists(self.client, self.index_name):
prefix = _redis_prefix(self.index_name)
# Create Redis Index # Create Redis Index
self.client.ft(self.index_name).create_index( self.client.ft(self.index_name).create_index(
fields=self._schema.get_fields(), fields=self._schema.get_fields(),
definition=IndexDefinition(prefix=[prefix], index_type=IndexType.HASH), definition=IndexDefinition(
prefix=[self.key_prefix], index_type=IndexType.HASH
),
) )
def _calculate_fp_distance(self, distance: str) -> float: def _calculate_fp_distance(self, distance: str) -> float: