From 4dc47bd3acc8928359773fc3fb80d289b9eae55e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Lepied?=
Date: Mon, 4 Sep 2023 00:05:30 +0200
Subject: [PATCH] time_weighted_retriever: use a timestamp if needed (#9906)

If last_accessed_at metadata is a float use it as a timestamp. This
allows to support vector stores that do not store datetime objects
like ChromaDb.

Fixes: https://github.com/langchain-ai/langchain/issues/3685
---
 .../langchain/retrievers/time_weighted_retriever.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libs/langchain/langchain/retrievers/time_weighted_retriever.py b/libs/langchain/langchain/retrievers/time_weighted_retriever.py
index 44d7c6ac6af..75b49a8c376 100644
--- a/libs/langchain/langchain/retrievers/time_weighted_retriever.py
+++ b/libs/langchain/langchain/retrievers/time_weighted_retriever.py
@@ -47,6 +47,14 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
 
         arbitrary_types_allowed = True
 
+    def _document_get_date(self, field: str, document: Document) -> datetime.datetime:
+        """Return a document's date field; float values are read as timestamps."""
+        if field in document.metadata:
+            if isinstance(document.metadata[field], float):
+                return datetime.datetime.fromtimestamp(document.metadata[field])
+            return document.metadata[field]
+        return datetime.datetime.now()
+
     def _get_combined_score(
         self,
         document: Document,
@@ -56,7 +64,7 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
         """Return the combined score for a document."""
         hours_passed = _get_hours_passed(
             current_time,
-            document.metadata["last_accessed_at"],
+            self._document_get_date("last_accessed_at", document),
         )
         score = (1.0 - self.decay_rate) ** hours_passed
         for key in self.other_score_keys: