From 4dc47bd3acc8928359773fc3fb80d289b9eae55e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Lepied?= <flepied@gmail.com>
Date: Mon, 4 Sep 2023 00:05:30 +0200
Subject: [PATCH] time_weighted_retriever: use a timestamp if needed (#9906)

If the last_accessed_at metadata is a float, use it as a timestamp. This
makes it possible to support vector stores that do not store datetime
objects, such as ChromaDB.

Fixes: https://github.com/langchain-ai/langchain/issues/3685

<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
  - Description: a description of the change,
  - Issue: the issue # it fixes (if applicable),
  - Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer
(see below),
- Twitter handle: we announce bigger features on Twitter. If your PR
gets announced and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc:

https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. These live in the docs/extras
directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17, @rlancemartin.
 -->
---
 .../langchain/retrievers/time_weighted_retriever.py    | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libs/langchain/langchain/retrievers/time_weighted_retriever.py b/libs/langchain/langchain/retrievers/time_weighted_retriever.py
index 44d7c6ac6af..75b49a8c376 100644
--- a/libs/langchain/langchain/retrievers/time_weighted_retriever.py
+++ b/libs/langchain/langchain/retrievers/time_weighted_retriever.py
@@ -47,6 +47,14 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
 
         arbitrary_types_allowed = True
 
+    def _document_get_date(self, field: str, document: Document) -> datetime.datetime:
+        """Return a document's date field as a datetime (floats are timestamps)."""
+        if field in document.metadata:
+            if isinstance(document.metadata[field], float):
+                return datetime.datetime.fromtimestamp(document.metadata[field])
+            return document.metadata[field]
+        return datetime.datetime.now()
+
     def _get_combined_score(
         self,
         document: Document,
@@ -56,7 +64,7 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever):
         """Return the combined score for a document."""
         hours_passed = _get_hours_passed(
             current_time,
-            document.metadata["last_accessed_at"],
+            self._document_get_date("last_accessed_at", document),
         )
         score = (1.0 - self.decay_rate) ** hours_passed
         for key in self.other_score_keys: