From e0a1278d2b127c68bba22a3a4d4533f336783203 Mon Sep 17 00:00:00 2001 From: Tom Aarsen <37621491+tomaarsen@users.noreply.github.com> Date: Tue, 26 Mar 2024 17:46:04 +0100 Subject: [PATCH] docs: HFEmbeddings: Add more information to model_kwargs/encode_kwargs (#19594) - **Description:** Be more explicit with the `model_kwargs` and `encode_kwargs` for `HuggingFaceEmbeddings`. - **Issue:** - - **Dependencies:** - I received some reports from my users that they didn't realise that you could change the default `batch_size` with `HuggingFaceEmbeddings`, which may be attributed to how the `model_kwargs` and `encode_kwargs` don't give much information about what you can specify. I've added some parameter names & links to the Sentence Transformers documentation to help clear it up. Let me know if you'd prefer Markdown/Sphinx-style hyperlinks over a "bare URL". - Tom Aarsen --- .../langchain_community/embeddings/huggingface.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/embeddings/huggingface.py b/libs/community/langchain_community/embeddings/huggingface.py index 3d83ce2c498..3c9cee38927 100644 --- a/libs/community/langchain_community/embeddings/huggingface.py +++ b/libs/community/langchain_community/embeddings/huggingface.py @@ -44,9 +44,14 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings): """Path to store models. Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Keyword arguments to pass to the model.""" + """Keyword arguments to pass to the Sentence Transformer model, such as `device`, + `prompts`, `default_prompt_name`, `revision`, `trust_remote_code`, or `token`. 
+ See also the Sentence Transformer documentation: https://sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer""" encode_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Keyword arguments to pass when calling the `encode` method of the model.""" + """Keyword arguments to pass when calling the `encode` method of the Sentence + Transformer model, such as `prompt_name`, `prompt`, `batch_size`, `precision`, + `normalize_embeddings`, and more. + See also the Sentence Transformer documentation: https://sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer.encode""" multi_process: bool = False """Run encode() on multiple GPUs.""" show_progress: bool = False