	DOCS: integrations/text_embeddings/ cleanup (#13476)
				
					
				
Updated several notebooks:
- fixed titles that were inconsistent or broke the ToC sorting order
- added missing source descriptions and links
- fixed formatting
		| @@ -4,9 +4,9 @@ | |||||||
|    "cell_type": "markdown", |    "cell_type": "markdown", | ||||||
|    "metadata": {}, |    "metadata": {}, | ||||||
|    "source": [ |    "source": [ | ||||||
|     "# ERNIE Embedding-V1\n", |     "# ERNIE\n", | ||||||
|     "\n", |     "\n", | ||||||
|     "[ERNIE Embedding-V1](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu) is a text representation model based on Baidu Wenxin's large-scale model technology, \n", |     "[ERNIE Embedding-V1](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu) is a text representation model based on `Baidu Wenxin` large-scale model technology, \n", | ||||||
|     "which converts text into a vector form represented by numerical values, and is used in text retrieval, information recommendation, knowledge mining and other scenarios." |     "which converts text into a vector form represented by numerical values, and is used in text retrieval, information recommendation, knowledge mining and other scenarios." | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
| @@ -53,8 +53,19 @@ | |||||||
|    "language": "python", |    "language": "python", | ||||||
|    "name": "python3" |    "name": "python3" | ||||||
|   }, |   }, | ||||||
|   "orig_nbformat": 4 |   "language_info": { | ||||||
|  |    "codemirror_mode": { | ||||||
|  |     "name": "ipython", | ||||||
|  |     "version": 3 | ||||||
|  |    }, | ||||||
|  |    "file_extension": ".py", | ||||||
|  |    "mimetype": "text/x-python", | ||||||
|  |    "name": "python", | ||||||
|  |    "nbconvert_exporter": "python", | ||||||
|  |    "pygments_lexer": "ipython3", | ||||||
|  |    "version": "3.10.12" | ||||||
|  |   } | ||||||
|  }, |  }, | ||||||
|  "nbformat": 4, |  "nbformat": 4, | ||||||
|  "nbformat_minor": 2 |  "nbformat_minor": 4 | ||||||
| } | } | ||||||
|   | |||||||
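For orientation, here is a minimal usage sketch of the ERNIE integration this notebook documents. The `ErnieEmbeddings` class name and its credential parameters are assumptions (they do not appear in this hunk), so treat the snippet as illustrative rather than authoritative.

```python
# Minimal sketch, not from this commit: assumes langchain exposes ErnieEmbeddings
# and that Baidu Wenxin credentials are passed via the parameters below.
from langchain.embeddings import ErnieEmbeddings

embeddings = ErnieEmbeddings(
    ernie_client_id="YOUR_CLIENT_ID",          # assumed parameter name
    ernie_client_secret="YOUR_CLIENT_SECRET",  # assumed parameter name
)

doc_vectors = embeddings.embed_documents(["hello", "world"])  # one vector per document
query_vector = embeddings.embed_query("hello")                # single vector for a query
```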
| @@ -5,14 +5,14 @@ | |||||||
|    "id": "900fbd04-f6aa-4813-868f-1c54e3265385", |    "id": "900fbd04-f6aa-4813-868f-1c54e3265385", | ||||||
|    "metadata": {}, |    "metadata": {}, | ||||||
|    "source": [ |    "source": [ | ||||||
|     "# Qdrant FastEmbed\n", |     "# FastEmbed by Qdrant\n", | ||||||
|     "\n", |     "\n", | ||||||
|     "[FastEmbed](https://qdrant.github.io/fastembed/) is a lightweight, fast, Python library built for embedding generation. \n", |     ">[FastEmbed](https://qdrant.github.io/fastembed/) from [Qdrant](https://qdrant.tech) is a lightweight, fast, Python library built for embedding generation. \n", | ||||||
|     "\n", |     ">\n", | ||||||
|     "- Quantized model weights\n", |     ">- Quantized model weights\n", | ||||||
|     "- ONNX Runtime, no PyTorch dependency\n", |     ">- ONNX Runtime, no PyTorch dependency\n", | ||||||
|     "- CPU-first design\n", |     ">- CPU-first design\n", | ||||||
|     "- Data-parallelism for encoding of large datasets." |     ">- Data-parallelism for encoding of large datasets." | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
| @@ -154,7 +154,7 @@ | |||||||
|    "name": "python", |    "name": "python", | ||||||
|    "nbconvert_exporter": "python", |    "nbconvert_exporter": "python", | ||||||
|    "pygments_lexer": "ipython3", |    "pygments_lexer": "ipython3", | ||||||
|    "version": "3.11.6" |    "version": "3.10.12" | ||||||
|   } |   } | ||||||
|  }, |  }, | ||||||
|  "nbformat": 4, |  "nbformat": 4, | ||||||
|   | |||||||
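A minimal sketch of the FastEmbed integration covered by the retitled notebook, assuming the `FastEmbedEmbeddings` class and an installed `fastembed` package (neither is shown in this hunk).

```python
# Minimal sketch, assuming `pip install fastembed` and the FastEmbedEmbeddings class.
from langchain.embeddings.fastembed import FastEmbedEmbeddings

# Defaults to a small quantized ONNX model; a model_name can be passed to override it.
embeddings = FastEmbedEmbeddings()

doc_vectors = embeddings.embed_documents(
    ["FastEmbed is designed to be CPU-first", "It runs on ONNX Runtime without PyTorch"]
)
query_vector = embeddings.embed_query("Which runtime does FastEmbed use?")
```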
| @@ -5,8 +5,10 @@ | |||||||
|    "id": "59428e05", |    "id": "59428e05", | ||||||
|    "metadata": {}, |    "metadata": {}, | ||||||
|    "source": [ |    "source": [ | ||||||
|     "# InstructEmbeddings\n", |     "# Instruct Embeddings on Hugging Face\n", | ||||||
|     "Let's load the HuggingFace instruct Embeddings class." |     "\n", | ||||||
|  |     ">[Hugging Face sentence-transformers](https://huggingface.co/sentence-transformers) is a Python framework for state-of-the-art sentence, text and image embeddings.\n", | ||||||
|  |     ">One of the instruct embedding models is used in the `HuggingFaceInstructEmbeddings` class.\n" | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
| @@ -85,7 +87,7 @@ | |||||||
|    "name": "python", |    "name": "python", | ||||||
|    "nbconvert_exporter": "python", |    "nbconvert_exporter": "python", | ||||||
|    "pygments_lexer": "ipython3", |    "pygments_lexer": "ipython3", | ||||||
|    "version": "3.9.1" |    "version": "3.10.12" | ||||||
|   }, |   }, | ||||||
|   "vscode": { |   "vscode": { | ||||||
|    "interpreter": { |    "interpreter": { | ||||||
|   | |||||||
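A minimal sketch of the `HuggingFaceInstructEmbeddings` class that the retitled notebook documents, assuming the `InstructorEmbedding` and `sentence_transformers` packages are installed; the instruction string below is a placeholder example.

```python
# Minimal sketch; the instruction text is a placeholder, not taken from this commit.
from langchain.embeddings import HuggingFaceInstructEmbeddings

embeddings = HuggingFaceInstructEmbeddings(
    query_instruction="Represent the query for retrieval: ",  # assumed example instruction
)

query_vector = embeddings.embed_query("This is a test document.")
```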
| @@ -2,183 +2,207 @@ | |||||||
|  "cells": [ |  "cells": [ | ||||||
|   { |   { | ||||||
|    "cell_type": "markdown", |    "cell_type": "markdown", | ||||||
|    "source": [ |  | ||||||
|     "# Johnsnowlabs Embedding\n", |  | ||||||
|     "\n", |  | ||||||
|     "### Loading the Johnsnowlabs embedding class to generate and query embeddings\n", |  | ||||||
|     "\n", |  | ||||||
|     "Models are loaded with [nlp.load](https://nlp.johnsnowlabs.com/docs/en/jsl/load_api) and spark session is started with [nlp.start()](https://nlp.johnsnowlabs.com/docs/en/jsl/start-a-sparksession) under the hood.\n", |  | ||||||
|     "For all 24.000+ models, see the [John Snow Labs Model Models Hub](https://nlp.johnsnowlabs.com/models)\n" |  | ||||||
|    ], |  | ||||||
|    "metadata": { |    "metadata": { | ||||||
|     "collapsed": false |     "collapsed": false, | ||||||
|    } |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "# John Snow Labs\n", | ||||||
|  |     "\n", | ||||||
|  |     ">[John Snow Labs](https://nlp.johnsnowlabs.com/) NLP & LLM ecosystem includes software libraries for state-of-the-art AI at scale, Responsible AI, No-Code AI, and access to over 20,000 models for Healthcare, Legal, Finance, etc.\n", | ||||||
|  |     ">\n", | ||||||
|  |     ">Models are loaded with [nlp.load](https://nlp.johnsnowlabs.com/docs/en/jsl/load_api) and spark session is started >with [nlp.start()](https://nlp.johnsnowlabs.com/docs/en/jsl/start-a-sparksession) under the hood.\n", | ||||||
|  |     ">For all 24.000+ models, see the [John Snow Labs Model Models Hub](https://nlp.johnsnowlabs.com/models)\n" | ||||||
|  |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "markdown", |    "cell_type": "markdown", | ||||||
|  |    "metadata": {}, | ||||||
|    "source": [ |    "source": [ | ||||||
|     "! pip install johnsnowlabs\n" |     "## Setting up" | ||||||
|    ], |    ] | ||||||
|    "metadata": { |  | ||||||
|     "collapsed": false |  | ||||||
|    } |  | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "code", |    "cell_type": "code", | ||||||
|    "execution_count": null, |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "collapsed": false, | ||||||
|  |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "outputs": [], | ||||||
|  |    "source": [ | ||||||
|  |     "! pip install johnsnowlabs" | ||||||
|  |    ] | ||||||
|  |   }, | ||||||
|  |   { | ||||||
|  |    "cell_type": "code", | ||||||
|  |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "collapsed": false, | ||||||
|  |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "# If you have a enterprise license, you can run this to install enterprise features\n", |     "# If you have a enterprise license, you can run this to install enterprise features\n", | ||||||
|     "# from johnsnowlabs import nlp\n", |     "# from johnsnowlabs import nlp\n", | ||||||
|     "# nlp.install()" |     "# nlp.install()" | ||||||
|    ], |  | ||||||
|    "metadata": { |  | ||||||
|     "collapsed": false |  | ||||||
|    } |  | ||||||
|   }, |  | ||||||
|   { |  | ||||||
|    "cell_type": "code", |  | ||||||
|    "source": [ |  | ||||||
|     "#### Import the necessary classes" |  | ||||||
|    ], |  | ||||||
|    "metadata": { |  | ||||||
|     "collapsed": false |  | ||||||
|    }, |  | ||||||
|    "execution_count": 1, |  | ||||||
|    "outputs": [ |  | ||||||
|     { |  | ||||||
|      "name": "stdout", |  | ||||||
|      "output_type": "stream", |  | ||||||
|      "text": [ |  | ||||||
|       "Found existing installation: langchain 0.0.189\n", |  | ||||||
|       "Uninstalling langchain-0.0.189:\n", |  | ||||||
|       "  Successfully uninstalled langchain-0.0.189\n" |  | ||||||
|      ] |  | ||||||
|     } |  | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "markdown", |    "cell_type": "markdown", | ||||||
|    "source": [], |    "metadata": {}, | ||||||
|    "metadata": { |    "source": [ | ||||||
|     "collapsed": false |     "## Example" | ||||||
|    } |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "code", |    "cell_type": "code", | ||||||
|    "execution_count": null, |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "collapsed": false, | ||||||
|  |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings" |     "from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings" | ||||||
|    ], |    ] | ||||||
|    "metadata": { |  | ||||||
|     "collapsed": false |  | ||||||
|    } |  | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "markdown", |    "cell_type": "markdown", | ||||||
|    "source": [ |  | ||||||
|     "#### Initialize Johnsnowlabs Embeddings and Spark Session" |  | ||||||
|    ], |  | ||||||
|    "metadata": { |    "metadata": { | ||||||
|     "collapsed": false |     "collapsed": false, | ||||||
|    } |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "Initialize Johnsnowlabs Embeddings and Spark Session" | ||||||
|  |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "code", |    "cell_type": "code", | ||||||
|    "execution_count": null, |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "collapsed": false, | ||||||
|  |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "embedder = JohnSnowLabsEmbeddings(\"en.embed_sentence.biobert.clinical_base_cased\")" |     "embedder = JohnSnowLabsEmbeddings(\"en.embed_sentence.biobert.clinical_base_cased\")" | ||||||
|    ], |    ] | ||||||
|    "metadata": { |  | ||||||
|     "collapsed": false |  | ||||||
|    } |  | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "markdown", |    "cell_type": "markdown", | ||||||
|    "source": [ |  | ||||||
|     "#### Define some example texts . These could be any documents that you want to analyze - for example, news articles, social media posts, or product reviews." |  | ||||||
|    ], |  | ||||||
|    "metadata": { |    "metadata": { | ||||||
|     "collapsed": false |     "collapsed": false, | ||||||
|    } |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "Define some example texts . These could be any documents that you want to analyze - for example, news articles, social media posts, or product reviews." | ||||||
|  |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "code", |    "cell_type": "code", | ||||||
|    "execution_count": null, |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "collapsed": false, | ||||||
|  |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "texts = [\"Cancer is caused by smoking\", \"Antibiotics aren't painkiller\"]" |     "texts = [\"Cancer is caused by smoking\", \"Antibiotics aren't painkiller\"]" | ||||||
|    ], |    ] | ||||||
|    "metadata": { |  | ||||||
|     "collapsed": false |  | ||||||
|    } |  | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "markdown", |    "cell_type": "markdown", | ||||||
|    "source": [ |  | ||||||
|     "#### Generate and print embeddings for the texts . The JohnSnowLabsEmbeddings class generates an embedding for each document, which is a numerical representation of the document's content. These embeddings can be used for various natural language processing tasks, such as document similarity comparison or text classification." |  | ||||||
|    ], |  | ||||||
|    "metadata": { |    "metadata": { | ||||||
|     "collapsed": false |     "collapsed": false, | ||||||
|    } |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "Generate and print embeddings for the texts . The JohnSnowLabsEmbeddings class generates an embedding for each document, which is a numerical representation of the document's content. These embeddings can be used for various natural language processing tasks, such as document similarity comparison or text classification." | ||||||
|  |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "code", |    "cell_type": "code", | ||||||
|    "execution_count": null, |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "collapsed": false, | ||||||
|  |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "embeddings = embedder.embed_documents(texts)\n", |     "embeddings = embedder.embed_documents(texts)\n", | ||||||
|     "for i, embedding in enumerate(embeddings):\n", |     "for i, embedding in enumerate(embeddings):\n", | ||||||
|     "    print(f\"Embedding for document {i+1}: {embedding}\")" |     "    print(f\"Embedding for document {i+1}: {embedding}\")" | ||||||
|    ], |    ] | ||||||
|    "metadata": { |  | ||||||
|     "collapsed": false |  | ||||||
|    } |  | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "markdown", |    "cell_type": "markdown", | ||||||
|    "source": [ |  | ||||||
|     "#### Generate and print an embedding for a single piece of text. You can also generate an embedding for a single piece of text, such as a search query. This can be useful for tasks like information retrieval, where you want to find documents that are similar to a given query." |  | ||||||
|    ], |  | ||||||
|    "metadata": { |    "metadata": { | ||||||
|     "collapsed": false |     "collapsed": false, | ||||||
|    } |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|  |    "source": [ | ||||||
|  |     "Generate and print an embedding for a single piece of text. You can also generate an embedding for a single piece of text, such as a search query. This can be useful for tasks like information retrieval, where you want to find documents that are similar to a given query." | ||||||
|  |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|    "cell_type": "code", |    "cell_type": "code", | ||||||
|    "execution_count": null, |    "execution_count": null, | ||||||
|  |    "metadata": { | ||||||
|  |     "collapsed": false, | ||||||
|  |     "jupyter": { | ||||||
|  |      "outputs_hidden": false | ||||||
|  |     } | ||||||
|  |    }, | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "query = \"Cancer is caused by smoking\"\n", |     "query = \"Cancer is caused by smoking\"\n", | ||||||
|     "query_embedding = embedder.embed_query(query)\n", |     "query_embedding = embedder.embed_query(query)\n", | ||||||
|     "print(f\"Embedding for query: {query_embedding}\")" |     "print(f\"Embedding for query: {query_embedding}\")" | ||||||
|    ], |    ] | ||||||
|    "metadata": { |  | ||||||
|     "collapsed": false |  | ||||||
|    } |  | ||||||
|   } |   } | ||||||
|  ], |  ], | ||||||
|  "metadata": { |  "metadata": { | ||||||
|   "kernelspec": { |   "kernelspec": { | ||||||
|    "display_name": "Python 3", |    "display_name": "Python 3 (ipykernel)", | ||||||
|    "language": "python", |    "language": "python", | ||||||
|    "name": "python3" |    "name": "python3" | ||||||
|   }, |   }, | ||||||
|   "language_info": { |   "language_info": { | ||||||
|    "codemirror_mode": { |    "codemirror_mode": { | ||||||
|     "name": "ipython", |     "name": "ipython", | ||||||
|     "version": 2 |     "version": 3 | ||||||
|    }, |    }, | ||||||
|    "file_extension": ".py", |    "file_extension": ".py", | ||||||
|    "mimetype": "text/x-python", |    "mimetype": "text/x-python", | ||||||
|    "name": "python", |    "name": "python", | ||||||
|    "nbconvert_exporter": "python", |    "nbconvert_exporter": "python", | ||||||
|    "pygments_lexer": "ipython2", |    "pygments_lexer": "ipython3", | ||||||
|    "version": "2.7.6" |    "version": "3.10.12" | ||||||
|   } |   } | ||||||
|  }, |  }, | ||||||
|  "nbformat": 4, |  "nbformat": 4, | ||||||
|  "nbformat_minor": 0 |  "nbformat_minor": 4 | ||||||
| } | } | ||||||
|   | |||||||
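Pulled together from the notebook cells in the hunk above, the John Snow Labs flow reduces to the following consolidated snippet (verbatim from the diffed cells, only stitched into one block).

```python
from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings

# Loads the model with nlp.load and starts a Spark session via nlp.start() under the hood.
embedder = JohnSnowLabsEmbeddings("en.embed_sentence.biobert.clinical_base_cased")

texts = ["Cancer is caused by smoking", "Antibiotics aren't painkiller"]
embeddings = embedder.embed_documents(texts)
for i, embedding in enumerate(embeddings):
    print(f"Embedding for document {i+1}: {embedding}")

query = "Cancer is caused by smoking"
query_embedding = embedder.embed_query(query)
print(f"Embedding for query: {query_embedding}")
```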
| @@ -5,11 +5,13 @@ | |||||||
|    "id": "ed47bb62", |    "id": "ed47bb62", | ||||||
|    "metadata": {}, |    "metadata": {}, | ||||||
|    "source": [ |    "source": [ | ||||||
|     "# Sentence Transformers\n", |     "# Sentence Transformers on Hugging Face\n", | ||||||
|     "\n", |     "\n", | ||||||
|     ">[SentenceTransformers](https://www.sbert.net/) embeddings are called using the `HuggingFaceEmbeddings` integration. We have also added an alias for `SentenceTransformerEmbeddings` for users who are more familiar with directly using that package.\n", |     ">[Hugging Face sentence-transformers](https://huggingface.co/sentence-transformers) is a Python framework for state-of-the-art sentence, text and image embeddings.\n", | ||||||
|  |     ">One of the embedding models is used in the `HuggingFaceEmbeddings` class.\n", | ||||||
|  |     ">We have also added an alias for `SentenceTransformerEmbeddings` for users who are more familiar with directly using that package.\n", | ||||||
|     "\n", |     "\n", | ||||||
|     "`SentenceTransformers` is a python package that can generate text and image embeddings, originating from [Sentence-BERT](https://arxiv.org/abs/1908.10084)" |     "`sentence_transformers` package models are originating from [Sentence-BERT](https://arxiv.org/abs/1908.10084)" | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|   | |||||||
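A minimal sketch of the `HuggingFaceEmbeddings` usage the retitled notebook describes; the model name is an assumed example, and `SentenceTransformerEmbeddings` is the alias for the same class mentioned in the hunk.

```python
# Minimal sketch, assuming `pip install sentence-transformers`; the model name is an example.
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

doc_vectors = embeddings.embed_documents(["This is a test document."])
query_vector = embeddings.embed_query("What is being embedded?")
```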
| @@ -5,7 +5,11 @@ | |||||||
|    "id": "fff4734f", |    "id": "fff4734f", | ||||||
|    "metadata": {}, |    "metadata": {}, | ||||||
|    "source": [ |    "source": [ | ||||||
|     "# TensorflowHub\n", |     "# TensorFlow Hub\n", | ||||||
|  |     "\n", | ||||||
|  |     ">[TensorFlow Hub](https://www.tensorflow.org/hub) is a repository of trained machine learning models ready for fine-tuning and deployable anywhere. Reuse trained models like `BERT` and `Faster R-CNN` with just a few lines of code.\n", | ||||||
|  |     ">\n", | ||||||
|  |     ">\n", | ||||||
|     "Let's load the TensorflowHub Embedding class." |     "Let's load the TensorflowHub Embedding class." | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
| @@ -105,7 +109,7 @@ | |||||||
|    "name": "python", |    "name": "python", | ||||||
|    "nbconvert_exporter": "python", |    "nbconvert_exporter": "python", | ||||||
|    "pygments_lexer": "ipython3", |    "pygments_lexer": "ipython3", | ||||||
|    "version": "3.9.1" |    "version": "3.10.12" | ||||||
|   }, |   }, | ||||||
|   "vscode": { |   "vscode": { | ||||||
|    "interpreter": { |    "interpreter": { | ||||||
|   | |||||||
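A minimal sketch of the `TensorflowHubEmbeddings` class the notebook loads, assuming `tensorflow` and `tensorflow-hub` are installed; the default model choice is not shown in this hunk.

```python
# Minimal sketch; requires tensorflow and tensorflow-hub to be installed.
from langchain.embeddings import TensorflowHubEmbeddings

embeddings = TensorflowHubEmbeddings()  # uses the integration's default TF Hub model

query_vector = embeddings.embed_query("This is a test document.")
doc_vectors = embeddings.embed_documents(["This is a test document."])
```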
| @@ -7,6 +7,8 @@ | |||||||
|    "source": [ |    "source": [ | ||||||
|     "# Voyage AI\n", |     "# Voyage AI\n", | ||||||
|     "\n", |     "\n", | ||||||
|  |     ">[Voyage AI](https://www.voyageai.com/) provides cutting-edge embedding/vectorizations models.\n", | ||||||
|  |     "\n", | ||||||
|     "Let's load the Voyage Embedding class." |     "Let's load the Voyage Embedding class." | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
| @@ -215,7 +217,7 @@ | |||||||
|    "name": "python", |    "name": "python", | ||||||
|    "nbconvert_exporter": "python", |    "nbconvert_exporter": "python", | ||||||
|    "pygments_lexer": "ipython3", |    "pygments_lexer": "ipython3", | ||||||
|    "version": "3.9.18" |    "version": "3.10.12" | ||||||
|   }, |   }, | ||||||
|   "vscode": { |   "vscode": { | ||||||
|    "interpreter": { |    "interpreter": { | ||||||
|   | |||||||
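A minimal sketch of the `VoyageEmbeddings` class this notebook loads; the API key is a placeholder and the parameter name is an assumption, not something shown in this hunk.

```python
# Minimal sketch; the API key is a placeholder, not a value from this commit.
from langchain.embeddings import VoyageEmbeddings

embeddings = VoyageEmbeddings(voyage_api_key="YOUR_VOYAGE_API_KEY")  # assumed parameter name

doc_vectors = embeddings.embed_documents(["Voyage AI provides embedding models."])
query_vector = embeddings.embed_query("Which provider built these embeddings?")
```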