rfc: instruct embeddings (#811)

Co-authored-by: seanaedmiston <seane999@gmail.com>
This commit is contained in:
Harrison Chase
2023-02-02 08:44:02 -08:00
committed by GitHub
parent 576609e665
commit d564308e0f
4 changed files with 155 additions and 3 deletions

View File

@@ -255,10 +255,68 @@
"query_result = embeddings.embed_query(text)"
]
},
{
"cell_type": "markdown",
"id": "59428e05",
"metadata": {},
"source": [
"## InstructEmbeddings\n",
"Let's load the HuggingFace Instruct embeddings class, `HuggingFaceInstructEmbeddings`."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "92c5b61e",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import HuggingFaceInstructEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "062547b9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"load INSTRUCTOR_Transformer\n",
"max_seq_length 512\n"
]
}
],
"source": [
"embeddings = HuggingFaceInstructEmbeddings(query_instruction=\"Represent the query for retrieval: \")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "e1dcc4bd",
"metadata": {},
"outputs": [],
"source": [
"text = \"This is a test document.\""
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "90f0db94",
"metadata": {},
"outputs": [],
"source": [
"query_result = embeddings.embed_query(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90f0db94",
"id": "a961cdb5",
"metadata": {},
"outputs": [],
"source": []