Minor updates
@@ -172,6 +172,19 @@
 "Next, we build a dataset of QA pairs based upon the [documentation](https://python.langchain.com/docs/expression_language/) that we indexed."
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "22f0daeb-6a61-4f8d-a4fc-4c7d22b6dc61",
+"metadata": {},
+"outputs": [],
+"source": [
+"import os\n",
+"os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n",
+"os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n",
+"os.environ['LANGCHAIN_API_KEY'] = '<your-api-key>'"
+]
+},
 {
 "cell_type": "code",
 "execution_count": 4,
@@ -179,6 +191,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"from langsmith import Client\n",
+"\n",
 "# QA\n",
 "inputs = [\n",
 " \"How can I directly pass a string to a runnable and use it to construct the input needed for my prompt?\",\n",
@@ -227,7 +241,7 @@
 "\n",
 "For comparing questions and answers, common built-in `LangChainStringEvaluator` options are `QA` and `CoT_QA` (see the [evaluator implementations](https://docs.smith.langchain.com/evaluation/faq/evaluator-implementations)).\n",
 "\n",
-"We will use `CoT_QA` as an LLM-as-judge evaluator, which uses the eval prompt defined [here](https://github.com/langchain-ai/langchain/blob/22da9f5f3f9fef24c5c75072b678b8a2f654b173/libs/langchain/langchain/evaluation/qa/eval_prompt.py#L43).\n",
+"We will use `CoT_QA` as an LLM-as-judge evaluator, which uses the eval prompt defined [here](https://smith.langchain.com/hub/langchain-ai/cot_qa).\n",
 "\n",
 "But all `LangChainStringEvaluator`s expose a common interface for passing your inputs:\n",
 "\n",
@@ -326,7 +340,7 @@
 "\n",
 "For comparing documents and answers, a common built-in `LangChainStringEvaluator` option is [`Criteria`](https://python.langchain.com/docs/guides/productionization/evaluation/string/criteria_eval_chain/#using-reference-labels), since we want to supply custom criteria.\n",
 "\n",
-"We will use `labeled_score_string` as an LLM-as-judge evaluator, which uses the eval prompt defined [here](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/evaluation/criteria/prompt.py).\n",
+"We will use `labeled_score_string` as an LLM-as-judge evaluator, which uses the eval prompt defined [here](https://smith.langchain.com/hub/wfh/labeled-score-string).\n",
 "\n",
 "Here, we only need to use two inputs of the `LangChainStringEvaluator` interface:\n",
 "\n",
@@ -338,7 +352,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": 18,
 "id": "7f0872a5-e989-415d-9fed-5846efaa9488",
 "metadata": {},
 "outputs": [],
@@ -349,7 +363,10 @@
 " \"labeled_score_string\", \n",
 " config={\n",
 " \"criteria\": { \n",
-" \"accuracy\": \"Is the prediction grounded in the reference?\"\n",
+" \"accuracy\": \"\"\"Is the Assistant's Answer grounded in the Ground Truth documentation? A score of 0 means that the\n",
+" Assistant answer is not at all based upon / grounded in the Ground Truth documentation. A score of 5 means\n",
+" that the Assistant answer contains some information (e.g., a hallucination) that is not captured in the Ground Truth\n",
+" documentation. A score of 10 means that the Assistant answer is fully based upon the Ground Truth documentation.\"\"\"\n",
 " },\n",
 " # If you want the score to be saved on a scale from 0 to 1\n",
 " \"normalize_by\": 10,\n",
@@ -364,7 +381,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 19,
 "id": "6d5bf61b-3903-4cde-9ecf-67f0e0874521",
 "metadata": {},
 "outputs": [
@@ -372,8 +389,8 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"View the evaluation results for experiment: 'rag-qa-oai-hallucination-94fa7798' at:\n",
-"https://smith.langchain.com/o/1fa8b1f4-fcb9-4072-9aa9-983e35ad61b8/datasets/368734fb-7c14-4e1f-b91a-50d52cb58a07/compare?selectedSessions=5d82d039-0596-40a6-b901-6fe5a2e4223b\n",
+"View the evaluation results for experiment: 'rag-qa-oai-hallucination-fad2e13c' at:\n",
+"https://smith.langchain.com/o/1fa8b1f4-fcb9-4072-9aa9-983e35ad61b8/datasets/368734fb-7c14-4e1f-b91a-50d52cb58a07/compare?selectedSessions=9a1e9e7d-cf87-4b89-baf6-f5498a160627\n",
 "\n",
 "\n"
 ]
@@ -381,7 +398,7 @@
 {
 "data": {
 "application/vnd.jupyter.widget-view+json": {
-"model_id": "72dcf5fab4f24130a72390d947f48b54",
+"model_id": "891904d8d44444e98c6a03faa43e147a",
 "version_major": 2,
 "version_minor": 0
 },
@@ -425,7 +442,7 @@
 "\n",
 "For comparing documents and answers, a common built-in `LangChainStringEvaluator` option is [`Criteria`](https://python.langchain.com/docs/guides/productionization/evaluation/string/criteria_eval_chain/#using-reference-labels), since we want to supply custom criteria.\n",
 "\n",
-"We will use `labeled_score_string` as an LLM-as-judge evaluator, which uses the eval prompt defined [here](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/evaluation/criteria/prompt.py).\n",
+"We will use `labeled_score_string` as an LLM-as-judge evaluator, which uses the eval prompt defined [here](https://smith.langchain.com/hub/wfh/labeled-score-string).\n",
 "\n",
 "Here, we only need to use two inputs of the `LangChainStringEvaluator` interface:\n",
 "\n",
@@ -437,7 +454,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 16,
+"execution_count": 22,
 "id": "df247034-14ed-40b1-b313-b0fef7286546",
 "metadata": {},
 "outputs": [],
@@ -448,7 +465,11 @@
 " \"labeled_score_string\", \n",
 " config={\n",
 " \"criteria\": { \n",
-" \"accuracy\": \"Is the prediction relevant to the reference?\"\n",
+" \"accuracy\": \"\"\"The Assistant's Answer is a set of documents retrieved from a vectorstore. The Ground Truth is a question\n",
+" used for retrieval. You will score whether the Assistant's Answer (the retrieved docs) is relevant to the Ground Truth\n",
+" question. A score of 0 means that the Assistant answer contains documents that are not at all relevant to the\n",
+" Ground Truth question. A score of 5 means that the Assistant answer contains some documents that are relevant to the Ground Truth\n",
+" question. A score of 10 means that all of the Assistant answer documents are relevant to the Ground Truth question.\"\"\"\n",
 " },\n",
 " # If you want the score to be saved on a scale from 0 to 1\n",
 " \"normalize_by\": 10,\n",
@@ -463,7 +484,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 17,
+"execution_count": 23,
 "id": "cfe988dc-2aaa-42f4-93ff-c3c9fe6b3124",
 "metadata": {},
 "outputs": [
@@ -471,8 +492,8 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"View the evaluation results for experiment: 'rag-qa-oai-doc-relevance-1ac405db' at:\n",
-"https://smith.langchain.com/o/1fa8b1f4-fcb9-4072-9aa9-983e35ad61b8/datasets/368734fb-7c14-4e1f-b91a-50d52cb58a07/compare?selectedSessions=75be8a78-e92d-4f8a-a73b-d6512903add0\n",
+"View the evaluation results for experiment: 'rag-qa-oai-doc-relevance-82244196' at:\n",
+"https://smith.langchain.com/o/1fa8b1f4-fcb9-4072-9aa9-983e35ad61b8/datasets/368734fb-7c14-4e1f-b91a-50d52cb58a07/compare?selectedSessions=3bbf09c9-69de-47ba-9d3c-7bcedf5cd48f\n",
 "\n",
 "\n"
 ]
@@ -480,7 +501,7 @@
 {
 "data": {
 "application/vnd.jupyter.widget-view+json": {
-"model_id": "2d70afcc5b3c49b59a3b64a952dfd14b",
+"model_id": "4e4091f1053b4d34871aa87428297e12",
 "version_major": 2,
 "version_minor": 0
 },
@@ -512,22 +533,6 @@
 "metadata": {},
 "outputs": [],
 "source": []
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "62b936c4-d24f-4596-a907-3dac7952c6e6",
-"metadata": {},
-"outputs": [],
-"source": []
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "528ba4b9-8026-4024-93bc-d8c413bb5f71",
-"metadata": {},
-"outputs": [],
-"source": []
 }
 ],
 "metadata": {