langchain/docs/extras/modules/evaluation/comparison/pairwise_string.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f6790c46",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.evaluation.comparison import PairwiseStringEvalChain\n",
    "\n",
    "# Language model handed to the evaluation chain below.\n",
    "# NOTE(review): presumably needs OpenAI credentials in the environment\n",
    "# (e.g. OPENAI_API_KEY); this notebook does not set them itself - confirm.\n",
    "llm = ChatOpenAI(model=\"gpt-4\")\n",
    "\n",
    "# Evaluator that the next cell uses to compare two candidate outputs.\n",
    "eval_chain = PairwiseStringEvalChain.from_llm(llm=llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "49ad9139",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reasoning': \"Both responses A and B accurately answer the question, but neither response provides any additional detail or context. Response A is slightly more complete, as it uses full sentences to convey the information, while response B provides just the number. However, both responses are fairly equal in relevance, accuracy, and depth. The lack of detail in both responses doesn't allow for a clear winner based on creativity or detail. \\n\\nTherefore, my rating is a tie. \\n\",\n",
       " 'value': None,\n",
       " 'score': 0.5}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Compare two candidate answers to the same question. The saved result is a\n",
    "# dict with the keys 'reasoning', 'value' and 'score'.\n",
    "# NOTE(review): the saved output rates this a tie (score 0.5) even though only\n",
    "# output_b agrees with `reference` - confirm that the chain built by the\n",
    "# default `from_llm(llm=llm)` call actually passes `reference` to the prompt.\n",
    "eval_chain.evaluate_string_pairs(\n",
    "    output_a=\"there are three dogs\",\n",
    "    output_b=\"4\",\n",
    "    input=\"how many dogs are in the park?\",\n",
    "    reference=\"four\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "586320da",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}