notebook fmt (#12498)

This commit is contained in:
Bagatur
2023-10-29 15:50:09 -07:00
committed by GitHub
parent 56cc5b847c
commit 2424fff3f1
342 changed files with 8261 additions and 6796 deletions

View File

@@ -51,8 +51,14 @@
"metadata": {},
"outputs": [],
"source": [
"details = pd.read_csv(\"~/Downloads/archive/Hotel_details.csv\").drop_duplicates(subset=\"hotelid\").set_index(\"hotelid\")\n",
"attributes = pd.read_csv(\"~/Downloads/archive/Hotel_Room_attributes.csv\", index_col=\"id\")\n",
"details = (\n",
" pd.read_csv(\"~/Downloads/archive/Hotel_details.csv\")\n",
" .drop_duplicates(subset=\"hotelid\")\n",
" .set_index(\"hotelid\")\n",
")\n",
"attributes = pd.read_csv(\n",
" \"~/Downloads/archive/Hotel_Room_attributes.csv\", index_col=\"id\"\n",
")\n",
"price = pd.read_csv(\"~/Downloads/archive/hotels_RoomPrice.csv\", index_col=\"id\")"
]
},
@@ -208,9 +214,20 @@
}
],
"source": [
"latest_price = price.drop_duplicates(subset=\"refid\", keep=\"last\")[[\"hotelcode\", \"roomtype\", \"onsiterate\", \"roomamenities\", \"maxoccupancy\", \"mealinclusiontype\"]]\n",
"latest_price = price.drop_duplicates(subset=\"refid\", keep=\"last\")[\n",
" [\n",
" \"hotelcode\",\n",
" \"roomtype\",\n",
" \"onsiterate\",\n",
" \"roomamenities\",\n",
" \"maxoccupancy\",\n",
" \"mealinclusiontype\",\n",
" ]\n",
"]\n",
"latest_price[\"ratedescription\"] = attributes.loc[latest_price.index][\"ratedescription\"]\n",
"latest_price = latest_price.join(details[[\"hotelname\", \"city\", \"country\", \"starrating\"]], on=\"hotelcode\")\n",
"latest_price = latest_price.join(\n",
" details[[\"hotelname\", \"city\", \"country\", \"starrating\"]], on=\"hotelcode\"\n",
")\n",
"latest_price = latest_price.rename({\"ratedescription\": \"roomdescription\"}, axis=1)\n",
"latest_price[\"mealsincluded\"] = ~latest_price[\"mealinclusiontype\"].isnull()\n",
"latest_price.pop(\"hotelcode\")\n",
@@ -244,7 +261,7 @@
"res = model.predict(\n",
" \"Below is a table with information about hotel rooms. \"\n",
" \"Return a JSON list with an entry for each column. Each entry should have \"\n",
" \"{\\\"name\\\": \\\"column name\\\", \\\"description\\\": \\\"column description\\\", \\\"type\\\": \\\"column data type\\\"}\"\n",
" '{\"name\": \"column name\", \"description\": \"column description\", \"type\": \"column data type\"}'\n",
" f\"\\n\\n{latest_price.head()}\\n\\nJSON:\\n\"\n",
")"
]
@@ -338,9 +355,15 @@
"metadata": {},
"outputs": [],
"source": [
"attribute_info[-2]['description'] += f\". Valid values are {sorted(latest_price['starrating'].value_counts().index.tolist())}\"\n",
"attribute_info[3]['description'] += f\". Valid values are {sorted(latest_price['maxoccupancy'].value_counts().index.tolist())}\"\n",
"attribute_info[-3]['description'] += f\". Valid values are {sorted(latest_price['country'].value_counts().index.tolist())}\""
"attribute_info[-2][\n",
" \"description\"\n",
"] += f\". Valid values are {sorted(latest_price['starrating'].value_counts().index.tolist())}\"\n",
"attribute_info[3][\n",
" \"description\"\n",
"] += f\". Valid values are {sorted(latest_price['maxoccupancy'].value_counts().index.tolist())}\"\n",
"attribute_info[-3][\n",
" \"description\"\n",
"] += f\". Valid values are {sorted(latest_price['country'].value_counts().index.tolist())}\""
]
},
{
@@ -408,7 +431,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.base import get_query_constructor_prompt, load_query_constructor_runnable"
"from langchain.chains.query_constructor.base import (\n",
" get_query_constructor_prompt,\n",
" load_query_constructor_runnable,\n",
")"
]
},
{
@@ -592,7 +618,9 @@
"metadata": {},
"outputs": [],
"source": [
"chain = load_query_constructor_runnable(ChatOpenAI(model='gpt-3.5-turbo', temperature=0), doc_contents, attribute_info)"
"chain = load_query_constructor_runnable(\n",
" ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0), doc_contents, attribute_info\n",
")"
]
},
{
@@ -634,7 +662,11 @@
}
],
"source": [
"chain.invoke({\"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"})"
"chain.invoke(\n",
" {\n",
" \"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"\n",
" }\n",
")"
]
},
{
@@ -656,10 +688,12 @@
"metadata": {},
"outputs": [],
"source": [
"attribute_info[-3]['description'] += \". NOTE: Only use the 'eq' operator if a specific country is mentioned. If a region is mentioned, include all relevant countries in filter.\"\n",
"attribute_info[-3][\n",
" \"description\"\n",
"] += \". NOTE: Only use the 'eq' operator if a specific country is mentioned. If a region is mentioned, include all relevant countries in filter.\"\n",
"chain = load_query_constructor_runnable(\n",
" ChatOpenAI(model='gpt-3.5-turbo', temperature=0), \n",
" doc_contents, \n",
" ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n",
" doc_contents,\n",
" attribute_info,\n",
")"
]
@@ -704,10 +738,12 @@
"source": [
"content_attr = [\"roomtype\", \"roomamenities\", \"roomdescription\", \"hotelname\"]\n",
"doc_contents = \"A detailed description of a hotel room, including information about the room type and room amenities.\"\n",
"filter_attribute_info = tuple(ai for ai in attribute_info if ai[\"name\"] not in content_attr)\n",
"filter_attribute_info = tuple(\n",
" ai for ai in attribute_info if ai[\"name\"] not in content_attr\n",
")\n",
"chain = load_query_constructor_runnable(\n",
" ChatOpenAI(model='gpt-3.5-turbo', temperature=0), \n",
" doc_contents, \n",
" ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n",
" doc_contents,\n",
" filter_attribute_info,\n",
")"
]
@@ -730,7 +766,11 @@
}
],
"source": [
"chain.invoke({\"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"})"
"chain.invoke(\n",
" {\n",
" \"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"\n",
" }\n",
")"
]
},
{
@@ -860,14 +900,22 @@
"examples = [\n",
" (\n",
" \"I want a hotel in the Balkans with a king sized bed and a hot tub. Budget is $300 a night\",\n",
" {\"query\": \"king-sized bed, hot tub\", \"filter\": 'and(in(\"country\", [\"Bulgaria\", \"Greece\", \"Croatia\", \"Serbia\"]), lte(\"onsiterate\", 300))'}\n",
" {\n",
" \"query\": \"king-sized bed, hot tub\",\n",
" \"filter\": 'and(in(\"country\", [\"Bulgaria\", \"Greece\", \"Croatia\", \"Serbia\"]), lte(\"onsiterate\", 300))',\n",
" },\n",
" ),\n",
" (\n",
" \"A room with breakfast included for 3 people, at a Hilton\",\n",
" {\"query\": \"Hilton\", \"filter\": 'and(eq(\"mealsincluded\", true), gte(\"maxoccupancy\", 3))'}\n",
" {\n",
" \"query\": \"Hilton\",\n",
" \"filter\": 'and(eq(\"mealsincluded\", true), gte(\"maxoccupancy\", 3))',\n",
" },\n",
" ),\n",
"]\n",
"prompt = get_query_constructor_prompt(doc_contents, filter_attribute_info, examples=examples)\n",
"prompt = get_query_constructor_prompt(\n",
" doc_contents, filter_attribute_info, examples=examples\n",
")\n",
"print(prompt.format(query=\"{query}\"))"
]
},
@@ -879,10 +927,10 @@
"outputs": [],
"source": [
"chain = load_query_constructor_runnable(\n",
" ChatOpenAI(model='gpt-3.5-turbo', temperature=0), \n",
" doc_contents, \n",
" ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n",
" doc_contents,\n",
" filter_attribute_info,\n",
" examples=examples\n",
" examples=examples,\n",
")"
]
},
@@ -904,7 +952,11 @@
}
],
"source": [
"chain.invoke({\"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"})"
"chain.invoke(\n",
" {\n",
" \"query\": \"Find a 2-person room in Vienna or London, preferably with meals included and AC\"\n",
" }\n",
")"
]
},
{
@@ -956,7 +1008,11 @@
}
],
"source": [
"chain.invoke({\"query\": \"I want to stay somewhere highly rated along the coast. I want a room with a patio and a fireplace.\"})"
"chain.invoke(\n",
" {\n",
" \"query\": \"I want to stay somewhere highly rated along the coast. I want a room with a patio and a fireplace.\"\n",
" }\n",
")"
]
},
{
@@ -977,11 +1033,11 @@
"outputs": [],
"source": [
"chain = load_query_constructor_runnable(\n",
" ChatOpenAI(model='gpt-3.5-turbo', temperature=0), \n",
" doc_contents, \n",
" ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0),\n",
" doc_contents,\n",
" filter_attribute_info,\n",
" examples=examples,\n",
" fix_invalid=True\n",
" fix_invalid=True,\n",
")"
]
},
@@ -1003,7 +1059,11 @@
}
],
"source": [
"chain.invoke({\"query\": \"I want to stay somewhere highly rated along the coast. I want a room with a patio and a fireplace.\"})"
"chain.invoke(\n",
" {\n",
" \"query\": \"I want to stay somewhere highly rated along the coast. I want a room with a patio and a fireplace.\"\n",
" }\n",
")"
]
},
{
@@ -1056,8 +1116,8 @@
"# docs.append(doc)\n",
"# vecstore = ElasticsearchStore.from_documents(\n",
"# docs,\n",
"# embeddings, \n",
"# es_url=\"http://localhost:9200\", \n",
"# embeddings,\n",
"# es_url=\"http://localhost:9200\",\n",
"# index_name=\"hotel_rooms\",\n",
"# # strategy=ElasticsearchStore.ApproxRetrievalStrategy(\n",
"# # hybrid=True,\n",
@@ -1073,9 +1133,9 @@
"outputs": [],
"source": [
"vecstore = ElasticsearchStore(\n",
" \"hotel_rooms\", \n",
" embedding=embeddings, \n",
" es_url=\"http://localhost:9200\", \n",
" \"hotel_rooms\",\n",
" embedding=embeddings,\n",
" es_url=\"http://localhost:9200\",\n",
" # strategy=ElasticsearchStore.ApproxRetrievalStrategy(hybrid=True) # seems to not be available in community version\n",
")"
]
@@ -1089,7 +1149,9 @@
"source": [
"from langchain.retrievers import SelfQueryRetriever\n",
"\n",
"retriever = SelfQueryRetriever(query_constructor=chain, vectorstore=vecstore, verbose=True)"
"retriever = SelfQueryRetriever(\n",
" query_constructor=chain, vectorstore=vecstore, verbose=True\n",
")"
]
},
{