From dc3ca44e055e7e7263d2b1e9c50d8e5ef5ae9f77 Mon Sep 17 00:00:00 2001
From: bheroder <89942393+bheroder@users.noreply.github.com>
Date: Thu, 27 Jul 2023 16:56:06 -0700
Subject: [PATCH] Add an example for azure ml managed feature store (#8324)

We are adding an example of how one can connect to azure ml managed
feature store and use such a prompt template in a llm chain. @baskaryan
---
 .../connecting_to_a_feature_store.ipynb       | 211 ++++++++++++++++++
 1 file changed, 211 insertions(+)

diff --git a/docs/extras/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb b/docs/extras/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb
index 036fe71d3e1..4a690db6358 100644
--- a/docs/extras/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb
+++ b/docs/extras/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb
@@ -597,6 +597,217 @@
    "source": [
     "chain.run(\"C1410926\")"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b99ac57",
+   "metadata": {},
+   "source": [
+    "## AzureML Managed Feature Store\n",
+    "\n",
+    "We will use [AzureML Managed Feature Store](https://learn.microsoft.com/en-us/azure/machine-learning/concept-what-is-managed-feature-store) to run the example below."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1ebf16d2",
+   "metadata": {},
+   "source": [
+    "### Prerequisites\n",
+    "\n",
+    "* Create a feature store with online materialization by following the instructions in [Enable online materialization and run online inference](https://github.com/Azure/azureml-examples/blob/featurestore/online/sdk/python/featurestore_sample/notebooks/sdk_only/5.%20Enable%20online%20store%20and%20run%20online%20inference.ipynb).\n",
+    "\n",
+    "* A feature store created by following those instructions should have an `accounts` feature set with version `1`. It uses `accountID` as the index column and provides the features `accountAge`, `accountCountry`, and `numPaymentRejects1dPerUser`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8b1ad8ee",
+   "metadata": {},
+   "source": [
+    "### Prompts\n",
+    "\n",
+    "* Here we will set up a custom `AzureMLFeatureStorePromptTemplate`. This prompt template takes in an `account_id` and an optional `query`. It then fetches feature values from the feature store and formats those features into the output prompt. Note that the only required input to this prompt template is `account_id`, since that is the only user-defined piece (all other variables are looked up inside the prompt template).\n",
+    "\n",
+    "* Also note that this is a bootstrap example to showcase how LLM applications can leverage AzureML managed feature store. Developers are welcome to improve the prompt template further to suit their needs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "bd54e256",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ['AZURE_ML_CLI_PRIVATE_FEATURES_ENABLED'] = 'True'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "5f935e7d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas\n",
+    "\n",
+    "from pydantic import Extra\n",
+    "from langchain.prompts import PromptTemplate, StringPromptTemplate\n",
+    "from azure.identity import AzureCliCredential\n",
+    "from azureml.featurestore import FeatureStoreClient, init_online_lookup, get_online_features\n",
+    "\n",
+    "class AzureMLFeatureStorePromptTemplate(StringPromptTemplate, extra=Extra.allow):\n",
+    "    \"\"\"Prompt template that fills its placeholders with account features looked up in an AzureML managed feature store.\"\"\"\n",
+    "\n",
+    "    def __init__(self, subscription_id: str, resource_group: str, feature_store_name: str, **kwargs):\n",
+    "        \"\"\"Build the prompt and connect to the feature store; extra keyword arguments are accepted but ignored.\"\"\"\n",
+    "        # this is an example template for proof of concept and can be changed to suit the developer needs\n",
+    "        template = \"\"\"\n",
+    "            {query}\n",
+    "            ###\n",
+    "            account id = {account_id}\n",
+    "            account age = {account_age}\n",
+    "            account country = {account_country}\n",
+    "            payment rejects 1d per user = {payment_rejects_1d_per_user}\n",
+    "            ###\n",
+    "            \"\"\"\n",
+    "        prompt_template = PromptTemplate.from_template(template)\n",
+    "        super().__init__(prompt=prompt_template, input_variables=[\"account_id\", \"query\"])\n",
+    "\n",
+    "        # use AzureMLOnBehalfOfCredential() in spark context\n",
+    "        credential = AzureCliCredential()\n",
+    "\n",
+    "        self._fs_client = FeatureStoreClient(\n",
+    "            credential=credential,\n",
+    "            subscription_id=subscription_id,\n",
+    "            resource_group_name=resource_group,\n",
+    "            name=feature_store_name)\n",
+    "\n",
+    "        self._feature_set = self._fs_client.feature_sets.get(name=\"accounts\", version=1)\n",
+    "\n",
+    "        init_online_lookup(self._feature_set.features, credential, force=True)\n",
+    "\n",
+    "    def format(self, **kwargs) -> str:\n",
+    "        \"\"\"Fetch the account's features from the online store and return the rendered prompt; raises ValueError if `account_id` is missing.\"\"\"\n",
+    "        if \"account_id\" not in kwargs:\n",
+    "            raise ValueError(\"account_id needed to fetch details from feature store\")\n",
+    "        account_id = kwargs.pop(\"account_id\")\n",
+    "        # query is optional; fall back to an empty string when the caller omits it\n",
+    "        query = kwargs.pop(\"query\", \"\")\n",
+    "\n",
+    "        # feature set is registered with accountID as entity index column.\n",
+    "        obs = pandas.DataFrame({'accountID': [account_id]})\n",
+    "\n",
+    "        # get the feature details for the input entity from feature store.\n",
+    "        df = get_online_features(self._feature_set.features, obs)\n",
+    "\n",
+    "        # populate prompt template output using the fetched feature values.\n",
+    "        kwargs[\"query\"] = query\n",
+    "        kwargs[\"account_id\"] = account_id\n",
+    "        kwargs[\"account_age\"] = df[\"accountAge\"][0]\n",
+    "        kwargs[\"account_country\"] = df[\"accountCountry\"][0]\n",
+    "        kwargs[\"payment_rejects_1d_per_user\"] = df[\"numPaymentRejects1dPerUser\"][0]\n",
+    "\n",
+    "        return self.prompt.format(**kwargs)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "28f148b0",
+   "metadata": {},
+   "source": [
+    "### Test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "84571856",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace the placeholders below with the actual details of the feature store that was created in the previous steps\n",
+    "\n",
+    "prompt_template = AzureMLFeatureStorePromptTemplate(\n",
+    "            subscription_id=\"\",\n",
+    "            resource_group=\"\",\n",
+    "            feature_store_name=\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "99703f42",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "            \n",
+      "            ###\n",
+      "            account id = A1829581630230790\n",
+      "            account age = 563.0\n",
+      "            account country = GB\n",
+      "            payment rejects 1d per user = 15.0\n",
+      "            ###\n",
+      "            \n"
+     ]
+    }
+   ],
+   "source": [
+    "print(prompt_template.format(account_id=\"A1829581630230790\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c8830d12",
+   "metadata": {},
+   "source": [
+    "### Use in a chain\n",
+    "\n",
+    "We can now use this in a chain, successfully creating a chain that achieves personalization backed by the AzureML Managed Feature Store"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "33266cb5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"OPENAI_API_KEY\"]=\"\" # Fill the open ai key here\n",
+    "\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain import LLMChain\n",
+    "\n",
+    "chain = LLMChain(llm=ChatOpenAI(), prompt=prompt_template)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "67ae8934",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Thank you for being a valued member for over 10 years! We appreciate your continued support.'"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# NOTE: developers can further fine-tune AzureMLFeatureStorePromptTemplate\n",
+    "# to get even more accurate results for the input query\n",
+    "chain.predict(account_id=\"A1829581630230790\", query =\"write a small thank you note within 20 words if account age > 10 using the account stats\")"
+   ]
   }
  ],
  "metadata": {