From dc3ca44e055e7e7263d2b1e9c50d8e5ef5ae9f77 Mon Sep 17 00:00:00 2001 From: bheroder <89942393+bheroder@users.noreply.github.com> Date: Thu, 27 Jul 2023 16:56:06 -0700 Subject: [PATCH] Add an example for azure ml managed feature store (#8324) We are adding an example of how one can connect to azure ml managed feature store and use such a prompt template in a llm chain. @baskaryan --- .../connecting_to_a_feature_store.ipynb | 211 ++++++++++++++++++ 1 file changed, 211 insertions(+) diff --git a/docs/extras/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb b/docs/extras/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb index 036fe71d3e1..4a690db6358 100644 --- a/docs/extras/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb +++ b/docs/extras/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb @@ -597,6 +597,217 @@ "source": [ "chain.run(\"C1410926\")" ] + }, + { + "cell_type": "markdown", + "id": "4b99ac57", + "metadata": {}, + "source": [ + "## AzureML Managed Feature Store\n", + "\n", + "We will use [AzureML Managed Feature Store](https://learn.microsoft.com/en-us/azure/machine-learning/concept-what-is-managed-feature-store) to run the example below. " + ] + }, + { + "cell_type": "markdown", + "id": "1ebf16d2", + "metadata": {}, + "source": [ + "### Prerequisites\n", + "\n", + "* Create a feature store with online materialization using the instructions in [Enable online materialization and run online inference](https://github.com/Azure/azureml-examples/blob/featurestore/online/sdk/python/featurestore_sample/notebooks/sdk_only/5.%20Enable%20online%20store%20and%20run%20online%20inference.ipynb).\n", + "\n", + "* A feature store created successfully by following those instructions should have an `accounts` featureset with version `1`. It will have `accountID` as the index column, with features `accountAge`, `accountCountry`, and `numPaymentRejects1dPerUser`." 
class AzureMLFeatureStorePromptTemplate(StringPromptTemplate, extra=Extra.allow):
    """Prompt template that fills in account features from an AzureML managed feature store.

    Callers supply an ``account_id`` (required) and a ``query`` (optional).
    The remaining template variables are looked up online from the
    ``accounts`` feature set (version 1) when ``format`` is called.

    ``extra=Extra.allow`` is presumably what permits assigning the
    ``_fs_client`` / ``_feature_set`` attributes in ``__init__`` on this
    pydantic-based class -- TODO confirm against the installed pydantic version.
    """

    def __init__(self, subscription_id: str, resource_group: str, feature_store_name: str, **kwargs):
        """Build the prompt and connect to the feature store.

        Args:
            subscription_id: Azure subscription that owns the feature store.
            resource_group: Resource group that contains the feature store.
            feature_store_name: Name of the AzureML managed feature store.
            **kwargs: Accepted for signature compatibility; not used here.
        """
        # this is an example template for proof of concept and can be changed to suit the developer needs
        template = """
            {query}
            ###
            account id = {account_id}
            account age = {account_age}
            account country = {account_country}
            payment rejects 1d per user = {payment_rejects_1d_per_user}
            ###
        """
        prompt_template = PromptTemplate.from_template(template)
        # Only these two variables come from the caller; every other template
        # variable is filled in by ``format`` from the feature store.
        super().__init__(prompt=prompt_template, input_variables=["account_id", "query"])

        # use AzureMLOnBehalfOfCredential() in spark context
        credential = AzureCliCredential()

        self._fs_client = FeatureStoreClient(
            credential=credential,
            subscription_id=subscription_id,
            resource_group_name=resource_group,
            name=feature_store_name,
        )

        self._feature_set = self._fs_client.feature_sets.get(name="accounts", version=1)

        # Prepare online lookup for the feature set's features before the
        # first call to ``get_online_features``.
        init_online_lookup(self._feature_set.features, credential, force=True)

    def format(self, **kwargs) -> str:
        """Render the prompt for one account.

        Args:
            **kwargs: Must contain ``account_id``. May contain ``query``
                (defaults to an empty string). Any other keys are passed
                through unchanged to the underlying prompt.

        Returns:
            The formatted prompt string.

        Raises:
            ValueError: If ``account_id`` is not supplied.
        """
        if "account_id" not in kwargs:
            # A bare string cannot be raised in Python 3 (it fails with a
            # TypeError), so raise a real exception that carries the message.
            raise ValueError("account_id needed to fetch details from feature store")
        account_id = kwargs.pop("account_id")
        query = kwargs.pop("query", "")

        # feature set is registered with accountID as entity index column.
        obs = pandas.DataFrame({'accountID': [account_id]})

        # get the feature details for the input entity from feature store.
        # NOTE(review): the result is indexed as column -> row 0 below;
        # presumably a single-row table for the single observation -- confirm.
        df = get_online_features(self._feature_set.features, obs)

        # populate prompt template output using the fetched feature values.
        values = {
            **kwargs,
            "query": query,
            "account_id": account_id,
            "account_age": df["accountAge"][0],
            "account_country": df["accountCountry"][0],
            "payment_rejects_1d_per_user": df["numPaymentRejects1dPerUser"][0],
        }

        return self.prompt.format(**values)
# Replace the placeholders below with the actual details of the feature store
# that was created in the previous steps.
prompt_template = AzureMLFeatureStorePromptTemplate(
    subscription_id="",
    resource_group="",
    feature_store_name="",
)

# Render the prompt for a sample account to check that the feature lookup works.
print(prompt_template.format(account_id="A1829581630230790"))

# Use in a chain: personalization backed by the AzureML Managed Feature Store.
import getpass
import os

from langchain.chat_models import ChatOpenAI
from langchain import LLMChain

# Do not paste the key into the notebook. Reuse a key that is already set in
# the environment, and only prompt for one (without echoing it) when missing.
# Assigning "" unconditionally would clobber a correctly configured key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

chain = LLMChain(llm=ChatOpenAI(), prompt=prompt_template)

# NOTE: developers can further fine-tune AzureMLFeatureStorePromptTemplate
# for getting even more accurate results for the input query
chain.predict(
    account_id="A1829581630230790",
    query="write a small thank you note within 20 words if account age > 10 using the account stats",
)