community[patch]: Fix sparkllm embeddings api bug. (#19122)

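- **Description:** Fix the SparkLLM embeddings API bug: the embedding request is now sent once per input text, with each text wrapped in a `messages` payload (`{"messages": [{"content": text, "role": "user"}]}`), instead of passing the whole list of texts into a single request body. The example notebook is updated to match.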
@baskaryan PTAL
This commit is contained in:
Guangdong Liu 2024-03-16 06:08:49 +08:00 committed by GitHub
parent b9c62fb905
commit cced3eb9bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 73 additions and 36 deletions

View File

@ -25,14 +25,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-15T09:36:13.753824100Z",
"start_time": "2024-03-15T09:36:13.225834400Z"
}
},
"outputs": [],
"source": [
"from langchain_community.embeddings import SparkLLMTextEmbeddings\n",
"\n",
"embeddings = SparkLLMTextEmbeddings(\n",
" spark_app_id=\"sk-*\", spark_api_key=\"\", spark_api_secret=\"\"\n",
" spark_app_id=\"<spark_app_id>\",\n",
" spark_api_key=\"<spark_api_key>\",\n",
" spark_api_secret=\"<spark_api_secret>\",\n",
")"
]
},
@ -45,44 +52,67 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-15T09:36:25.436201400Z",
"start_time": "2024-03-15T09:36:25.313456600Z"
}
},
"outputs": [
{
"data": {
"text/plain": "[-0.043609619140625,\n 0.2017822265625,\n 0.0270843505859375,\n -0.250244140625,\n -0.024993896484375,\n -0.0382080078125,\n 0.06207275390625,\n -0.0146331787109375]"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"text_q = \"Introducing iFlytek\"\n",
"\n",
"os.environ[\"SPARK_APP_ID\"] = \"YOUR_APP_ID\"\n",
"os.environ[\"SPARK_API_KEY\"] = \"YOUR_API_KEY\"\n",
"os.environ[\"SPARK_API_SECRET\"] = \"YOUR_API_SECRET\""
"text_1 = \"Science and Technology Innovation Company Limited, commonly known as iFlytek, is a leading Chinese technology company specializing in speech recognition, natural language processing, and artificial intelligence. With a rich history and remarkable achievements, iFlytek has emerged as a frontrunner in the field of intelligent speech and language technologies.iFlytek has made significant contributions to the advancement of human-computer interaction through its cutting-edge innovations. Their advanced speech recognition technology has not only improved the accuracy and efficiency of voice input systems but has also enabled seamless integration of voice commands into various applications and devices.The company's commitment to research and development has been instrumental in its success. iFlytek invests heavily in fostering talent and collaboration with academic institutions, resulting in groundbreaking advancements in speech synthesis and machine translation. Their dedication to innovation has not only transformed the way we communicate but has also enhanced accessibility for individuals with disabilities.\"\n",
"\n",
"text_2 = \"Moreover, iFlytek's impact extends beyond domestic boundaries, as they actively promote international cooperation and collaboration in the field of artificial intelligence. They have consistently participated in global competitions and contributed to the development of international standards.In recognition of their achievements, iFlytek has received numerous accolades and awards both domestically and internationally. Their contributions have revolutionized the way we interact with technology and have paved the way for a future where voice-based interfaces play a vital role.Overall, iFlytek is a trailblazer in the field of intelligent speech and language technologies, and their commitment to innovation and excellence deserves commendation.\"\n",
"\n",
"query_result = embeddings.embed_query(text_q)\n",
"query_result[:8]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text_1 = \"iFLYTEK is a well-known intelligent speech and artificial intelligence publicly listed company in the Asia-Pacific Region. Since its establishment, the company is devoted to cornerstone technological research in speech and languages, natural language understanding, machine learning, machine reasoning, adaptive learning, and has maintained the world-leading position in those domains. The company actively promotes the development of A.I. products and their sector-based applications, with visions of enabling machines to listen and speak, understand and think, creating a better world with artificial intelligence.\"\n",
"text_2 = \"iFLYTEK Open Platform was launched in 2010 by iFLYTEK as Chinas first Artificial Intelligence open platform for Mobile Internet and intelligent hardware developers.\"\n",
"\n",
"query_result = embeddings.embed_query(text_2)\n",
"query_result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-15T09:36:54.657224Z",
"start_time": "2024-03-15T09:36:54.404690400Z"
}
},
"outputs": [
{
"data": {
"text/plain": "[-0.161865234375,\n 0.58984375,\n 0.998046875,\n 0.365966796875,\n 0.72900390625,\n 0.6015625,\n -0.8408203125,\n -0.2666015625]"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc_result = embeddings.embed_documents([text_1, text_2])\n",
"doc_result"
"doc_result[0][:8]"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"kernelspec": {
"name": "python3",
"language": "python",
"display_name": "Python 3 (ipykernel)"
}
},
"nbformat": 4,

View File

@ -70,14 +70,21 @@ class SparkLLMTextEmbeddings(BaseModel, Embeddings):
api_key=self.spark_api_key.get_secret_value(),
api_secret=self.spark_api_secret.get_secret_value(),
)
content = self._get_body(self.spark_app_id.get_secret_value(), texts)
response = requests.post(
url, json=content, headers={"content-type": "application/json"}
).text
res_arr = self._parser_message(response)
if res_arr is not None:
return res_arr.tolist()
return None
embed_result: list = []
for text in texts:
query_context = {"messages": [{"content": text, "role": "user"}]}
content = self._get_body(
self.spark_app_id.get_secret_value(), query_context
)
response = requests.post(
url, json=content, headers={"content-type": "application/json"}
).text
res_arr = self._parser_message(response)
if res_arr is not None:
embed_result.append(res_arr.tolist())
else:
embed_result.append(None)
return embed_result
def embed_documents(self, texts: List[str]) -> Optional[List[List[float]]]: # type: ignore[override]
"""Public method to get embeddings for a list of documents.
@ -145,7 +152,7 @@ class SparkLLMTextEmbeddings(BaseModel, Embeddings):
return u
@staticmethod
def _get_body(appid: str, text: List[str]) -> Dict[str, Any]:
def _get_body(appid: str, text: dict) -> Dict[str, Any]:
body = {
"header": {"app_id": appid, "uid": "39769795890", "status": 3},
"parameter": {"emb": {"feature": {"encoding": "utf8"}}},