diff --git a/cookbook/oracleai_demo.ipynb b/cookbook/oracleai_demo.ipynb index ad0a6385cb7..8d67e122833 100644 --- a/cookbook/oracleai_demo.ipynb +++ b/cookbook/oracleai_demo.ipynb @@ -526,8 +526,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "***Note:*** Currently, OracleEmbeddings processes each embedding generation request individually, without batching, by calling REST endpoints separately for each request. This method could potentially lead to exceeding the maximum request per minute quota set by some providers. However, we are actively working to enhance this process by implementing request batching, which will allow multiple embedding requests to be combined into fewer API calls, thereby optimizing our use of provider resources and adhering to their request limits. This update is expected to be rolled out soon, eliminating the current limitation.\n", - "\n", "***Note:*** Users may need to configure a proxy to utilize third-party embedding generation providers, excluding the 'database' provider that utilizes an ONNX model." ] }, diff --git a/docs/docs/integrations/text_embedding/oracleai.ipynb b/docs/docs/integrations/text_embedding/oracleai.ipynb index cfda80026ba..1cb2c2adca7 100644 --- a/docs/docs/integrations/text_embedding/oracleai.ipynb +++ b/docs/docs/integrations/text_embedding/oracleai.ipynb @@ -193,13 +193,6 @@ "Oracle AI Vector Search provides multiple methods for generating embeddings, utilizing either locally hosted ONNX models or third-party APIs. For comprehensive instructions on configuring these alternatives, please refer to the [Oracle AI Vector Search Guide](https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/dbms_vector_chain1.html#GUID-C6439E94-4E86-4ECD-954E-4B73D53579DE)." ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "***Note:*** Currently, OracleEmbeddings processes each embedding generation request individually, without batching, by calling REST endpoints separately for each request. This method could potentially lead to exceeding the maximum request per minute quota set by some providers. However, we are actively working to enhance this process by implementing request batching, which will allow multiple embedding requests to be combined into fewer API calls, thereby optimizing our use of provider resources and adhering to their request limits. This update is expected to be rolled out soon, eliminating the current limitation." - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/libs/community/langchain_community/embeddings/oracleai.py b/libs/community/langchain_community/embeddings/oracleai.py index ca2dc7f5b73..24105cf0204 100644 --- a/libs/community/langchain_community/embeddings/oracleai.py +++ b/libs/community/langchain_community/embeddings/oracleai.py @@ -118,23 +118,29 @@ class OracleEmbeddings(BaseModel, Embeddings): "begin utl_http.set_proxy(:proxy); end;", proxy=self.proxy ) - for text in texts: - cursor.execute( - "select t.* " - + "from dbms_vector_chain.utl_to_embeddings(:content, " - + "json(:params)) t", - content=text, - params=json.dumps(self.params), - ) + chunks = [] + for i, text in enumerate(texts, start=1): + chunk = {"chunk_id": i, "chunk_data": text} + chunks.append(json.dumps(chunk)) - for row in cursor: - if row is None: - embeddings.append([]) - else: - rdata = json.loads(row[0]) - # dereference string as array - vec = json.loads(rdata["embed_vector"]) - embeddings.append(vec) + vector_array_type = self.conn.gettype("SYS.VECTOR_ARRAY_T") + inputs = vector_array_type.newobject(chunks) + cursor.execute( + "select t.* " + + "from dbms_vector_chain.utl_to_embeddings(:content, " + + "json(:params)) t", + content=inputs, + params=json.dumps(self.params), + ) + + for row in cursor: + if row is None: + embeddings.append([]) + else: + rdata = json.loads(row[0]) + # dereference string as array + vec = json.loads(rdata["embed_vector"]) + embeddings.append(vec) cursor.close() return embeddings @@ -159,20 +165,27 @@ class OracleEmbeddings(BaseModel, Embeddings): """ # A sample unit test. -''' get the Oracle connection ''' +import oracledb +# get the Oracle connection conn = oracledb.connect( - user="", - password="", - dsn="") + user="", + password="", + dsn="/", +) print("Oracle connection is established...") -''' params ''' -embedder_params = {"provider":"database", "model":"demo_model"} +# params +embedder_params = {"provider": "database", "model": "demo_model"} proxy = "" -''' instance ''' +# instance embedder = OracleEmbeddings(conn=conn, params=embedder_params, proxy=proxy) +docs = ["hello world!", "hi everyone!", "greetings!"] +embeds = embedder.embed_documents(docs) +print(f"Total Embeddings: {len(embeds)}") +print(f"Embedding generated by OracleEmbeddings: {embeds[0]}\n") + embed = embedder.embed_query("Hello World!") print(f"Embedding generated by OracleEmbeddings: {embed}")