mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 15:43:54 +00:00
Cube semantic loader: allow cubes processing (#9927)
After launch, we started receiving feedback that supporting only views is confusing. We still consider using views a good practice, since a view serves as a "facade" for your data; however, we decided to let users make that choice themselves, so the loader now processes cubes as well as views. Addresses the questions raised in: - https://github.com/cube-js/cube/issues/7028 - https://github.com/langchain-ai/langchain/pull/9690
This commit is contained in:
parent
e80834d783
commit
c80e406e95
@ -106,15 +106,39 @@
|
|||||||
" - `column_data_type`\n",
|
" - `column_data_type`\n",
|
||||||
" - `column_title`\n",
|
" - `column_title`\n",
|
||||||
" - `column_description`\n",
|
" - `column_description`\n",
|
||||||
" - `column_values`"
|
" - `column_values`\n",
|
||||||
|
" - `cube_data_obj_type`"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
"cell_type": "code",
|
||||||
"cell_type": "markdown",
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"> page_content='Users View City, None' metadata={'table_name': 'users_view', 'column_name': 'users_view.city', 'column_data_type': 'string', 'column_title': 'Users View City', 'column_description': 'None', 'column_member_type': 'dimension', 'column_values': ['Austin', 'Chicago', 'Los Angeles', 'Mountain View', 'New York', 'Palo Alto', 'San Francisco', 'Seattle']}"
|
"# Given string containing page content\n",
|
||||||
|
"page_content = 'Users View City, None'\n",
|
||||||
|
"\n",
|
||||||
|
"# Given dictionary containing metadata\n",
|
||||||
|
"metadata = {\n",
|
||||||
|
" 'table_name': 'users_view',\n",
|
||||||
|
" 'column_name': 'users_view.city',\n",
|
||||||
|
" 'column_data_type': 'string',\n",
|
||||||
|
" 'column_title': 'Users View City',\n",
|
||||||
|
" 'column_description': 'None',\n",
|
||||||
|
" 'column_member_type': 'dimension',\n",
|
||||||
|
" 'column_values': [\n",
|
||||||
|
" 'Austin',\n",
|
||||||
|
" 'Chicago',\n",
|
||||||
|
" 'Los Angeles',\n",
|
||||||
|
" 'Mountain View',\n",
|
||||||
|
" 'New York',\n",
|
||||||
|
" 'Palo Alto',\n",
|
||||||
|
" 'San Francisco',\n",
|
||||||
|
" 'Seattle'\n",
|
||||||
|
" ],\n",
|
||||||
|
" 'cube_data_obj_type': 'view'\n",
|
||||||
|
"}"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -113,27 +113,39 @@ class CubeSemanticLoader(BaseLoader):
|
|||||||
- column_title
|
- column_title
|
||||||
- column_description
|
- column_description
|
||||||
- column_values
|
- column_values
|
||||||
|
- cube_data_obj_type
|
||||||
"""
|
"""
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": self.cube_api_token,
|
"Authorization": self.cube_api_token,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.info(f"Loading metadata from {self.cube_api_url}...")
|
||||||
response = requests.get(f"{self.cube_api_url}/meta", headers=headers)
|
response = requests.get(f"{self.cube_api_url}/meta", headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
raw_meta_json = response.json()
|
raw_meta_json = response.json()
|
||||||
cubes = raw_meta_json.get("cubes", [])
|
cube_data_objects = raw_meta_json.get("cubes", [])
|
||||||
|
|
||||||
|
logger.info(f"Found {len(cube_data_objects)} cube data objects in metadata.")
|
||||||
|
|
||||||
|
if not cube_data_objects:
|
||||||
|
raise ValueError("No cubes found in metadata.")
|
||||||
|
|
||||||
docs = []
|
docs = []
|
||||||
|
|
||||||
for cube in cubes:
|
for cube_data_obj in cube_data_objects:
|
||||||
if cube.get("type") != "view":
|
cube_data_obj_name = cube_data_obj.get("name")
|
||||||
|
cube_data_obj_type = cube_data_obj.get("type")
|
||||||
|
cube_data_obj_is_public = cube_data_obj.get("public")
|
||||||
|
measures = cube_data_obj.get("measures", [])
|
||||||
|
dimensions = cube_data_obj.get("dimensions", [])
|
||||||
|
|
||||||
|
logger.info(f"Processing {cube_data_obj_name}...")
|
||||||
|
|
||||||
|
if not cube_data_obj_is_public:
|
||||||
|
logger.info(f"Skipping {cube_data_obj_name} because it is not public.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cube_name = cube.get("name")
|
|
||||||
|
|
||||||
measures = cube.get("measures", [])
|
|
||||||
dimensions = cube.get("dimensions", [])
|
|
||||||
|
|
||||||
for item in measures + dimensions:
|
for item in measures + dimensions:
|
||||||
column_member_type = "measure" if item in measures else "dimension"
|
column_member_type = "measure" if item in measures else "dimension"
|
||||||
dimension_values = []
|
dimension_values = []
|
||||||
@ -148,13 +160,14 @@ class CubeSemanticLoader(BaseLoader):
|
|||||||
dimension_values = self._get_dimension_values(item_name)
|
dimension_values = self._get_dimension_values(item_name)
|
||||||
|
|
||||||
metadata = dict(
|
metadata = dict(
|
||||||
table_name=str(cube_name),
|
table_name=str(cube_data_obj_name),
|
||||||
column_name=item_name,
|
column_name=item_name,
|
||||||
column_data_type=item_type,
|
column_data_type=item_type,
|
||||||
column_title=str(item.get("title")),
|
column_title=str(item.get("title")),
|
||||||
column_description=str(item.get("description")),
|
column_description=str(item.get("description")),
|
||||||
column_member_type=column_member_type,
|
column_member_type=column_member_type,
|
||||||
column_values=dimension_values,
|
column_values=dimension_values,
|
||||||
|
cube_data_obj_type=cube_data_obj_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
page_content = f"{str(item.get('title'))}, "
|
page_content = f"{str(item.get('title'))}, "
|
||||||
|
@ -35,6 +35,7 @@ class TestCubeSemanticLoader(unittest.TestCase):
|
|||||||
{
|
{
|
||||||
"name": "test_cube",
|
"name": "test_cube",
|
||||||
"type": "view",
|
"type": "view",
|
||||||
|
"public": True,
|
||||||
"measures": [],
|
"measures": [],
|
||||||
"dimensions": [
|
"dimensions": [
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user