mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-03 03:59:42 +00:00
Cube semantic loader: allow cubes processing (#9927)
After launch, we started receiving feedback that supporting only views is confusing. We still consider using views a good practice, since a view serves as a "facade" for your data; however, we decided to let users make this choice themselves, so the loader now processes cubes as well as views. Addresses the questions raised in: - https://github.com/cube-js/cube/issues/7028 - https://github.com/langchain-ai/langchain/pull/9690
This commit is contained in:
@@ -113,27 +113,39 @@ class CubeSemanticLoader(BaseLoader):
|
||||
- column_title
|
||||
- column_description
|
||||
- column_values
|
||||
- cube_data_obj_type
|
||||
"""
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": self.cube_api_token,
|
||||
}
|
||||
|
||||
logger.info(f"Loading metadata from {self.cube_api_url}...")
|
||||
response = requests.get(f"{self.cube_api_url}/meta", headers=headers)
|
||||
response.raise_for_status()
|
||||
raw_meta_json = response.json()
|
||||
cubes = raw_meta_json.get("cubes", [])
|
||||
cube_data_objects = raw_meta_json.get("cubes", [])
|
||||
|
||||
logger.info(f"Found {len(cube_data_objects)} cube data objects in metadata.")
|
||||
|
||||
if not cube_data_objects:
|
||||
raise ValueError("No cubes found in metadata.")
|
||||
|
||||
docs = []
|
||||
|
||||
for cube in cubes:
|
||||
if cube.get("type") != "view":
|
||||
for cube_data_obj in cube_data_objects:
|
||||
cube_data_obj_name = cube_data_obj.get("name")
|
||||
cube_data_obj_type = cube_data_obj.get("type")
|
||||
cube_data_obj_is_public = cube_data_obj.get("public")
|
||||
measures = cube_data_obj.get("measures", [])
|
||||
dimensions = cube_data_obj.get("dimensions", [])
|
||||
|
||||
logger.info(f"Processing {cube_data_obj_name}...")
|
||||
|
||||
if not cube_data_obj_is_public:
|
||||
logger.info(f"Skipping {cube_data_obj_name} because it is not public.")
|
||||
continue
|
||||
|
||||
cube_name = cube.get("name")
|
||||
|
||||
measures = cube.get("measures", [])
|
||||
dimensions = cube.get("dimensions", [])
|
||||
|
||||
for item in measures + dimensions:
|
||||
column_member_type = "measure" if item in measures else "dimension"
|
||||
dimension_values = []
|
||||
@@ -148,13 +160,14 @@ class CubeSemanticLoader(BaseLoader):
|
||||
dimension_values = self._get_dimension_values(item_name)
|
||||
|
||||
metadata = dict(
|
||||
table_name=str(cube_name),
|
||||
table_name=str(cube_data_obj_name),
|
||||
column_name=item_name,
|
||||
column_data_type=item_type,
|
||||
column_title=str(item.get("title")),
|
||||
column_description=str(item.get("description")),
|
||||
column_member_type=column_member_type,
|
||||
column_values=dimension_values,
|
||||
cube_data_obj_type=cube_data_obj_type,
|
||||
)
|
||||
|
||||
page_content = f"{str(item.get('title'))}, "
|
||||
|
@@ -35,6 +35,7 @@ class TestCubeSemanticLoader(unittest.TestCase):
|
||||
{
|
||||
"name": "test_cube",
|
||||
"type": "view",
|
||||
"public": True,
|
||||
"measures": [],
|
||||
"dimensions": [
|
||||
{
|
||||
|
Reference in New Issue
Block a user