community: Apache AGE wrapper additional edge cases. (#28151)

Description: 
The current AGEGraph() implementation does some custom wrapping for graph
queries. The method involved is _wrap_query(), which parses the fields from
the original query in order to add some SQL context to it.
This change improves the current parsing logic to cover additional edge
cases, which are also added to the test coverage. Previously, if any Node
property name or value contained the literal "return", it would break the
graph / SQL query.
We discovered this while dealing with real-world datasets; it is not an
uncommon scenario, and I think it needs to be covered.
This commit is contained in:
German Martin 2024-12-16 13:28:01 -03:00 committed by GitHub
parent 768e4a7fd4
commit d5d18c62b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 36 additions and 4 deletions

View File

@ -491,11 +491,14 @@ class AGEGraph(GraphStore):
$$) AS ({fields});"""
# if there are any returned fields they must be added to the pgsql query
if "return" in query.lower():
return_match = re.search(r'\breturn\b(?![^"]*")', query, re.IGNORECASE)
if return_match:
# Extract the part of the query after the RETURN keyword
return_clause = query[return_match.end() :]
# parse return statement to identify returned fields
fields = (
query.lower()
.split("return")[-1]
return_clause.lower()
.split("distinct")[-1]
.split("order by")[0]
.split("skip")[0]
@ -517,7 +520,11 @@ class AGEGraph(GraphStore):
# build resulting pgsql relation
fields_str = ", ".join(
[field.split(".")[-1] + " agtype" for field in fields]
[
field.split(".")[-1] + " agtype"
for field in fields
if field.split(".")[-1]
]
)
# if no return statement we still need to return a single field of type agtype

View File

@ -54,6 +54,7 @@ class TestAGEGraph(unittest.TestCase):
def test_wrap_query(self) -> None:
inputs = [
# Positive case: Simple return clause
"""
MATCH (keanu:Person {name:'Keanu Reeves'})
RETURN keanu.name AS name, keanu.born AS born
@ -61,9 +62,20 @@ class TestAGEGraph(unittest.TestCase):
"""
MERGE (n:a {id: 1})
""",
# Negative case: Return in a string value
"""
MATCH (n {description: "This will return a value"})
MERGE (n)-[:RELATED]->(m)
""",
# Negative case: Return in a property key
"""
MATCH (n {returnValue: "some value"})
MERGE (n)-[:RELATED]->(m)
""",
]
expected = [
# Expected output for the first positive case
"""
SELECT * FROM ag_catalog.cypher('test', $$
MATCH (keanu:Person {name:'Keanu Reeves'})
@ -75,6 +87,19 @@ class TestAGEGraph(unittest.TestCase):
MERGE (n:a {id: 1})
$$) AS (a agtype);
""",
# Expected output for the negative cases (no return clause)
"""
SELECT * FROM ag_catalog.cypher('test', $$
MATCH (n {description: "This will return a value"})
MERGE (n)-[:RELATED]->(m)
$$) AS (a agtype);
""",
"""
SELECT * FROM ag_catalog.cypher('test', $$
MATCH (n {returnValue: "some value"})
MERGE (n)-[:RELATED]->(m)
$$) AS (a agtype);
""",
]
for idx, value in enumerate(inputs):