diff --git a/libs/langchain/langchain/chains/graph_qa/cypher_utils.py b/libs/langchain/langchain/chains/graph_qa/cypher_utils.py index ee399850888..c123cac9b52 100644 --- a/libs/langchain/langchain/chains/graph_qa/cypher_utils.py +++ b/libs/langchain/langchain/chains/graph_qa/cypher_utils.py @@ -14,7 +14,9 @@ class CypherQueryCorrector: property_pattern = re.compile(r"\{.+?\}") node_pattern = re.compile(r"\(.+?\)") - path_pattern = re.compile(r"\(.*\).*-.*-.*\(.*\)") + path_pattern = re.compile( + r"(\([^\,\(\)]*?(\{.+\})?[^\,\(\)]*?\))(<?-)(\[.*?\])?(->?)(\([^\,\(\)]*?(\{.+\})?[^\,\(\)]*?\))" + ) node_relation_node_pattern = re.compile( r"(\()+(?P<left_node>[^()]*?)\)(?P<relation>.*?)\((?P<right_node>[^()]*?)(\))+" ) @@ -62,7 +64,17 @@ class CypherQueryCorrector: Args: query: cypher query """ - return re.findall(self.path_pattern, query) + paths = [] + idx = 0 + while matched := self.path_pattern.findall(query[idx:]): + matched = matched[0] + matched = [ + m for i, m in enumerate(matched) if i not in [1, len(matched) - 1] + ] + path = "".join(matched) + idx = query.find(path) + len(path) - len(matched[-1]) + paths.append(path) + return paths def judge_direction(self, relation: str) -> str: """ diff --git a/libs/langchain/tests/unit_tests/data/cypher_corrector.csv b/libs/langchain/tests/unit_tests/data/cypher_corrector.csv index b9739c7ffd5..539e0e58b92 100644 --- a/libs/langchain/tests/unit_tests/data/cypher_corrector.csv +++ b/libs/langchain/tests/unit_tests/data/cypher_corrector.csv @@ -506,3 +506,7 @@ RETURN a.title AS title, c.text AS text, a.date AS date ORDER BY date DESC LIMIT 3" "MATCH (n:`Some Label`)-[:`SOME REL TYPE üäß`]->(m:`Sömé Øther Læbel`) RETURN n,m","(Some Label, SOME REL TYPE üäß, Sömé Øther Læbel)","MATCH (n:`Some Label`)-[:`SOME REL TYPE üäß`]->(m:`Sömé Øther Læbel`) RETURN n,m" "MATCH (n:`Some Label`)<-[:`SOME REL TYPE üäß`]-(m:`Sömé Øther Læbel`) RETURN n,m","(Some Label, SOME REL TYPE üäß, Sömé Øther Læbel)","MATCH (n:`Some Label`)-[:`SOME REL TYPE üäß`]->(m:`Sömé Øther Læbel`) RETURN 
n,m" +"MATCH (a:Actor {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)","(Movie, IN_GENRE, Genre), (User, RATED, Movie), (Actor, ACTED_IN, Movie), (Actor, DIRECTED, Movie), (Director, DIRECTED, Movie), (Director, ACTED_IN, Movie), (Person, ACTED_IN, Movie), (Person, DIRECTED, Movie)","MATCH (a:Actor {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)" +"MATCH (a:Actor)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name","(Movie, IN_GENRE, Genre), (User, RATED, Movie), (Actor, ACTED_IN, Movie), (Actor, DIRECTED, Movie), (Director, DIRECTED, Movie), (Director, ACTED_IN, Movie), (Person, ACTED_IN, Movie), (Person, DIRECTED, Movie)","MATCH (a:Actor)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name" +"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1","(Movie, IN_GENRE, Genre), (User, RATED, Movie), (Actor, ACTED_IN, Movie), (Actor, DIRECTED, Movie), (Director, DIRECTED, Movie), (Director, ACTED_IN, Movie), (Person, ACTED_IN, Movie), (Person, DIRECTED, Movie)","MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1" +"MATCH (g:Genre)<-[:IN_GENRE]-(m:Movie) RETURN g.name, COUNT(m) AS movieCount","(Movie, IN_GENRE, Genre), (User, RATED, Movie), (Actor, ACTED_IN, Movie), (Actor, DIRECTED, Movie), (Director, DIRECTED, Movie), (Director, ACTED_IN, Movie), (Person, ACTED_IN, Movie), (Person, DIRECTED, Movie)","MATCH (g:Genre)<-[:IN_GENRE]-(m:Movie) RETURN g.name, COUNT(m) AS movieCount"