mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-04 04:07:54 +00:00
Fix invalid escape sequence warnings by using raw strings for regexes. (#11943)
The backslash sequences in these non-raw string literals (e.g. `\s`, `\[`) also generate invalid-escape-sequence warnings when our users' apps hit this code, which is annoying and doesn't look great. Let's fix it.
This commit is contained in:
parent
8e1b1db90d
commit
debcf053eb
@ -23,7 +23,7 @@ from langchain.chains.query_constructor.ir import (
|
|||||||
Operator,
|
Operator,
|
||||||
)
|
)
|
||||||
|
|
||||||
GRAMMAR = """
|
GRAMMAR = r"""
|
||||||
?program: func_call
|
?program: func_call
|
||||||
?expr: func_call
|
?expr: func_call
|
||||||
| value
|
| value
|
||||||
|
@ -49,9 +49,9 @@ class AcreomLoader(BaseLoader):
|
|||||||
def _process_acreom_content(self, content: str) -> str:
|
def _process_acreom_content(self, content: str) -> str:
|
||||||
# remove acreom specific elements from content that
|
# remove acreom specific elements from content that
|
||||||
# do not contribute to the context of current document
|
# do not contribute to the context of current document
|
||||||
content = re.sub("\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*", "", content) # rm tasks
|
content = re.sub(r"\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*", "", content) # rm tasks
|
||||||
content = re.sub("#", "", content) # rm hashtags
|
content = re.sub(r"#", "", content) # rm hashtags
|
||||||
content = re.sub("\[\[.*?\]\]", "", content) # rm doclinks
|
content = re.sub(r"\[\[.*?\]\]", "", content) # rm doclinks
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def lazy_load(self) -> Iterator[Document]:
|
def lazy_load(self) -> Iterator[Document]:
|
||||||
|
@ -40,7 +40,7 @@ class OpenAIMetadataTagger(BaseDocumentTransformer, BaseModel):
|
|||||||
tagging_chain = create_tagging_chain(schema, llm)
|
tagging_chain = create_tagging_chain(schema, llm)
|
||||||
document_transformer = OpenAIMetadataTagger(tagging_chain=tagging_chain)
|
document_transformer = OpenAIMetadataTagger(tagging_chain=tagging_chain)
|
||||||
original_documents = [
|
original_documents = [
|
||||||
Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\This is the greatest movie ever made. 4 out of 5 stars."),
|
Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."),
|
||||||
Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
|
Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -123,7 +123,7 @@ def create_metadata_tagger(
|
|||||||
|
|
||||||
document_transformer = create_metadata_tagger(schema, llm)
|
document_transformer = create_metadata_tagger(schema, llm)
|
||||||
original_documents = [
|
original_documents = [
|
||||||
Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\This is the greatest movie ever made. 4 out of 5 stars."),
|
Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."),
|
||||||
Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
|
Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ def clean_excerpt(excerpt: str) -> str:
|
|||||||
"""
|
"""
|
||||||
if not excerpt:
|
if not excerpt:
|
||||||
return excerpt
|
return excerpt
|
||||||
res = re.sub("\s+", " ", excerpt).replace("...", "")
|
res = re.sub(r"\s+", " ", excerpt).replace("...", "")
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@ -45,7 +45,7 @@ def combined_text(item: "ResultItem") -> str:
|
|||||||
|
|
||||||
|
|
||||||
DocumentAttributeValueType = Union[str, int, List[str], None]
|
DocumentAttributeValueType = Union[str, int, List[str], None]
|
||||||
"""Possible types of a DocumentAttributeValue.
|
"""Possible types of a DocumentAttributeValue.
|
||||||
|
|
||||||
Dates are also represented as str.
|
Dates are also represented as str.
|
||||||
"""
|
"""
|
||||||
|
@ -88,8 +88,8 @@ class MyScaleTranslator(Visitor):
|
|||||||
return self.map_dict[func](*args)
|
return self.map_dict[func](*args)
|
||||||
|
|
||||||
def visit_comparison(self, comparison: Comparison) -> Dict:
|
def visit_comparison(self, comparison: Comparison) -> Dict:
|
||||||
regex = "\((.*?)\)"
|
regex = r"\((.*?)\)"
|
||||||
matched = re.search("\(\w+\)", comparison.attribute)
|
matched = re.search(r"\(\w+\)", comparison.attribute)
|
||||||
|
|
||||||
# If arbitrary function is applied to an attribute
|
# If arbitrary function is applied to an attribute
|
||||||
if matched:
|
if matched:
|
||||||
|
@ -18,13 +18,13 @@ SUFFIXES_TO_IGNORE = (
|
|||||||
".epub",
|
".epub",
|
||||||
)
|
)
|
||||||
SUFFIXES_TO_IGNORE_REGEX = (
|
SUFFIXES_TO_IGNORE_REGEX = (
|
||||||
"(?!" + "|".join([re.escape(s) + "[\#'\"]" for s in SUFFIXES_TO_IGNORE]) + ")"
|
"(?!" + "|".join([re.escape(s) + r"[\#'\"]" for s in SUFFIXES_TO_IGNORE]) + ")"
|
||||||
)
|
)
|
||||||
PREFIXES_TO_IGNORE_REGEX = (
|
PREFIXES_TO_IGNORE_REGEX = (
|
||||||
"(?!" + "|".join([re.escape(s) for s in PREFIXES_TO_IGNORE]) + ")"
|
"(?!" + "|".join([re.escape(s) for s in PREFIXES_TO_IGNORE]) + ")"
|
||||||
)
|
)
|
||||||
DEFAULT_LINK_REGEX = (
|
DEFAULT_LINK_REGEX = (
|
||||||
f"href=[\"']{PREFIXES_TO_IGNORE_REGEX}((?:{SUFFIXES_TO_IGNORE_REGEX}.)*?)[\#'\"]"
|
rf"href=[\"']{PREFIXES_TO_IGNORE_REGEX}((?:{SUFFIXES_TO_IGNORE_REGEX}.)*?)[\#'\"]"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user