Fix invalid escape sequence warnings by using raw strings for regexes. (#11943)

The invalid escape sequences in this code also trigger warnings when our
users' apps hit it, which is annoying and doesn't look great. Let's fix it.
This commit is contained in:
Predrag Gruevski 2023-10-18 10:55:17 -04:00 committed by GitHub
parent 8e1b1db90d
commit debcf053eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 12 additions and 12 deletions

View File

@ -23,7 +23,7 @@ from langchain.chains.query_constructor.ir import (
Operator, Operator,
) )
GRAMMAR = """ GRAMMAR = r"""
?program: func_call ?program: func_call
?expr: func_call ?expr: func_call
| value | value

View File

@ -49,9 +49,9 @@ class AcreomLoader(BaseLoader):
def _process_acreom_content(self, content: str) -> str: def _process_acreom_content(self, content: str) -> str:
# remove acreom specific elements from content that # remove acreom specific elements from content that
# do not contribute to the context of current document # do not contribute to the context of current document
content = re.sub("\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*", "", content) # rm tasks content = re.sub(r"\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*", "", content) # rm tasks
content = re.sub("#", "", content) # rm hashtags content = re.sub(r"#", "", content) # rm hashtags
content = re.sub("\[\[.*?\]\]", "", content) # rm doclinks content = re.sub(r"\[\[.*?\]\]", "", content) # rm doclinks
return content return content
def lazy_load(self) -> Iterator[Document]: def lazy_load(self) -> Iterator[Document]:

View File

@ -40,7 +40,7 @@ class OpenAIMetadataTagger(BaseDocumentTransformer, BaseModel):
tagging_chain = create_tagging_chain(schema, llm) tagging_chain = create_tagging_chain(schema, llm)
document_transformer = OpenAIMetadataTagger(tagging_chain=tagging_chain) document_transformer = OpenAIMetadataTagger(tagging_chain=tagging_chain)
original_documents = [ original_documents = [
Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\This is the greatest movie ever made. 4 out of 5 stars."), Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."),
Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}), Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
] ]
@ -123,7 +123,7 @@ def create_metadata_tagger(
document_transformer = create_metadata_tagger(schema, llm) document_transformer = create_metadata_tagger(schema, llm)
original_documents = [ original_documents = [
Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\This is the greatest movie ever made. 4 out of 5 stars."), Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."),
Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}), Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
] ]

View File

@ -20,7 +20,7 @@ def clean_excerpt(excerpt: str) -> str:
""" """
if not excerpt: if not excerpt:
return excerpt return excerpt
res = re.sub("\s+", " ", excerpt).replace("...", "") res = re.sub(r"\s+", " ", excerpt).replace("...", "")
return res return res

View File

@ -88,8 +88,8 @@ class MyScaleTranslator(Visitor):
return self.map_dict[func](*args) return self.map_dict[func](*args)
def visit_comparison(self, comparison: Comparison) -> Dict: def visit_comparison(self, comparison: Comparison) -> Dict:
regex = "\((.*?)\)" regex = r"\((.*?)\)"
matched = re.search("\(\w+\)", comparison.attribute) matched = re.search(r"\(\w+\)", comparison.attribute)
# If arbitrary function is applied to an attribute # If arbitrary function is applied to an attribute
if matched: if matched:

View File

@ -18,13 +18,13 @@ SUFFIXES_TO_IGNORE = (
".epub", ".epub",
) )
SUFFIXES_TO_IGNORE_REGEX = ( SUFFIXES_TO_IGNORE_REGEX = (
"(?!" + "|".join([re.escape(s) + "[\#'\"]" for s in SUFFIXES_TO_IGNORE]) + ")" "(?!" + "|".join([re.escape(s) + r"[\#'\"]" for s in SUFFIXES_TO_IGNORE]) + ")"
) )
PREFIXES_TO_IGNORE_REGEX = ( PREFIXES_TO_IGNORE_REGEX = (
"(?!" + "|".join([re.escape(s) for s in PREFIXES_TO_IGNORE]) + ")" "(?!" + "|".join([re.escape(s) for s in PREFIXES_TO_IGNORE]) + ")"
) )
DEFAULT_LINK_REGEX = ( DEFAULT_LINK_REGEX = (
f"href=[\"']{PREFIXES_TO_IGNORE_REGEX}((?:{SUFFIXES_TO_IGNORE_REGEX}.)*?)[\#'\"]" rf"href=[\"']{PREFIXES_TO_IGNORE_REGEX}((?:{SUFFIXES_TO_IGNORE_REGEX}.)*?)[\#'\"]"
) )