mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-10-23 18:31:22 +00:00
refactor: The first refactored version for sdk release (#907)
Co-authored-by: chengfangyin2 <chengfangyin3@jd.com>
This commit is contained in:
81
dbgpt/util/string_utils.py
Normal file
81
dbgpt/util/string_utils.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import re
|
||||
|
||||
|
||||
def is_all_chinese(text):
|
||||
### Determine whether the string is pure Chinese
|
||||
pattern = re.compile(r"^[一-龥]+$")
|
||||
match = re.match(pattern, text)
|
||||
return match is not None
|
||||
|
||||
|
||||
def is_number_chinese(text):
|
||||
### Determine whether the string is numbers and Chinese
|
||||
pattern = re.compile(r"^[\d一-龥]+$")
|
||||
match = re.match(pattern, text)
|
||||
return match is not None
|
||||
|
||||
|
||||
def is_chinese_include_number(text):
|
||||
### Determine whether the string is pure Chinese or Chinese containing numbers
|
||||
pattern = re.compile(r"^[一-龥]+[\d一-龥]*$")
|
||||
match = re.match(pattern, text)
|
||||
return match is not None
|
||||
|
||||
|
||||
def is_scientific_notation(string):
|
||||
# 科学计数法的正则表达式
|
||||
pattern = r"^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$"
|
||||
# 使用正则表达式匹配字符串
|
||||
match = re.match(pattern, str(string))
|
||||
# 判断是否匹配成功
|
||||
if match is not None:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def extract_content(long_string, s1, s2, is_include: bool = False):
|
||||
# extract text
|
||||
match_map = {}
|
||||
start_index = long_string.find(s1)
|
||||
while start_index != -1:
|
||||
if is_include:
|
||||
end_index = long_string.find(s2, start_index + len(s1) + 1)
|
||||
extracted_content = long_string[start_index : end_index + len(s2)]
|
||||
else:
|
||||
end_index = long_string.find(s2, start_index + len(s1))
|
||||
extracted_content = long_string[start_index + len(s1) : end_index]
|
||||
if extracted_content:
|
||||
match_map[start_index] = extracted_content
|
||||
start_index = long_string.find(s1, start_index + 1)
|
||||
return match_map
|
||||
|
||||
|
||||
def extract_content_open_ending(long_string, s1, s2, is_include: bool = False):
|
||||
# extract text open ending
|
||||
match_map = {}
|
||||
start_index = long_string.find(s1)
|
||||
while start_index != -1:
|
||||
if long_string.find(s2, start_index) <= 0:
|
||||
end_index = len(long_string)
|
||||
else:
|
||||
if is_include:
|
||||
end_index = long_string.find(s2, start_index + len(s1) + 1)
|
||||
else:
|
||||
end_index = long_string.find(s2, start_index + len(s1))
|
||||
if is_include:
|
||||
extracted_content = long_string[start_index : end_index + len(s2)]
|
||||
else:
|
||||
extracted_content = long_string[start_index + len(s1) : end_index]
|
||||
if extracted_content:
|
||||
match_map[start_index] = extracted_content
|
||||
start_index = long_string.find(s1, start_index + 1)
|
||||
return match_map
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
s = "abcd123efghijkjhhh456xxx123aa456yyy123bb456xx123"
|
||||
s1 = "123"
|
||||
s2 = "456"
|
||||
|
||||
print(extract_content_open_ending(s, s1, s2, True))
|
||||
Reference in New Issue
Block a user