mirror of
				https://github.com/csunny/DB-GPT.git
				synced 2025-10-25 20:00:59 +00:00 
			
		
		
		
	Co-authored-by: Fangyin Cheng <staneyffer@gmail.com> Co-authored-by: lcx01800250 <lcx01800250@alibaba-inc.com> Co-authored-by: licunxing <864255598@qq.com> Co-authored-by: Aralhi <xiaoping0501@gmail.com> Co-authored-by: xuyuan23 <643854343@qq.com> Co-authored-by: aries_ckt <916701291@qq.com> Co-authored-by: hzh97 <2976151305@qq.com>
		
			
				
	
	
		
			95 lines
		
	
	
		
			2.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			95 lines
		
	
	
		
			2.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import re
 | |
| 
 | |
| 
 | |
def is_all_chinese(text):
    """Return True if ``text`` consists solely of Chinese characters.

    "Chinese" here means the character range ``一``-``龥``
    (U+4E00 through U+9FA5). The empty string is not considered Chinese.

    Args:
        text: The string to check.

    Returns:
        bool: True when every character of the non-empty string falls in the
        range above, False otherwise.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would wrongly accept "中文\n".
    return re.fullmatch(r"[一-龥]+", text) is not None
 | |
| 
 | |
| 
 | |
def is_number_chinese(text):
    """Return True if ``text`` is made up only of digits and Chinese characters.

    Any mix is accepted, including digits only or Chinese only. "Chinese"
    means the character range ``一``-``龥`` (U+4E00 through U+9FA5); digits are
    whatever the regex class ``\\d`` matches. The empty string returns False.

    Args:
        text: The string to check.

    Returns:
        bool: True when the non-empty string contains nothing but digits and
        Chinese characters, False otherwise.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would wrongly accept "中文1\n".
    return re.fullmatch(r"[\d一-龥]+", text) is not None
 | |
| 
 | |
| 
 | |
def is_chinese_include_number(text):
    """Return True if ``text`` starts with Chinese and contains only Chinese/digits.

    The string must begin with at least one Chinese character (range
    ``一``-``龥``, U+4E00 through U+9FA5); the remainder may be any mix of
    Chinese characters and digits. A string that starts with a digit, or the
    empty string, returns False.

    Args:
        text: The string to check.

    Returns:
        bool: True for pure Chinese or Chinese followed by/interleaved with
        digits, False otherwise.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would wrongly accept "中文1\n".
    return re.fullmatch(r"[一-龥]+[\d一-龥]*", text) is not None
 | |
| 
 | |
| 
 | |
def is_scientific_notation(string):
    """Return True if ``string`` looks like a decimal or scientific-notation number.

    The value is converted with ``str()`` first, so numeric objects can be
    passed directly. Accepted form: optional sign, optional integer digits,
    optional decimal point, at least one digit, then an optional exponent
    (``e``/``E``, optional sign, digits). Note that the exponent is optional,
    so plain numbers such as ``"123"`` or ``"-0.5"`` are accepted as well.

    Args:
        string: The value to test; any object accepted by ``str()``.

    Returns:
        bool: True when the whole text matches the numeric form, else False.
    """
    # Regular expression for scientific notation (exponent part optional).
    pattern = r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?"
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would wrongly accept "1e5\n".
    return re.fullmatch(pattern, str(string)) is not None
 | |
| 
 | |
| 
 | |
def extract_content(long_string, s1, s2, is_include: bool = False):
    """Extract every piece of text enclosed between ``s1`` and ``s2``.

    Each occurrence of ``s1`` is paired with the next occurrence of ``s2``
    after it. Occurrences of ``s1`` are scanned one character at a time, so
    an ``s1`` that appears inside an earlier match produces its own entry.

    Args:
        long_string: The text to search.
        s1: Opening delimiter.
        s2: Closing delimiter.
        is_include: When True the returned pieces contain both delimiters;
            when False only the text between them is returned.

    Returns:
        dict: Maps the index of each ``s1`` occurrence to the extracted text.
        Empty extractions are omitted, and an ``s1`` with no closing ``s2``
        after it is skipped.
    """
    match_map = {}
    # Include mode looks for s2 one extra character past s1 (kept from the
    # original implementation), so directly adjacent delimiters do not pair.
    search_offset = len(s1) + (1 if is_include else 0)
    start_index = long_string.find(s1)
    while start_index != -1:
        end_index = long_string.find(s2, start_index + search_offset)
        if end_index == -1:
            # No closing delimiter from here on. Using -1 as a slice bound
            # would silently return text with its last character chopped off;
            # later s1 occurrences cannot be closed either, so stop.
            break
        if is_include:
            extracted_content = long_string[start_index : end_index + len(s2)]
        else:
            extracted_content = long_string[start_index + len(s1) : end_index]
        if extracted_content:
            match_map[start_index] = extracted_content
        start_index = long_string.find(s1, start_index + 1)
    return match_map
 | |
| 
 | |
| 
 | |
def extract_content_open_ending(long_string, s1, s2, is_include: bool = False):
    """Extract text between ``s1`` and ``s2``, tolerating a missing final ``s2``.

    Works like a delimiter-pair extraction, except that when no ``s2`` follows
    an ``s1`` the extraction runs to the end of ``long_string`` (useful for
    text that has been cut off before the closing delimiter arrived).

    Args:
        long_string: The text to search.
        s1: Opening delimiter.
        s2: Closing delimiter.
        is_include: When True the returned pieces contain the delimiters
            (only ``s1`` if the closing one is missing); when False only the
            text after ``s1`` is returned.

    Returns:
        dict: Maps the index of each ``s1`` occurrence to the extracted text.
        Empty extractions are omitted.
    """
    match_map = {}
    # Include mode looks for s2 one extra character past s1 (kept from the
    # original implementation).
    search_offset = len(s1) + (1 if is_include else 0)
    start_index = long_string.find(s1)
    while start_index != -1:
        # Search for s2 strictly after s1 and test the result against -1.
        # Probing from start_index with "<= 0" misfires when s2 is contained
        # in s1 (e.g. "```" inside "```sql") or when a match sits at index 0.
        end_index = long_string.find(s2, start_index + search_offset)
        if end_index == -1:
            end_index = len(long_string)
        if is_include:
            # For an open ending the upper bound overshoots the string, which
            # slicing clamps to the end.
            extracted_content = long_string[start_index : end_index + len(s2)]
        else:
            extracted_content = long_string[start_index + len(s1) : end_index]
        if extracted_content:
            match_map[start_index] = extracted_content
        start_index = long_string.find(s1, start_index + 1)
    return match_map
 | |
| 
 | |
| 
 | |
def str_to_bool(s):
    """Interpret a textual flag as a boolean.

    Truthy spellings (case-insensitive, surrounding whitespace ignored) are
    ``"true"``, ``"t"``, ``"1"``, ``"yes"``, ``"y"`` and any text that starts
    with ``"true"``. Everything else, including the explicit falsy spellings
    ``"false"``, ``"f"``, ``"0"``, ``"no"``, ``"n"``, yields False.

    Args:
        s: The value to interpret. Normally a string; ``bool`` values are
            returned unchanged, ``None`` is treated as False, and any other
            object is converted with ``str()`` first.

    Returns:
        bool: The interpreted flag.
    """
    # Values that are already parsed would otherwise crash on .lower().
    if isinstance(s, bool):
        return s
    if s is None:
        return False
    normalized = str(s).strip().lower()
    return normalized in ("true", "t", "1", "yes", "y") or normalized.startswith(
        "true"
    )
 | |
| 
 | |
| 
 | |
def _to_str(x, charset="utf8", errors="strict"):
    """Coerce ``x`` to text.

    Args:
        x: The value to convert.
        charset: Encoding used to decode ``bytes`` input.
        errors: Error-handling scheme passed to ``bytes.decode``.

    Returns:
        ``x`` itself when it is ``None`` or already a ``str``; the decoded
        text when it is ``bytes``; ``str(x)`` for anything else.
    """
    if isinstance(x, bytes):
        return x.decode(charset, errors)
    if x is not None and not isinstance(x, str):
        return str(x)
    # None and str instances pass through untouched.
    return x
 |