mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-29 20:35:43 +00:00
Signed-off-by: ChengZi <chen.zhang@zilliz.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com> Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com> Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no> Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: ccurme <chester.curme@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Tomaz Bratanic <bratanic.tomaz@gmail.com> Co-authored-by: ZhangShenao <15201440436@163.com> Co-authored-by: Friso H. Kingma <fhkingma@gmail.com> Co-authored-by: ChengZi <chen.zhang@zilliz.com> Co-authored-by: Nuno Campos <nuno@langchain.dev> Co-authored-by: Morgante Pell <morgantep@google.com>
45 lines
1.2 KiB
Python
45 lines
1.2 KiB
Python
import sys
|
|
from glob import glob
|
|
from pathlib import Path
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Directory one level above the folder that contains this file.
# NOTE(review): not referenced in the visible part of this file — confirm it is still needed.
CUR_DIR = Path(__file__).parents[1]
|
|
|
|
|
|
def process_toc_h3_elements(html_content: str) -> str:
    """Update Class.method() TOC headers to just method().

    Parses ``html_content`` with BeautifulSoup's ``html.parser``, looks up
    every ``<li class="toc-h3">`` entry and, for those that contain an
    ``<a><code><span>`` chain, replaces the span's text with the part after
    the last ``"."``. Entries without that chain are left untouched.

    Args:
        html_content: HTML document as a string.

    Returns:
        The serialized document after the TOC labels have been shortened.
    """
    soup = BeautifulSoup(html_content, "html.parser")

    for toc_item in soup.find_all("li", class_="toc-h3"):
        try:
            label = toc_item.a.code.span
        except AttributeError:
            # A missing <a> or <code> yields None, and the next attribute
            # access on None raises; such entries have nothing to rewrite.
            continue
        if label is None:
            # <a><code> is present but holds no <span>: tag-name navigation
            # returns None here instead of raising, so skip it explicitly
            # rather than crashing on None.get_text().
            continue

        # Keep only the text after the last dot
        # ("Class.method()" -> "method()"); text without a dot is unchanged.
        label.string = label.get_text().split(".")[-1]

    return str(soup)
|
|
|
|
|
|
if __name__ == "__main__":
    # Usage: python <this script> <html_dir>
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <html_dir>")
    html_dir = sys.argv[1]

    # Visit every .html file under html_dir (recursively) and rewrite it in
    # place with the shortened TOC labels.
    for fn in glob(f"{html_dir.rstrip('/')}/**/*.html", recursive=True):
        # Read and write with an explicit encoding so the result does not
        # depend on the platform's locale default.
        with open(fn, "r", encoding="utf-8") as f:
            html = f.read()

        processed_html = process_toc_h3_elements(html)

        with open(fn, "w", encoding="utf-8") as f:
            f.write(processed_html)
|