infra: use nbconvert for docs build (#21135)

todo

- [x] remove quarto build semantics
- [x] remove quarto download/install
- [x] make `uv` not verbose
This commit is contained in:
Erick Friis
2024-05-07 12:30:17 -07:00
committed by GitHub
parent ad0f3c14c2
commit d5bde4fa91
14 changed files with 262 additions and 216 deletions

View File

@@ -185,8 +185,8 @@ def replace_imports(file):
# Use re.sub to replace each Python code block
data = code_block_re.sub(replacer, data)
if all_imports:
print(f"Adding {len(all_imports)} links for imports in {file}") # noqa: T201
# if all_imports:
# print(f"Adding {len(all_imports)} links for imports in {file}") # noqa: T201
with open(file, "w") as f:
f.write(data)
return all_imports

View File

@@ -0,0 +1,130 @@
import multiprocessing
import os
import re
import sys
from pathlib import Path
from typing import Iterable, Tuple
import nbformat
from nbconvert.exporters import MarkdownExporter
from nbconvert.preprocessors import Preprocessor, RegexRemovePreprocessor
class EscapePreprocessor(Preprocessor):
    """Rewrite two markup constructs found in markdown cells.

    * A fenced block opened with ```{=mdx} is replaced by its inner text.
    * A callout fence written as ``:::{.callout-NAME}`` is shortened to
      ``:::NAME`` (the closing ``:::`` is kept).
    """

    def preprocess_cell(self, cell, resources, cell_index):
        """Apply the rewrites to one cell and return ``(cell, resources)``.

        Cells whose ``cell_type`` is not ``"markdown"`` are returned unchanged.
        """
        if cell.cell_type != "markdown":
            return cell, resources

        # (cheap substring probe, regex pattern, replacement), applied in order.
        rewrites = (
            # find all occurrences of ```{=mdx} blocks and remove wrapper
            ("```{=mdx}\n", r"```{=mdx}\n(.*?)\n```", r"\1"),
            (":::{.callout", r":::{.callout-([^}]*)}(.*?):::", r":::\1\2:::"),
        )
        for marker, pattern, replacement in rewrites:
            if marker in cell.source:
                # DOTALL lets the lazy group span several lines.
                cell.source = re.sub(
                    pattern, replacement, cell.source, flags=re.DOTALL
                )
        return cell, resources
class ExtractAttachmentsPreprocessor(Preprocessor):
    """
    Inlines cell attachments into the cell source as base64 ``data:`` URIs.

    Nothing is written to disk and nothing is added to ``resources``; the
    only effect on ``resources`` is that its ``"outputs"`` entry is
    guaranteed to be a dict afterwards.
    """

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Replace references to the cell's attachments with inline data URIs.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        cell_index : int
            Index of the cell being processed (see base.py)

        Returns
        -------
        tuple
            The (possibly modified) ``cell`` and ``resources``.
        """
        # Make sure outputs key exists. ``.get`` avoids a KeyError when
        # ``resources`` is a plain dict that has no "outputs" entry yet.
        if not isinstance(resources.get("outputs"), dict):
            resources["outputs"] = {}

        # Only these attachment types are inlined; others are left alone.
        inlineable = {
            "image/png",
            "image/jpeg",
            "image/svg+xml",
            "application/pdf",
        }

        # Loop through all of the attachments in the cell
        for name, attach in cell.get("attachments", {}).items():
            for mime, data in attach.items():
                if mime not in inlineable:
                    continue

                data_uri = f"(data:{mime};base64,{data})"
                # attachments are pre-rendered. Only replace markdown-formatted
                # images, i.e. link targets wrapped in parentheses. Notebooks
                # reference attachments as "(attachment:<name>)"; the bare
                # "(<name>)" form is still handled for backward compatibility.
                for attach_str in (f"(attachment:{name})", f"({name})"):
                    if attach_str in cell.source:
                        cell.source = cell.source.replace(attach_str, data_uri)
        return cell, resources
# Module-level exporter used by _convert_notebook for every notebook.
exporter = MarkdownExporter(
    preprocessors=[
        EscapePreprocessor,
        ExtractAttachmentsPreprocessor,
        # The pattern matches a source that is empty or whitespace-only;
        # per the nbconvert docs, cells matching a pattern are removed.
        RegexRemovePreprocessor(patterns=[r"^\s*$"]),
    ],
    template_name="mdoutput",
    # NOTE(review): relative path, so it depends on the working directory the
    # script is started from -- presumably the docs root; confirm with the
    # build that invokes it.
    extra_template_basedirs=["./scripts/notebook_convert_templates"],
)
def _process_path(tup: Tuple[Path, Path, Path]):
    """Convert one notebook, mirroring its relative location in the output tree.

    ``tup`` is ``(notebook_path, intermediate_docs_dir, output_docs_dir)``,
    packed into one argument so the function can be used with ``pool.map``.
    The output file keeps the notebook's path relative to
    ``intermediate_docs_dir`` and is named ``<stem>.md``.
    """
    src, docs_root, out_root = tup
    rel = src.relative_to(docs_root)
    target = out_root / rel.parent / f"{rel.stem}.md"
    _convert_notebook(src, target)
def _convert_notebook(notebook_path: Path, output_path: Path) -> Path:
    """Render one notebook through the module-level ``exporter`` and save it.

    Parameters
    ----------
    notebook_path : Path
        Notebook file to read (parsed as notebook format version 4).
    output_path : Path
        File to write the exported body to; missing parent directories are
        created.

    Returns
    -------
    Path
        ``output_path``, unchanged.
    """
    # Notebook files are UTF-8 JSON; be explicit rather than relying on the
    # platform's locale encoding.
    with open(notebook_path, encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    # The exporter also returns a resources dict, which is not needed here.
    body, _resources = exporter.from_notebook_node(nb)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(body)
    return output_path
if __name__ == "__main__":
    # argv[1]: directory containing the notebooks; argv[2]: output directory.
    intermediate_docs_dir = Path(sys.argv[1])
    output_docs_dir = Path(sys.argv[2])

    source_paths: Iterable[Path]
    requested = os.environ.get("SOURCE_PATHS")
    if not requested:
        # No explicit selection: convert every notebook under the input dir.
        source_paths = intermediate_docs_dir.glob("**/*.ipynb")
    else:
        # SOURCE_PATHS is a whitespace-separated list of paths relative to
        # the input directory; empty pieces are dropped.
        pieces = (piece.strip() for piece in re.split(r"\s+", requested))
        source_paths = [intermediate_docs_dir / piece for piece in pieces if piece]

    # One argument tuple per notebook, in the shape _process_path expects.
    jobs = (
        (notebook_path, intermediate_docs_dir, output_docs_dir)
        for notebook_path in source_paths
    )
    with multiprocessing.Pool() as pool:
        pool.map(_process_path, jobs)

View File

@@ -0,0 +1,5 @@
{
"mimetypes": {
"text/markdown": true
}
}

View File

@@ -0,0 +1,33 @@
{% extends 'markdown/index.md.j2' %}
{# Overrides of the parent markdown template's output blocks. Text outside a block is not rendered in a child template, so these comments never reach the output. #}
{# Traceback lines: ANSI escape codes stripped, wrapped in an "output" fence. #}
{%- block traceback_line -%}
```output
{{ line.rstrip() | strip_ansi }}
```
{%- endblock traceback_line -%}
{# Stream output text, wrapped in an "output" fence. #}
{%- block stream -%}
```output
{{ output.text.rstrip() }}
```
{%- endblock stream -%}
{# text/plain results, wrapped in an "output" fence. #}
{%- block data_text scoped -%}
```output
{{ output.data['text/plain'].rstrip() }}
```
{%- endblock data_text -%}
{# text/html results, emitted unescaped inside an "html" fence. #}
{%- block data_html scoped -%}
```html
{{ output.data['text/html'] | safe }}
```
{%- endblock data_html -%}
{# JPEG results as an inline markdown image; the registered media type is image/jpeg, not image/jpg. #}
{%- block data_jpg scoped -%}
![](data:image/jpeg;base64,{{ output.data['image/jpeg'] }})
{%- endblock data_jpg -%}
{# PNG results as an inline markdown image. #}
{%- block data_png scoped -%}
![](data:image/png;base64,{{ output.data['image/png'] }})
{%- endblock data_png -%}