Files
hatchet/sdks/python/docs/generator/markdown_export.py
Matt Kaye c8f56e0872 Feat: Python SDK Documentation, Part I (#1567)
* feat: initial mkdocs setup

* chore: lock

* fix: config + start getting docs working

* fix: remove lots more redundant :type docs, update config more

* feat: split up clients

* feat: add pydoclint

* fix: rm defaults from docstrings

* fix: pydoclint errors

* feat: run pydoclint in ci

* fix: lint on 3.13

* debug: try explicit config path

* fix: ignore venv

* feat: index, styling

* fix: rm footer

* fix: more style tweaks

* feat: generated docs

* fix: refactor a bit

* fix: regen

* Revert "fix: regen"

This reverts commit 7f66adc77840ad96d0eafe55c8dd467f71eb50fb.

* feat: improve prompting

* feat: add docs, modify theme config to enable toc for docs

* fix: lint

* fix: lint

* feat: regenerate

* feat: bs4 for html parsing

* feat: preview correctly

* fix: exclude site subdir from all the linters

* refactor: break up script into components

* feat: remove a bunch more stuff from the html

* feat: prettier, enable toc

* fix: enable tocs in more places + sort properly

* fix: code blocks, ordering

* fix: ordering

* feat: finish up feature clients

* fix: rm unused deps

* fix: routing + property tags + sidebar

* fix: hatchet client + formatting

* fix: allow selecting single set of files

* fix: lint

* rm: cruft

* fix: naming

* fix: runs client attrs

* fix: rm cruft page

* feat: internal linking + top level description

* [Python]: Fixing some more issues (#1573)

* fix: pass priority through from the task

* fix: improve eof handling slightly

* chore: version

* fix: improve eof handling

* fix: send prio from durable

* fix: naming

* cleanup: use a variable

* chore: version

* feat: comment explaining page depth thing

* chore: bump ver

* feat: standalone docs

* fix: prompting + heading levels
2025-04-18 15:34:07 -04:00

148 lines
4.4 KiB
Python

import os
from typing import cast
from bs4 import BeautifulSoup, Tag
from markdownify import markdownify # type: ignore[import-untyped]
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.plugins import BasePlugin
from mkdocs.structure.pages import Page
from docs.generator.shared import TMP_GEN_PATH
class MarkdownExportPlugin(BasePlugin):  # type: ignore
    """MkDocs plugin that writes a Markdown copy of each rendered page.

    For every page, the rendered HTML is parsed with BeautifulSoup, a fixed
    set of elements is removed (async labels, header anchor links, ``nav``
    elements, footer, keyboard-shortcut modal, navbar, title, ``property``
    tags), absolute links to ``https://docs.hatchet.run/`` are rewritten to
    relative ones, and the result is converted with ``markdownify`` and
    written below ``TMP_GEN_PATH``.

    The ``_remove_*`` / ``_interpolate_*`` helpers mutate ``self.soup`` in
    place and return ``self`` so they can be chained.
    """

    def __init__(self) -> None:
        super().__init__()
        # Annotation-only declarations: both attributes are assigned per page
        # (`page_source_path` in `on_post_page`, `soup` in `_preprocess_html`)
        # before any helper reads them.
        self.soup: BeautifulSoup
        self.page_source_path: str

    def _remove_async_tags(self) -> "MarkdownExportPlugin":
        """Drop ``doc doc-labels`` spans that carry the ``async`` label."""
        for span in self.soup.find_all("span", class_="doc doc-labels"):
            # Either a descendant string is exactly "async", or the span's
            # whole text is "async" once surrounding whitespace is stripped.
            if span.find(string="async") or span.get_text().strip() == "async":
                span.decompose()

        return self

    def _remove_hash_links(self) -> "MarkdownExportPlugin":
        """Drop ``a.headerlink`` anchors whose href is an in-page ``#`` link."""
        for link in self.soup.find_all("a", class_="headerlink"):
            # `.get` with a default instead of `link["href"]`: an anchor
            # without an href must be skipped, not raise KeyError.
            href = cast(str, link.get("href", ""))
            if href.startswith("#"):
                link.decompose()

        return self

    def _remove_toc(self) -> "MarkdownExportPlugin":
        """Drop every ``<nav>`` element from the page."""
        for toc in self.soup.find_all("nav"):
            toc.decompose()

        return self

    def _remove_footer(self) -> "MarkdownExportPlugin":
        """Drop the first ``<footer>`` element, if there is one."""
        footer = self.soup.find("footer")
        if footer and isinstance(footer, Tag):
            footer.decompose()

        return self

    def _remove_navbar(self) -> "MarkdownExportPlugin":
        """Drop the first ``div`` of each navbar-related class, if present."""
        # Looked up one at a time, in this order, so that removing an outer
        # container first simply makes the later lookups come back empty.
        for css_class in ("navbar", "navbar-header", "navbar-collapse"):
            element = self.soup.find("div", class_=css_class)
            if element and isinstance(element, Tag):
                element.decompose()

        return self

    def _remove_keyboard_shortcuts_modal(self) -> "MarkdownExportPlugin":
        """Drop the ``div`` with id ``mkdocs_keyboard_modal``, if present."""
        modal = self.soup.find("div", id="mkdocs_keyboard_modal")
        if modal and isinstance(modal, Tag):
            modal.decompose()

        return self

    def _remove_title(self) -> "MarkdownExportPlugin":
        """Drop the first ``h1`` with class ``title``, if present."""
        title = self.soup.find("h1", class_="title")
        if title and isinstance(title, Tag):
            title.decompose()

        return self

    def _remove_property_tags(self) -> "MarkdownExportPlugin":
        """Drop ``<code>`` elements whose string is exactly ``property``."""
        for tag in self.soup.find_all("code", string="property"):
            tag.decompose()

        return self

    def _interpolate_docs_links(self) -> "MarkdownExportPlugin":
        """Rewrite absolute hosted-docs links into relative links.

        Any ``<a>`` whose href starts with ``https://docs.hatchet.run/`` has
        that prefix replaced by a run of ``../`` segments derived from the
        depth of the current page's source path.
        """
        docs_base_url = "https://docs.hatchet.run/"
        page_depth = self.page_source_path.count("/")

        ## Using the depth + 2 here because the links are relative to the root of
        ## the SDK docs subdir, which sits at `/sdks/python` (two levels below the root)
        dirs_up_prefix = "../" * (page_depth + 2)

        for link in self.soup.find_all("a"):
            # Missing href -> "" -> fails the prefix check and is skipped.
            href = cast(str, link.get("href", ""))
            if href.startswith(docs_base_url):
                # Swap only the leading prefix; a later occurrence of the
                # docs URL inside the same href is left alone.
                link["href"] = dirs_up_prefix + href[len(docs_base_url):]

        return self

    def _preprocess_html(self, content: str) -> str:
        """Parse ``content``, apply every cleanup step, and return the HTML.

        Args:
            content: Rendered HTML for a single page.

        Returns:
            The cleaned document serialized back to a string.
        """
        self.soup = BeautifulSoup(content, "html.parser")

        (
            self._remove_async_tags()
            ._remove_hash_links()
            ._remove_toc()
            ._remove_footer()
            ._remove_keyboard_shortcuts_modal()
            ._remove_navbar()
            ._remove_title()
            ._remove_property_tags()
            ._interpolate_docs_links()
        )

        return str(self.soup)

    def on_post_page(
        self, output_content: str, page: Page, config: MkDocsConfig
    ) -> str:
        """Write a Markdown version of the page and return the cleaned HTML.

        Args:
            output_content: Rendered HTML of the page.
            page: The page being processed; its ``file.src_uri`` is used for
                link depth and ``file.dest_path`` for the output location.
            config: The MkDocs configuration (unused here).

        Returns:
            The preprocessed HTML (not the original ``output_content``). The
            same value is returned whether or not a Markdown file was written.
        """
        self.page_source_path = page.file.src_uri

        content = self._preprocess_html(output_content)
        md_content = markdownify(content, heading_style="ATX", wrap=False)

        # Nothing to export: skip creating an empty .md file.
        if not md_content:
            return content

        # Mirror the page's destination path under TMP_GEN_PATH, swapping the
        # extension for ".md".
        dest = os.path.splitext(page.file.dest_path)[0] + ".md"
        out_path = os.path.join(TMP_GEN_PATH, dest)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

        with open(out_path, "w", encoding="utf-8") as f:
            f.write(md_content)

        return content