hatchet/sdks/python/docs/generator/generate.py
Matt Kaye c8f56e0872 Feat: Python SDK Documentation, Part I (#1567)
* feat: initial mkdocs setup

* chore: lock

* fix: config + start getting docs working

* fix: remove lots more redundant :type docs, update config more

* feat: split up clients

* feat: add pydoclint

* fix: rm defaults from docstrings

* fix: pydoclint errors

* feat: run pydoclint in ci

* fix: lint on 3.13

* debug: try explicit config path

* fix: ignore venv

* feat: index, styling

* fix: rm footer

* fix: more style tweaks

* feat: generated docs

* fix: refactor a bit

* fix: regen

* Revert "fix: regen"

This reverts commit 7f66adc77840ad96d0eafe55c8dd467f71eb50fb.

* feat: improve prompting

* feat: add docs, modify theme config to enable toc for docs

* fix: lint

* fix: lint

* feat: regenerate

* feat: bs4 for html parsing

* feat: preview correctly

* fix: exclude site subdir from all the linters

* refactor: break up script into components

* feat: remove a bunch more stuff from the html

* feat: prettier, enable toc

* fix: enable tocs in more places + sort properly

* fix: code blocks, ordering

* fix: ordering

* feat: finish up feature clients

* fix: rm unused deps

* fix: routing + property tags + sidebar

* fix: hatchet client + formatting

* fix: allow selecting single set of files

* fix: lint

* rm: cruft

* fix: naming

* fix: runs client attrs

* fix: rm cruft page

* feat: internal linking + top level description

* [Python]: Fixing some more issues (#1573)

* fix: pass priority through from the task

* fix: improve eof handling slightly

* chore: version

* fix: improve eof handling

* fix: send prio from durable

* fix: naming

* cleanup: use a variable

* chore: version

* feat: comment explaining page depth thing

* chore: bump ver

* feat: standalone docs

* fix: prompting + heading levels
2025-04-18 15:34:07 -04:00


import argparse
import asyncio
import os
from typing import cast

from docs.generator.llm import parse_markdown
from docs.generator.paths import crawl_directory, find_child_paths
from docs.generator.shared import TMP_GEN_PATH
from docs.generator.types import Document
from docs.generator.utils import gather_max_concurrency, rm_rf


async def clean_markdown_with_openai(document: Document) -> None:
    # Ask the LLM to convert one generated markdown file to MDX, then write the result.
    print("Generating mdx for", document.readable_source_path)

    with open(document.source_path, "r", encoding="utf-8") as f:
        original_md = f.read()

    content = await parse_markdown(original_markdown=original_md)

    # Skip writing if nothing usable came back.
    if not content:
        return None

    with open(document.mdx_output_path, "w", encoding="utf-8") as f:
        f.write(content)


def generate_sub_meta_entry(child: str) -> str:
    # Build a `_meta.js` entry for a child directory, title-casing its slug.
    child = child.replace("/", "")

    return f"""
    "{child}": {{
        "title": "{child.replace("-", " ").title()}",
        "theme": {{
            "toc": true
        }},
    }},
    """


def generate_meta_js(docs: list[Document], children: set[str]) -> str:
    # Combine per-document entries with child-directory entries into one `_meta.js` export.
    prefix = docs[0].directory

    subentries = [doc.meta_js_entry for doc in docs] + [
        generate_sub_meta_entry(child.replace(prefix, "")) for child in children
    ]

    # Sort entries case-insensitively by their quoted key.
    sorted_subentries = sorted(
        subentries,
        key=lambda x: x.strip().split(":")[0].strip('"').lower(),
    )

    entries = "".join(sorted_subentries)

    return f"export default {{{entries}}}"


def update_meta_js(documents: list[Document]) -> None:
    # Write one `_meta.js` per output directory, covering its documents and child directories.
    meta_js_out_paths = {d.mdx_output_meta_js_path for d in documents}

    for path in meta_js_out_paths:
        relevant_documents = [d for d in documents if d.mdx_output_meta_js_path == path]
        exemplar = relevant_documents[0]
        directory = exemplar.directory
        children = find_child_paths(directory, documents)

        meta = generate_meta_js(relevant_documents, children)

        out_path = exemplar.mdx_output_meta_js_path

        with open(out_path, "w", encoding="utf-8") as f:
            f.write(meta)


async def run(selections: list[str]) -> None:
    rm_rf(TMP_GEN_PATH)

    try:
        # Build the mkdocs site, then rewrite each generated page as MDX with bounded concurrency.
        os.system("poetry run mkdocs build")

        documents = crawl_directory(TMP_GEN_PATH, selections)

        await gather_max_concurrency(
            *[clean_markdown_with_openai(d) for d in documents], max_concurrency=10
        )

        # A `--select` run only covers a subset of pages, so `_meta.js` is only
        # regenerated on full runs.
        if not selections:
            update_meta_js(documents)

        os.chdir("../../frontend/docs")
        os.system("pnpm lint:fix")
    finally:
        # Always clean up the intermediate build output, even if generation fails.
        rm_rf("docs/site")
        rm_rf("site")
        rm_rf(TMP_GEN_PATH)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--select",
        nargs="*",
        type=str,
        help="Select a subset of docs to generate. Note that this will prevent the `_meta.js` file from being generated.",
    )

    args = parser.parse_args()

    selections = cast(list[str], args.select or [])

    asyncio.run(run(selections))


if __name__ == "__main__":
    main()
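
A minimal usage sketch, assuming the `docs.generator` package layout implied by the imports above and a working directory at the Python SDK root; the "runs" selection below is a hypothetical directory name:

# Hypothetical: regenerate only the "runs" pages; partial runs skip `_meta.js`.
# Roughly equivalent to invoking the CLI with `--select runs`.
import asyncio

from docs.generator.generate import run

asyncio.run(run(["runs"]))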