Files
doorman/scripts/cleanup_inline_comments.py
2025-09-29 22:20:23 -04:00

168 lines
5.1 KiB
Python

#!/usr/bin/env python3
import sys
import re
import tokenize
from io import BytesIO
from pathlib import Path
def _drop_trailing_comment_tokens(code: str) -> str:
    """Re-emit *code* without comments that share a row with other tokens.

    A comment is treated as trailing when the token before it ends on the
    same row.  A comment that is the first token on its row (at any
    indentation) is a full-line comment and is kept.

    Raises whatever ``tokenize`` raises for source it cannot tokenize.
    """
    kept = []
    prev_end_row = 0  # the leading ENCODING token sits on row 0
    for tok in tokenize.tokenize(BytesIO(code.encode("utf-8")).readline):
        is_trailing = tok.type == tokenize.COMMENT and tok.start[0] == prev_end_row
        prev_end_row = tok.end[0]
        if not is_trailing:
            kept.append(tok)
    # Full 5-tuples keep untokenize in position mode, so the layout of the
    # remaining tokens is reproduced; the gap left by a dropped comment
    # becomes trailing whitespace that the caller strips.
    return tokenize.untokenize(kept).decode("utf-8")


def _strip_hash_comment_from_line(line: str) -> str:
    """Remove a trailing ``#`` comment from one physical line.

    Used only when the source cannot be tokenized.  A ``#`` counts as a
    comment start only outside single- or double-quoted text on this line,
    so ``x = "a#b"`` is left intact.  A line whose first non-blank character
    is ``#`` is returned unchanged, and the original line terminator (if
    any) is preserved so that stripped lines never fuse with the next one.
    """
    body = line.splitlines()[0] if line else ""
    terminator = line[len(body):]
    quote = ""
    escaped = False
    for idx, ch in enumerate(body):
        if escaped:
            escaped = False
        elif ch == "\\":
            escaped = True
        elif quote:
            if ch == quote:
                quote = ""
        elif ch in "'\"":
            quote = ch
        elif ch == "#":
            head = body[:idx]
            if head.strip():
                return head.rstrip() + terminator
            return line  # full-line comment: keep as is
    return line


def strip_inline_comments_python(code: str) -> str:
    """Strip trailing (inline) ``#`` comments from Python source text.

    Full-line comments, including indented ones and a shebang line, are
    kept.  The source is processed with ``tokenize`` when possible; if that
    fails, a quote-aware line-by-line fallback is used instead.

    After comment removal every line is right-stripped, runs of blank lines
    are collapsed to a single blank line, and line endings are normalised
    to ``"\\n"``.  NOTE: this whitespace pass is applied to every physical
    line, so lines inside triple-quoted strings are affected as well.

    Args:
        code: The complete source text of a Python file.

    Returns:
        The cleaned source text.  It ends with a newline only if the text
        produced by the comment-removal step did.
    """
    try:
        new_code = _drop_trailing_comment_tokens(code)
    except Exception:
        # Deliberately broad: any failure in the tokenizer round trip
        # (unterminated string, bad indentation, encoding cookie trouble)
        # falls back to the best-effort textual stripper.
        new_code = "".join(
            _strip_hash_comment_from_line(line) for line in code.splitlines(True)
        )

    collapsed = []
    blank_run = 0
    for ln in new_code.splitlines():
        if ln.strip():
            blank_run = 0
            collapsed.append(ln.rstrip())
        else:
            blank_run += 1
            if blank_run == 1:
                collapsed.append("")
    return "\n".join(collapsed) + ("\n" if new_code.endswith("\n") else "")
def strip_inline_comments_ts(code: str) -> str:
    """Strip trailing ``//`` and inline ``/* ... */`` comments from TS/TSX text.

    Rules applied per physical line:

    * A ``//`` comment preceded by code is removed; a line that consists
      only of a ``//`` comment is kept.
    * A ``/* ... */`` comment that closes on the same line and is preceded
      by code is removed; a line that starts with a block comment is kept
      verbatim.
    * Text inside single-, double- or backtick-quoted strings is never
      treated as a comment.  Block-comment and template-literal state is
      carried from one line to the next, so ``//`` inside a multi-line
      ``/* ... */`` block or a multi-line template literal is left alone.
      ``${...}`` expressions inside templates are not parsed.

    Afterwards lines are right-stripped, runs of blank lines collapse to a
    single blank line, lines consisting only of ``{}`` are dropped (the
    residue of a removed JSX ``{/* ... */}``), and line endings are
    normalised to ``"\\n"``.  Lines that end inside a template literal are
    exempt from this pass because their whitespace is string content.

    Args:
        code: The complete source text of a TypeScript/TSX file.

    Returns:
        The cleaned source text, ending with a newline iff *code* did.
    """

    def is_word_char(ch: str) -> bool:
        # Characters that would merge into one identifier if left adjacent.
        return ch.isalnum() or ch in "_$"

    def scan_line(text: str, in_block: bool, in_template: bool) -> tuple:
        """Clean one line; return ``(text, in_block, in_template)`` at its end."""
        if in_block:
            close = text.find("*/")
            if close == -1:
                return text, True, False
            # The line closes a multi-line comment: keep it verbatim, but
            # scan what follows so the carried state stays correct.
            _, in_block, in_template = scan_line(text[close + 2:], False, False)
            return text, in_block, in_template

        in_single = False
        in_double = False
        escape = False
        i = 0
        n = len(text)
        while i < n:
            ch = text[i]
            if escape:
                escape = False
            elif ch == "\\":
                escape = True
            elif in_single:
                in_single = ch != "'"
            elif in_double:
                in_double = ch != '"'
            elif in_template:
                in_template = ch != "`"
            elif ch == '"':
                in_double = True
            elif ch == "'":
                in_single = True
            elif ch == "`":
                in_template = True
            elif text.startswith("//", i):
                head = text[:i]
                if head.strip():
                    return head.rstrip(), False, False
                return text, False, False  # comment-only line: keep
            elif text.startswith("/*", i):
                close = text.find("*/", i + 2)
                if close == -1:
                    # Opens a multi-line comment; leave the line alone.
                    return text, True, False
                head = text[:i]
                tail = text[close + 2:]
                if not head.strip():
                    # Line starts with a block comment: keep it verbatim.
                    _, in_block, in_template = scan_line(tail, False, False)
                    return text, in_block, in_template
                kept = head.rstrip()
                rest = tail.lstrip()
                # Keep one space before an operator, and between two
                # identifier characters that would otherwise fuse
                # (``const /* c */ x`` must not become ``constx``).
                if rest.startswith(("+", "-", "*", "/")) or (
                    rest[:1] and is_word_char(kept[-1]) and is_word_char(rest[0])
                ):
                    kept += " "
                text = kept + rest
                n = len(text)
                # Resume exactly where the rebuilt tail begins so that no
                # character of it (e.g. an opening quote) is skipped.
                i = len(kept)
                continue
            i += 1
        # Single/double quotes do not span lines; template state does.
        return text, False, in_template

    collapsed = []
    blank_run = 0
    in_block = False
    in_template = False
    for raw in code.splitlines():
        text, in_block, in_template = scan_line(raw, in_block, in_template)
        if in_template:
            # The line break and any trailing whitespace belong to the
            # template literal: emit the line untouched.
            blank_run = 0
            collapsed.append(text)
            continue
        stripped = text.strip()
        # Remove stray JSX placeholders left by comment removal
        if stripped == "{}":
            continue
        if stripped == "":
            blank_run += 1
            if blank_run <= 1:
                collapsed.append("")
        else:
            blank_run = 0
            collapsed.append(text.rstrip())
    return "\n".join(collapsed) + ("\n" if code.endswith("\n") else "")
def main(paths):
    """Clean inline comments from Python and TypeScript files in place.

    Each entry of *paths* is either a file or a directory.  Directories are
    walked recursively; files with a ``.py``, ``.ts`` or ``.tsx`` suffix are
    collected, except those located in one of the ignored directories
    (``node_modules``, ``venv``, ``.venv``, ``.git``, ``dist``, ``build``)
    below the given directory.  A path that is not a directory is collected
    whenever its suffix matches, without the ignore check.

    Every collected file is read as UTF-8 and passed to the matching
    stripper.  A file is rewritten, and ``cleaned: <path>`` printed, only
    when its content changed.  Files that cannot be read are reported on
    stderr and skipped.

    Args:
        paths: Iterable of file or directory paths (strings or path-like).
    """
    exts_py = {".py"}
    exts_ts = {".ts", ".tsx"}
    wanted = exts_py | exts_ts
    skip_dirs = {"node_modules", "venv", ".venv", ".git", "dist", "build"}

    touched = set()
    for p in paths:
        root = Path(p)
        if not root.is_dir():
            if root.suffix in wanted:
                touched.add(root)
            continue
        for f in root.rglob("*"):
            if f.suffix not in wanted or not f.is_file():
                continue
            # Skip files inside ignored directories.  Only directories
            # below the requested root are considered, so a root that
            # itself lives under e.g. ".../build/" is still processed.
            if skip_dirs.intersection(f.relative_to(root).parts[:-1]):
                continue
            touched.add(f)

    for f in sorted(touched):
        try:
            original = f.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as err:
            # Best effort: unreadable or non-UTF-8 files are left untouched.
            print(f"skipped: {f} ({err})", file=sys.stderr)
            continue
        if f.suffix in exts_py:
            cleaned = strip_inline_comments_python(original)
        else:
            cleaned = strip_inline_comments_ts(original)
        if cleaned != original:
            f.write_text(cleaned, encoding="utf-8")
            print(f"cleaned: {f}")
if __name__ == "__main__":
    # Clean the paths given on the command line; with no arguments,
    # fall back to the current directory.
    args = sys.argv[1:]
    if not args:
        args = ["."]
    main(args)