"""Generate a Jupyter notebook from a comment-driven schema file.

The schema is a text file scanned line by line.  Recognised markers:

* ``# === IMPORTS ===`` starts the imports section; every following
  non-blank, non-comment line becomes a line of the first code cell.
  Any other ``# === ...`` line (or any comment line) ends the section.
* ``# code: <ref>`` inserts a cell holding the contents of
  ``<schema dir>/src/<ref>.py`` with imports and ``if __name__`` guards
  removed.
* ``# inline:`` starts a code cell made of the following non-blank,
  non-comment lines, up to the next marker.
* ``# # shell:`` starts a code cell made of the following lines; lines
  commented with ``# `` or ``# # `` are un-commented, up to the next marker.
* ``# markdown`` followed by a triple-double-quoted string produces a
  markdown cell.
"""

import json
import os
import re

# A real ``from x import`` statement (as opposed to any line that merely
# starts with "from "), used to decide whether continuation lines follow.
_FROM_IMPORT_RE = re.compile(r'^from\s+\S+\s+import\b')
_PLAIN_IMPORT_RE = re.compile(r'^import\s+\S')

_TRIPLE_QUOTE = '"""'


def _is_directive(stripped):
    """Return True if *stripped* is a schema marker that ends a code block."""
    return stripped == '# markdown' or stripped.startswith(
        ('# code:', '# inline:', '# # shell:', '# ===')
    )


def _collect_block(lines, start, directive):
    """Gather the body of an ``inline`` or ``shell`` block.

    Scanning begins at index *start* and stops at the first schema marker
    or at the end of *lines*.

    Returns:
        ``(collected_lines, next_index)`` where ``next_index`` is the index
        of the marker that ended the block (or ``len(lines)``).
    """
    collected = []
    i = start
    while i < len(lines):
        raw = lines[i]
        stripped = raw.strip()
        # Markers must be tested before shell content: every marker starts
        # with "# ", so otherwise a shell block would swallow them all.
        if _is_directive(stripped):
            break
        if directive == 'shell' and stripped.startswith('# '):
            if stripped.startswith('# # '):
                collected.append(stripped[3:].lstrip())
            else:
                collected.append(stripped[2:])
        elif stripped and not stripped.startswith('#'):
            # Blank lines and ordinary comments are dropped; code lines
            # keep their original indentation.
            collected.append(raw)
        i += 1
    return collected, i


def _read_markdown(lines, start):
    """Read the triple-quoted string expected at ``lines[start]``.

    Returns:
        ``(text, next_index)``.  ``text`` is ``None`` when there is no
        triple-quoted string at *start* (``next_index`` is then *start*),
        and ``""`` when the string is never closed.
    """
    if start >= len(lines):
        return None, start
    first = lines[start].strip()
    if not first.startswith(_TRIPLE_QUOTE):
        return None, start

    remainder = first[len(_TRIPLE_QUOTE):]
    if _TRIPLE_QUOTE in remainder:
        # Opened and closed on the same line.
        return remainder.split(_TRIPLE_QUOTE, 1)[0].strip(), start + 1

    body = [remainder]
    for j in range(start + 1, len(lines)):
        before, closing, _ = lines[j].partition(_TRIPLE_QUOTE)
        if closing:
            # Keep any text that precedes the closing quotes.
            body.append(before)
            return '\n'.join(body).strip(), j + 1
        body.append(lines[j])

    # Unterminated string: emit an empty cell and resume after the opener.
    return "", start + 1


def parse_schema(schema_path):
    """Parse the schema file at *schema_path*.

    Returns:
        ``(imports, items)`` where ``imports`` is the list of stripped lines
        found in the imports section and ``items`` is an ordered list of
        ``(kind, payload)`` tuples with ``kind`` one of ``'code'``,
        ``'inline'``, ``'shell'`` or ``'markdown'``.

    Raises:
        OSError: if the schema file cannot be read.
    """
    with open(schema_path, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')

    imports = []
    items = []
    in_imports_section = False
    i = 0
    while i < len(lines):
        stripped = lines[i].strip()

        if stripped.startswith('# ==='):
            # Only the IMPORTS header opens the section; any other section
            # header closes it.
            in_imports_section = stripped == '# === IMPORTS ==='
            i += 1
            continue

        if in_imports_section:
            if stripped and not stripped.startswith('#'):
                imports.append(stripped)
                i += 1
                continue
            if stripped.startswith('#'):
                # A comment ends the section; the line is then handled below.
                in_imports_section = False

        if stripped.startswith('# code:'):
            ref = stripped[len('# code:'):].strip()
            if ref:
                items.append(('code', ref))
            i += 1
            continue

        if stripped.startswith(('# inline:', '# # shell:')):
            directive = 'inline' if stripped.startswith('# inline:') else 'shell'
            block, i = _collect_block(lines, i + 1, directive)
            if block:
                items.append((directive, '\n'.join(block).rstrip()))
            continue

        if stripped == '# markdown':
            text, i = _read_markdown(lines, i + 1)
            if text is not None:
                items.append(('markdown', text))
            continue

        i += 1
    return imports, items


def _paren_delta(text):
    """Return open-minus-close parenthesis count of *text*, ignoring comments."""
    code = text.split('#', 1)[0]
    return code.count('(') - code.count(')')


def strip_all_imports(content):
    """Return *content* without import lines and ``if __name__`` guard blocks.

    * Every line whose stripped form starts with ``import `` or ``from `` is
      removed, together with the continuation lines of a parenthesised or
      backslash-continued import statement.
    * An ``if __name__`` block is removed up to (not including) the next
      non-blank line indented no deeper than the guard; ``elif``/``else:``
      at the guard's indent are treated as part of the block.
    * Trailing blank lines are removed.
    """
    kept = []
    guard_indent = None  # indent of the guard currently being skipped
    open_parens = 0  # > 0 while inside a parenthesised import
    continued = False  # previous import line ended with a backslash

    for line in content.split('\n'):
        stripped = line.strip()

        if open_parens > 0:
            open_parens = max(0, open_parens + _paren_delta(stripped))
            continue
        if continued:
            continued = stripped.endswith('\\')
            continue

        if guard_indent is not None:
            if not stripped:
                continue
            indent = len(line) - len(line.lstrip())
            if indent > guard_indent:
                continue
            if indent == guard_indent and stripped.startswith(('elif ', 'else:')):
                continue
            # Back at (or above) the guard's level: the block is over and
            # this line is processed normally.
            guard_indent = None

        if stripped.startswith('if __name__'):
            guard_indent = len(line) - len(line.lstrip())
            continue

        if stripped.startswith(('import ', 'from ')):
            # Continuations are tracked only for genuine import statements,
            # so a prose line starting with "from " drops only itself.
            is_from = bool(_FROM_IMPORT_RE.match(stripped))
            if is_from:
                open_parens = max(0, _paren_delta(stripped))
            if is_from or _PLAIN_IMPORT_RE.match(stripped):
                continued = stripped.split('#', 1)[0].rstrip().endswith('\\')
            continue

        kept.append(line)

    while kept and not kept[-1].strip():
        kept.pop()
    return '\n'.join(kept)


def read_src_file(ref, base_dir):
    """Return the text of the source file that *ref* points to.

    *ref* is resolved relative to ``<base_dir>/src``.  A leading ``./``,
    a leading ``src/`` and leading slashes are ignored, and ``.py`` is
    appended when missing.

    Raises:
        FileNotFoundError: if the resolved file does not exist.
    """
    rel = ref
    # Strip prefixes only at the start; removing "src/" anywhere would
    # corrupt names such as "resrc/util".
    if rel.startswith('./'):
        rel = rel[2:]
    if rel.startswith('src/'):
        rel = rel[len('src/'):]
    rel = rel.lstrip('/')

    full_path = os.path.join(base_dir, 'src', rel)
    if not full_path.endswith('.py'):
        full_path += '.py'
    with open(full_path, 'r', encoding='utf-8') as f:
        return f.read()


def _code_cell(text):
    """Build a code-cell dict from *text*.

    Trailing blank lines are trimmed, then one empty line is appended so
    the cell source ends with a blank line.
    """
    lines = text.split('\n')
    while lines and not lines[-1].strip():
        lines.pop()
    if lines:
        lines.append('')
    return {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": [line + "\n" for line in lines],
    }


def build_notebook(schema_path, output_path=None):
    """Build a notebook from *schema_path* and write it as JSON.

    Args:
        schema_path: path of the schema file to parse.
        output_path: destination file; defaults to ``notebook.gen.ipynb``
            in the schema's directory.

    A ``code`` item whose source file is missing is skipped with a warning
    printed to stdout.
    """
    schema_dir = os.path.dirname(os.path.abspath(schema_path))
    if output_path is None:
        output_path = os.path.join(schema_dir, 'notebook.gen.ipynb')

    imports, items = parse_schema(schema_path)

    cells = []
    if imports:
        cells.append({
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [imp + "\n" for imp in imports],
        })

    for item_type, item_content in items:
        if item_type == 'markdown':
            cells.append({
                "cell_type": "markdown",
                "metadata": {},
                "source": item_content + "\n",
            })
        elif item_type in ('inline', 'shell'):
            cells.append(_code_cell(item_content))
        elif item_type == 'code':
            try:
                content = read_src_file(item_content, schema_dir)
            except FileNotFoundError:
                print(f"Warning: Could not find: {item_content}")
            else:
                cells.append(_code_cell(strip_all_imports(content)))

    notebook = {
        "cells": cells,
        "metadata": {
            "kernelspec": {
                "display_name": ".venv",
                "language": "python",
                "name": "python3",
            },
            "language_info": {
                "name": "python",
                "version": "3.11.0",
            },
        },
        "nbformat": 4,
        "nbformat_minor": 5,
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(notebook, f, indent=1, ensure_ascii=False)
    print(f"Notebook generated: {output_path}")


if __name__ == "__main__":
    script_dir = os.path.dirname(os.path.abspath(__file__))
    schema_path = os.path.join(script_dir, '_schema.py')
    if os.path.exists(schema_path):
        build_notebook(schema_path)
    else:
        print(f"Error: _schema.py not found at {schema_path}")