# autopilot/models/SiaN/build.py

import json
import os
import re


# Comment prefixes that open a new directive or section and therefore end
# the body of a preceding `# inline:` / `# # shell:` block.
_DIRECTIVE_PREFIXES = ('# code:', '# inline:', '# # shell:', '# ===')


def _starts_directive(stripped):
    """Return True if the stripped line opens a new directive or section."""
    return stripped == '# markdown' or stripped.startswith(_DIRECTIVE_PREFIXES)


def _read_block(lines, start, directive):
    """Collect the body of an ``inline`` or ``shell`` block.

    Reading starts at ``lines[start]`` and stops at the next directive or
    section marker (or at end of file). Blank lines are dropped. In a
    ``shell`` block, ``# `` / ``# # `` comment prefixes are removed and other
    ``#`` comments are discarded; every other line is kept verbatim.

    Returns:
        A ``(body_lines, next_index)`` tuple, where ``next_index`` is the
        index of the line that ended the block (``len(lines)`` at EOF).
    """
    body = []
    i = start
    while i < len(lines):
        raw = lines[i]
        stripped = raw.strip()
        # The terminator check must come first: every directive also starts
        # with '# ', so it would otherwise be consumed as block content.
        if _starts_directive(stripped):
            break
        i += 1
        if not stripped:
            continue
        if directive == 'shell' and stripped.startswith('#'):
            if stripped.startswith('# # '):
                body.append(stripped[3:].lstrip())
            elif stripped.startswith('# '):
                body.append(stripped[2:])
            # Any other '#...' comment is dropped from the shell cell.
            continue
        body.append(raw)
    return body, i


def _read_markdown(lines, start):
    """Extract the triple-quoted text whose opening line is ``lines[start]``.

    Handles both the one-line form (``\"\"\"text\"\"\"``) and the multi-line
    form. Only the delimiters themselves are removed, so quote characters
    that belong to the content are preserved.

    Returns:
        A ``(text, next_index)`` tuple. If no closing delimiter is found the
        text is empty and only the opening line is consumed.
    """
    opener = lines[start].strip()
    if opener.endswith('"""') and len(opener) > 3:
        return opener[3:-3].strip(), start + 1

    for j in range(start + 1, len(lines)):
        before, closing, _ = lines[j].partition('"""')
        if closing:
            parts = [opener[3:]]
            parts.extend(lines[start + 1:j])
            parts.append(before)
            return '\n'.join(parts).strip(), j + 1

    return "", start + 1


def parse_schema(schema_path):
    """Parse a notebook schema file into import lines and cell items.

    The schema is a text file driven by comment directives:

    * ``# === IMPORTS ===`` starts the imports section; every following
      non-blank, non-comment line is collected (stripped) as an import.
      Any comment line, including another ``# === ... ===`` marker, ends it.
    * ``# code: <ref>`` yields ``('code', ref)``.
    * ``# inline:`` yields ``('inline', text)`` from the following lines.
    * ``# # shell:`` yields ``('shell', text)`` from the following lines.
    * ``# markdown`` followed by a triple-quoted string yields
      ``('markdown', text)``.

    ``inline`` and ``shell`` blocks run until the next directive or section
    marker; empty blocks produce no item.

    Args:
        schema_path: Path of the schema file (read as UTF-8).

    Returns:
        A ``(imports, items)`` tuple: ``imports`` is a list of import lines
        and ``items`` a list of ``(kind, content)`` tuples in file order.
    """
    with open(schema_path, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')

    imports = []
    items = []
    in_imports_section = False

    i = 0
    while i < len(lines):
        stripped = lines[i].strip()

        if stripped == '# === IMPORTS ===':
            in_imports_section = True
            i += 1
            continue
        if stripped.startswith('# ==='):
            in_imports_section = False
            i += 1
            continue

        if in_imports_section and stripped and not stripped.startswith('#'):
            imports.append(stripped)
            i += 1
            continue

        # A comment inside the imports section closes it; the same line is
        # then examined as a possible directive below.
        if in_imports_section and stripped.startswith('#'):
            in_imports_section = False

        if stripped.startswith('# code:'):
            match = re.search(r'# code:\s*(.+)', stripped)
            if match:
                items.append(('code', match.group(1).strip()))
            i += 1
            continue

        if stripped.startswith(('# inline:', '# # shell:')):
            directive = 'inline' if stripped.startswith('# inline:') else 'shell'
            body, i = _read_block(lines, i + 1, directive)
            if body:
                items.append((directive, '\n'.join(body).rstrip()))
            continue

        if stripped == '# markdown':
            i += 1
            # Without a triple-quoted string the directive is ignored and
            # the next line is parsed normally.
            if i < len(lines) and lines[i].strip().startswith('"""'):
                md_content, i = _read_markdown(lines, i)
                items.append(('markdown', md_content))
            continue

        i += 1

    return imports, items


def strip_all_imports(content):
    """Remove import statements and ``if __name__`` guards from source text.

    Works line by line (no real parsing):

    * Any line whose stripped text starts with ``import `` or ``from `` is
      dropped, together with its continuation lines when the statement
      spans several lines (open parenthesis or trailing backslash).
    * An ``if __name__ ...`` guard is dropped with its body: every blank or
      more-indented line after it, plus ``elif``/``else`` clauses at the
      guard's own indent. The first other line at the guard's indent or
      shallower ends the guard and is processed normally.
    * Trailing blank lines are removed from the result.

    Args:
        content: Python source text.

    Returns:
        The filtered source text, lines joined with ``'\\n'``.
    """
    result_lines = []
    guard_indent = None   # indent of the guard being skipped, or None
    in_import = False     # inside a multi-line import being dropped
    open_parens = 0       # unbalanced '(' count of that import

    for line in content.split('\n'):
        stripped = line.strip()

        if in_import:
            open_parens += line.count('(') - line.count(')')
            in_import = open_parens > 0 or stripped.endswith('\\')
            continue

        indent = len(line) - len(line.lstrip())

        if guard_indent is not None:
            if not stripped or indent > guard_indent:
                continue
            if indent == guard_indent and stripped.startswith(('elif ', 'else:')):
                continue
            # Back at (or above) the guard's level: the guard is over and
            # this line is handled like any other.
            guard_indent = None

        if stripped.startswith('if __name__'):
            guard_indent = indent
            continue

        if stripped.startswith(('import ', 'from ')):
            open_parens = line.count('(') - line.count(')')
            in_import = open_parens > 0 or stripped.endswith('\\')
            continue

        result_lines.append(line)

    while result_lines and result_lines[-1].strip() == '':
        result_lines.pop()

    return '\n'.join(result_lines)


def read_src_file(ref, base_dir):
    """Read a source file referenced by a ``# code:`` directive.

    ``ref`` is resolved relative to ``<base_dir>/src``. A leading ``./``,
    leading slashes and a leading ``src/`` component are removed, so
    ``./src/pkg/mod``, ``src/pkg/mod`` and ``pkg/mod`` all name the same
    file. Only the prefix is stripped: a ``src/`` substring elsewhere in the
    reference (e.g. ``mysrc/util``) is left intact. ``.py`` is appended when
    missing.

    Args:
        ref: File reference taken from the schema.
        base_dir: Directory that contains the ``src`` folder.

    Returns:
        The file's text content (decoded as UTF-8).

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    ref_path = ref
    if ref_path.startswith('./'):
        ref_path = ref_path[2:]
    ref_path = ref_path.lstrip('/')
    if ref_path.startswith('src/'):
        ref_path = ref_path[len('src/'):]
    ref_path = ref_path.lstrip('/')

    full_path = os.path.join(base_dir, 'src', ref_path)
    if not full_path.endswith('.py'):
        full_path += '.py'

    with open(full_path, 'r', encoding='utf-8') as f:
        return f.read()


def _code_cell(source_lines):
    """Return a notebook code cell whose source is the given lines."""
    return {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": [line + "\n" for line in source_lines],
    }


def _code_cell_from_text(text):
    """Return a code cell for ``text``, normalising its trailing blank lines.

    Trailing blank lines are removed and, for non-empty text, a single empty
    line is appended before each line is newline-terminated.
    """
    lines = text.split('\n')
    while lines and lines[-1].strip() == '':
        lines.pop()
    if lines:
        lines.append('')
    return _code_cell(lines)


def build_notebook(schema_path, output_path=None):
    """Generate a Jupyter notebook (nbformat 4.5) from a schema file.

    The schema's imports become the first code cell; each schema item then
    becomes one cell in order. ``code`` items are loaded from the ``src``
    directory next to the schema and have their imports and
    ``if __name__`` guards stripped. A missing source file only produces a
    warning and is skipped.

    Args:
        schema_path: Path of the schema file.
        output_path: Where to write the notebook. Defaults to
            ``notebook.gen.ipynb`` in the schema's directory.
    """
    schema_dir = os.path.dirname(os.path.abspath(schema_path))
    if output_path is None:
        output_path = os.path.join(schema_dir, 'notebook.gen.ipynb')

    imports, items = parse_schema(schema_path)

    cells = []

    if imports:
        cells.append(_code_cell(imports))

    for item_type, item_content in items:
        if item_type == 'markdown':
            cells.append({
                "cell_type": "markdown",
                "metadata": {},
                "source": item_content + "\n"
            })
        elif item_type in ('inline', 'shell'):
            cells.append(_code_cell_from_text(item_content))
        elif item_type == 'code':
            try:
                content = read_src_file(item_content, schema_dir)
            except FileNotFoundError:
                print(f"Warning: Could not find: {item_content}")
                continue
            cells.append(_code_cell_from_text(strip_all_imports(content)))

    # nbformat 4.5 requires a unique `id` on every cell. Position-based ids
    # keep the generated file identical across rebuilds of the same schema.
    for index, cell in enumerate(cells):
        cell["id"] = f"cell-{index}"

    notebook = {
        "cells": cells,
        "metadata": {
            "kernelspec": {
                "display_name": ".venv",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "name": "python",
                "version": "3.11.0"
            }
        },
        "nbformat": 4,
        "nbformat_minor": 5
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(notebook, f, indent=1, ensure_ascii=False)

    print(f"Notebook generated: {output_path}")


if __name__ == "__main__":
    # When run as a script, build the notebook from the `_schema.py` that
    # sits next to this file; report an error if the schema is absent.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    schema_path = os.path.join(script_dir, '_schema.py')

    if not os.path.exists(schema_path):
        print(f"Error: _schema.py not found at {schema_path}")
    else:
        build_notebook(schema_path)