Skip to content

Analyzers API Reference

Auto-generated from source code docstrings.

Context Compiler

analyzers.context_compiler

Context Compiler - One-shot project understanding for AI agents.

Runs all analyzers and compresses their output into a single structured document optimized for AI consumption. This is the "drop an AI into a project and it immediately understands everything" tool.

The output is designed to fit within a reasonable context window while giving an AI agent enough information to:

- Understand what the project does
- Know where everything is
- Understand the dependency structure
- Know which components are extractable
- Start working immediately

Usage

python context_compiler.py python context_compiler.py --output context.json python context_compiler.py --output context.md --markdown python context_compiler.py --budget 5000 # max ~5000 lines

Functions

compile_context

compile_context(project_path: str, budget: int = 8000, skip_graph: bool = False) -> dict

Compile full project context.

Source code in Tools/analyzers/context_compiler.py
def compile_context(
    project_path: str,
    budget: int = 8000,
    skip_graph: bool = False,
) -> dict:
    """Compile full project context.

    Runs each analyzer stage (structure, platforms, frameworks, code map,
    interfaces, boundaries, docs) and merges the condensed results into a
    single dict suitable for JSON serialization or markdown rendering.

    Args:
        project_path: Root directory of the project to analyze.
        budget: Soft output-size budget in lines. NOTE(review): unused in
            this function body — presumably consumed by a caller that trims
            the result; confirm before removing.
        skip_graph: When True, skip the (slower) boundary-detection stage.

    Returns:
        dict with a "meta" section plus one key per analyzer stage. A stage
        that raises is recorded as ``{"error": str(e)}`` under its key so a
        single failing analyzer never aborts the whole compile.
    """

    project = Path(project_path).resolve()
    print(f"\nCompiling context for: {project.name}", flush=True)
    print(f"{'='*50}", flush=True)

    context = {
        "meta": {
            "project": project.name,
            "root": str(project),
            "compiled_at": datetime.now().isoformat(),
            "compiler_version": "1.0.0",
        },
    }

    # 1. Structure overview
    print("  [1/6] Analyzing structure...", flush=True)
    try:
        structure = analyze_structure(str(project))
        context["structure"] = {
            "total_files": structure["total_files"],
            "total_dirs": structure["total_dirs"],
            "total_size": structure["total_size_bytes"],
            "top_level_dirs": structure["top_level_dirs"],
            # Keep only the 15 most common extensions to stay within budget.
            "file_types": {
                ext: data["count"]
                for ext, data in sorted(
                    structure["file_types"].items(),
                    key=lambda x: x[1]["count"],
                    reverse=True
                )[:15]
            },
            "largest_files": [
                {"path": f["path"], "size": f["size"]}
                for f in structure["largest_files"][:10]
            ],
        }
    except Exception as e:
        context["structure"] = {"error": str(e)}

    # 2. Platform detection
    print("  [2/6] Detecting platforms...", flush=True)
    try:
        platforms = detect_all_platforms(project)
        context["platforms"] = {
            "detected": [
                {
                    "name": p.platform,
                    "version": p.version,
                    "confidence": p.confidence,
                }
                for p in platforms.platforms
            ],
            "is_multi_platform": platforms.is_multi_platform,
        }
    except Exception as e:
        context["platforms"] = {"error": str(e)}

    # 3. Frameworks
    print("  [3/6] Detecting frameworks...", flush=True)
    try:
        context["frameworks"] = detect_framework(project)
    except Exception as e:
        # Bug fix: this was the only stage not wrapped in try/except, so a
        # framework-detection failure crashed the entire compile. Isolate it
        # like every other stage.
        context["frameworks"] = {"error": str(e)}

    # 4. Code map (condensed)
    print("  [4/6] Building code map...", flush=True)
    try:
        codemap = generate_codemap(str(project), max_files=1500)
        context["codemap"] = {
            "languages": codemap["summary"]["languages"],
            "entry_points": codemap["entry_points"],
            # Only include files that have structure (classes/functions)
            "key_files": [],
        }

        for f in codemap["files"]:
            has_structure = any(
                f.get(k) for k in
                ["classes", "functions", "structs", "traits",
                 "interfaces", "enums", "impls", "exports"]
            )
            if has_structure and f.get("role") == "source":
                # Condense: just names, not full details
                entry = {"path": f["path"], "lines": f.get("line_count", 0)}

                for k in ["classes", "structs"]:
                    items = f.get(k, [])
                    if items:
                        entry[k] = [
                            {
                                "name": c["name"],
                                "methods": len(c.get("methods", [])),
                                "bases": c.get("bases", []),
                            }
                            for c in items
                        ]

                funcs = f.get("functions", [])
                if funcs:
                    entry["functions"] = [fn["name"] for fn in funcs]

                for k in ["traits", "interfaces", "enums"]:
                    items = f.get(k, [])
                    if items:
                        entry[k] = [x["name"] for x in items]

                exports = f.get("exports", [])
                if exports:
                    entry["exports"] = [e["name"] for e in exports]

                context["codemap"]["key_files"].append(entry)

    except Exception as e:
        context["codemap"] = {"error": str(e)}

    # 5. Interfaces (condensed)
    print("  [5/6] Extracting interfaces...", flush=True)
    try:
        interfaces = extract_interfaces(str(project))
        # Only include modules with meaningful public APIs
        context["interfaces"] = {
            "module_count": interfaces["summary"]["modules_with_interfaces"],
            "modules": {},
        }

        for mod_path, iface in interfaces["modules"].items():
            condensed = {}

            for cls in iface.get("classes", []):
                # __init__ is kept even though it starts with '_' — it is
                # the constructor and part of the public surface.
                public_methods = [
                    m["name"] for m in cls.get("methods", [])
                    if not m["name"].startswith('_') or m["name"] == '__init__'
                ]
                if public_methods:
                    condensed.setdefault("classes", []).append({
                        "name": cls["name"],
                        "bases": cls.get("bases", []),
                        "methods": public_methods,
                    })

            func_names = [f["name"] for f in iface.get("functions", [])]
            if func_names:
                condensed["functions"] = func_names

            type_names = [t["name"] for t in iface.get("types", [])]
            if type_names:
                condensed["types"] = type_names

            # Exports may be dicts ({"name": ...}) or bare strings depending
            # on the source language extractor.
            export_names = [
                e["name"] if isinstance(e, dict) else e
                for e in iface.get("exports", [])
            ]
            if export_names:
                condensed["exports"] = export_names

            if condensed:
                context["interfaces"]["modules"][mod_path] = condensed

    except Exception as e:
        context["interfaces"] = {"error": str(e)}

    # 6. Boundaries (if not skipped)
    if not skip_graph:
        print("  [6/6] Detecting boundaries...", flush=True)
        try:
            boundaries = detect_boundaries(str(project), min_cluster=2)
            context["boundaries"] = {
                "clusters": [
                    {
                        "name": c.get("common_prefix") or f"cluster-{c['id']}",
                        "files": c["file_count"],
                        "cohesion": c["cohesion"],
                        "external_deps": c["external_dep_count"],
                        "extraction_risk": (
                            "low" if c["external_dep_count"] == 0 else
                            "medium" if c["external_dep_count"] <= 3 else "high"
                        ),
                    }
                    for c in boundaries["clusters"]
                ],
                "bridge_files": [
                    {"file": b["file"], "connects": b["connects_dirs"]}
                    for b in boundaries["bridges"][:10]
                ],
                "orphan_count": len(boundaries["orphans"]),
                "orphans": boundaries["orphans"][:20],
            }
        except Exception as e:
            context["boundaries"] = {"error": str(e)}
    else:
        print("  [6/6] Skipping boundary detection", flush=True)

    # 7. Project docs (bonus)
    docs = read_project_docs(project)
    if docs:
        context["docs"] = docs

    return context

format_markdown

format_markdown(context: dict) -> str

Format compiled context as markdown.

Source code in Tools/analyzers/context_compiler.py
def format_markdown(context: dict) -> str:
    """Format compiled context as markdown.

    Renders the dict produced by ``compile_context`` into a markdown report.
    Sections whose data is missing, empty, or recorded as an {"error": ...}
    dict are silently omitted.

    Args:
        context: Compiled context dict; must contain at least
            ``meta.project`` and ``meta.compiled_at``.

    Returns:
        The full markdown document as a single string.
    """
    lines = [
        f"# Project Context: {context['meta']['project']}",
        "",
        f"*Compiled: {context['meta']['compiled_at']}*",
        "",
    ]

    # Structure
    s = context.get("structure", {})
    if s and "error" not in s:
        lines.extend([
            "## Structure",
            "",
            f"**{s.get('total_files', '?')} files** | "
            f"**{s.get('total_dirs', '?')} dirs** | "
            f"Top-level: {', '.join(f'`{d}`' for d in s.get('top_level_dirs', [])[:10])}",
            "",
        ])

    # Platforms & Frameworks
    platforms = context.get("platforms", {})
    frameworks = context.get("frameworks", [])
    if platforms.get("detected") or frameworks:
        lines.append("## Tech Stack")
        lines.append("")
        for p in platforms.get("detected", []):
            ver = f" {p['version']}" if p.get("version") else ""
            lines.append(f"- **{p['name']}**{ver} ({p['confidence']})")
        for f in frameworks:
            lines.append(f"- {f}")
        lines.append("")

    # Entry Points
    eps = context.get("codemap", {}).get("entry_points", [])
    if eps:
        lines.extend(["## Entry Points", ""])
        for ep in eps:
            lines.append(f"- `{ep['path']}` ({ep['type']})")
        lines.append("")

    # Key Files
    key_files = context.get("codemap", {}).get("key_files", [])
    if key_files:
        lines.extend(["## Key Source Files", ""])
        for f in key_files:
            parts = []
            for cls in f.get("classes", []) + f.get("structs", []):
                parts.append(f"class {cls['name']}({cls.get('methods', 0)} methods)")
            # Cap at 5 function names per file to keep the report compact.
            for fn_name in f.get("functions", [])[:5]:
                parts.append(f"fn {fn_name}")
            for t in f.get("traits", []) + f.get("interfaces", []):
                parts.append(f"interface {t}")

            detail = f" - {', '.join(parts)}" if parts else ""
            lines.append(f"- `{f['path']}` ({f.get('lines', '?')}L){detail}")
        lines.append("")

    # Boundaries
    bounds = context.get("boundaries", {})
    if bounds and "error" not in bounds:
        clusters = bounds.get("clusters", [])
        if clusters:
            lines.extend(["## Component Boundaries", "",
                          "| Component | Files | Cohesion | External Deps | Risk |",
                          "|-----------|-------|----------|---------------|------|"])
            for c in clusters:
                lines.append(
                    f"| `{c['name']}` | {c['files']} | "
                    f"{c['cohesion']} | {c['external_deps']} | "
                    f"{c['extraction_risk']} |"
                )
            lines.append("")

        bridges = bounds.get("bridge_files", [])
        if bridges:
            lines.extend(["### Bridge Files", ""])
            for b in bridges:
                lines.append(f"- `{b['file']}` connects {', '.join(b['connects'][:4])}")
            lines.append("")

    # Docs
    docs = context.get("docs", {})
    if docs:
        lines.extend(["## Project Documentation", ""])
        for filename, content in docs.items():
            # Bug fix: the header previously rendered the literal string
            # "(unknown)" and never used `filename`.
            lines.append(f"### {filename}")
            lines.append("")
            # Only first ~50 lines of each document; split once, not twice.
            doc_lines = content.split('\n')
            lines.extend(doc_lines[:50])
            if len(doc_lines) > 50:
                lines.append("*(truncated)*")
            lines.append("")

    return "\n".join(lines)

Code Map

analyzers.codemap

Code Map Generator - The most important AI agent accelerator.

Scans a codebase and produces a structured index of everything an AI agent needs to understand the project without reading every file:

- File roles (config, source, test, docs, build)
- Classes with methods and inheritance
- Functions with signatures
- Exports and entry points
- Key constants and type definitions

Supports: Python, JavaScript/TypeScript, Rust, Go, C#, Java

Output: JSON (default) or markdown (--markdown)

Usage

python codemap.py python codemap.py --output codemap.json python codemap.py --markdown --output codemap.md python codemap.py --depth 3 --max-files 500

Functions

generate_codemap

generate_codemap(project_path: str, max_files: int = 2000) -> dict

Generate a complete code map for a project.

Source code in Tools/analyzers/codemap.py
def generate_codemap(project_path: str, max_files: int = 2000) -> dict:
    """Generate a complete code map for a project.

    Walks the project tree (pruning ignored directories), scans each file,
    and returns a dict with per-language stats, per-role counts, detected
    entry points, and one entry per scanned file. Stops after `max_files`
    files and flags the result as truncated.

    Raises:
        FileNotFoundError: if `project_path` does not exist.
    """

    root_path = Path(project_path).resolve()
    if not root_path.exists():
        raise FileNotFoundError(f"Project path not found: {project_path}")

    entries = []
    languages = defaultdict(lambda: {"files": 0, "lines": 0})
    roles = defaultdict(int)
    seen = 0

    for current_dir, subdirs, names in os.walk(root_path):
        # Prune ignored and hidden directories in place so os.walk skips them.
        subdirs[:] = [d for d in subdirs if d not in SKIP_DIRS and not d.startswith('.')]

        for name in sorted(names):
            if name in SKIP_FILES or name.startswith('.'):
                continue

            seen += 1
            if seen > max_files:
                break

            entry = scan_file(Path(current_dir) / name, root_path)
            if entry:
                entries.append(entry)
                roles[entry.get("role", "unknown")] += 1
                lang = entry.get("lang")
                if lang:
                    languages[lang]["files"] += 1
                    languages[lang]["lines"] += entry.get("line_count", 0)

        if seen > max_files:
            break

    return {
        "project": root_path.name,
        "root": str(root_path),
        "summary": {
            "total_files": len(entries),
            "languages": dict(languages),
            "roles": dict(roles),
            "truncated": seen > max_files,
        },
        "entry_points": find_entry_points(root_path, entries),
        "files": entries,
    }

scan_file

scan_file(filepath: Path, project_root: Path) -> Optional[dict]

Scan a single file and return its map entry.

Source code in Tools/analyzers/codemap.py
def scan_file(filepath: Path, project_root: Path) -> Optional[dict]:
    """Scan a single file and return its map entry.

    Returns None when the file cannot be stat'ed. Files over 500 KB are
    recorded as skipped (metadata only). Source files in a supported
    language are parsed for structure; other known-language files only get
    a line count.
    """
    rel_path = str(filepath.relative_to(project_root))
    lang = LANG_MAP.get(filepath.suffix.lower())
    role = classify_role(rel_path)

    try:
        size = filepath.stat().st_size
    except OSError:
        return None

    # Very large files are likely generated/vendored: index, don't parse.
    if size > 500_000:
        return {
            "path": rel_path,
            "lang": lang,
            "role": role,
            "size": size,
            "skipped": "too_large",
        }

    entry = {
        "path": rel_path,
        "lang": lang,
        "role": role,
        "size": size,
        "line_count": 0,
    }

    parser = PARSERS.get(lang) if (lang and role == "source") else None
    if parser is not None:
        try:
            text = filepath.read_text(encoding='utf-8', errors='ignore')
            entry["line_count"] = text.count('\n') + 1
            # Keep only the sections the parser actually populated.
            for section, items in parser(filepath, text).items():
                if items:
                    entry[section] = items
        except Exception:
            pass  # best-effort: an unparseable file still gets its metadata
    elif lang:
        try:
            text = filepath.read_text(encoding='utf-8', errors='ignore')
            entry["line_count"] = text.count('\n') + 1
        except Exception:
            pass

    return entry

find_entry_points

find_entry_points(project_root: Path, file_entries: list[dict]) -> list[dict]

Detect likely entry points for the project.

Source code in Tools/analyzers/codemap.py
def find_entry_points(project_root: Path, file_entries: list[dict]) -> list[dict]:
    """Detect likely entry points for the project.

    Matches well-known entry filenames against scanned file paths, then
    checks package.json ("main" and start/dev/serve scripts) and
    pyproject.toml ([project.scripts]) for declared entry points.

    Args:
        project_root: Project root (used to locate manifest files).
        file_entries: Scanned file entries; only their "path" keys are read.

    Returns:
        List of {"path": ..., "type": ...} dicts, in deterministic order.
    """
    entry_points = []

    indicators = [
        ("main.py", "python"),
        ("app.py", "python"),
        ("server.py", "python"),
        ("__main__.py", "python"),
        ("manage.py", "python (django)"),
        ("wsgi.py", "python (wsgi)"),
        ("asgi.py", "python (asgi)"),
        ("index.js", "javascript"),
        ("index.ts", "typescript"),
        ("server.js", "javascript"),
        ("server.ts", "typescript"),
        ("app.js", "javascript"),
        ("app.ts", "typescript"),
        ("main.rs", "rust"),
        ("lib.rs", "rust (library)"),
        ("main.go", "go"),
        ("Program.cs", "csharp"),
        ("Main.java", "java"),
    ]

    paths_set = {e["path"] for e in file_entries}

    for filename, lang in indicators:
        # Bug fix: endswith(filename) also matched suffixes of other
        # basenames ("domain.py" matched "main.py", "webapp.py" matched
        # "app.py"). Compare the basename exactly instead, and iterate in
        # sorted order so output no longer depends on set iteration order.
        for path in sorted(paths_set):
            if path.split('/')[-1] == filename:
                entry_points.append({"path": path, "type": lang})

    # Check package.json for scripts
    pkg_json = project_root / "package.json"
    if pkg_json.exists():
        try:
            data = json.loads(pkg_json.read_text())
            if "main" in data:
                entry_points.append({"path": data["main"], "type": "package.json main"})
            if "scripts" in data:
                for key in ("start", "dev", "serve"):
                    if key in data["scripts"]:
                        entry_points.append({
                            "path": f"package.json scripts.{key}",
                            "type": data["scripts"][key][:60],
                        })
        except Exception:
            pass  # malformed manifest: ignore, entry points are best-effort

    # Check pyproject.toml for entry points
    pyproject = project_root / "pyproject.toml"
    if pyproject.exists():
        try:
            content = pyproject.read_text()
            # Simple regex for [project.scripts]
            m = re.search(r'\[project\.scripts\]\s*\n((?:\w+\s*=.*\n?)+)', content)
            if m:
                for line in m.group(1).strip().split('\n'):
                    parts = line.split('=', 1)
                    if len(parts) == 2:
                        entry_points.append({
                            "path": f"pyproject.toml scripts.{parts[0].strip()}",
                            "type": parts[1].strip().strip('"\''),
                        })
        except Exception:
            pass

    return entry_points

classify_role

classify_role(filepath: str) -> str

Classify a file's role based on its path and name.

Source code in Tools/analyzers/codemap.py
def classify_role(filepath: str) -> str:
    """Classify a file's role based on its path and name.

    Returns the first role whose pattern list matches the path
    (case-insensitive); files matching nothing default to "source".
    """
    for role, patterns in ROLE_PATTERNS.items():
        if any(re.search(pat, filepath, re.IGNORECASE) for pat in patterns):
            return role
    return "source"

Boundary Detector

analyzers.boundary_detector

Boundary Detector - Find natural component boundaries in codebases.

Analyzes the import/dependency graph to detect clusters of files that naturally belong together. Helps AI agents decide WHERE to cut when extracting components.

Outputs
  • Detected component clusters (files that import each other heavily)
  • Bridge files (connect multiple clusters - extract carefully)
  • Orphan files (no imports to/from - easy to extract or remove)
  • Suggested extraction units with dependency counts
  • Cross-cluster dependencies (what breaks if you extract)

Supports: Python, JavaScript/TypeScript, Go, Rust

Usage

python boundary_detector.py python boundary_detector.py --output boundaries.json python boundary_detector.py --markdown python boundary_detector.py --min-cluster 3

Functions

detect_boundaries

detect_boundaries(project_path: str, min_cluster: int = 2) -> dict

Run full boundary detection.

Source code in Tools/analyzers/boundary_detector.py
def detect_boundaries(project_path: str, min_cluster: int = 2) -> dict:
    """Run full boundary detection.

    Builds the import graph, detects clusters, finds bridge and orphan
    files, and derives per-cluster extraction-risk suggestions.

    Args:
        project_path: Root directory of the project to analyze.
        min_cluster: Minimum files for a connected component to count
            as a cluster.

    Returns:
        dict with summary stats, clusters, bridges, orphans, extraction
        suggestions, and the raw import graph.
    """
    project = Path(project_path).resolve()

    print("  Building import graph...", flush=True)
    graph = build_import_graph(str(project))

    print("  Detecting clusters...", flush=True)
    clusters = detect_clusters(graph, min_cluster)

    print("  Finding bridge files...", flush=True)
    bridges = find_bridge_files(graph)

    print("  Finding orphans...", flush=True)
    # Bug fix: graph["files"] maps relative path -> normalized path, and the
    # graph's edge keys are *relative* paths. Passing .values() (normalized,
    # extension-less names) meant the connected-set subtraction never matched,
    # so virtually every file was reported as an orphan. Pass the keys.
    orphans = find_orphans(graph, set(graph["files"]))

    # Build extraction suggestions
    suggestions = []
    for cluster in clusters:
        extractable = cluster["external_dep_count"] == 0
        risk = "low" if extractable else (
            "medium" if cluster["external_dep_count"] <= 3 else "high"
        )

        suggestions.append({
            "cluster_id": cluster["id"],
            "name": cluster["common_prefix"] or f"cluster-{cluster['id']}",
            "files": cluster["file_count"],
            "extraction_risk": risk,
            "reason": (
                "No external dependencies - clean extraction"
                if extractable else
                f"{cluster['external_dep_count']} external deps, "
                f"{cluster['external_dependent_count']} external dependents"
            ),
        })

    return {
        "project": project.name,
        "root": str(project),
        "summary": {
            "total_source_files": len(graph["files"]),
            "total_import_edges": sum(len(v) for v in graph["imports"].values()),
            "clusters_found": len(clusters),
            "bridge_files": len(bridges),
            "orphan_files": len(orphans),
        },
        "clusters": clusters,
        "bridges": bridges,
        "orphans": orphans,
        "extraction_suggestions": suggestions,
        "import_graph": graph["imports"],
    }

build_import_graph

build_import_graph(project_path: str) -> dict

Build a full import graph for the project.

Source code in Tools/analyzers/boundary_detector.py
def build_import_graph(project_path: str) -> dict:
    """Build a full import graph for the project.

    Indexes every source file under a normalized (extension-free) key,
    resolves each file's raw import strings against that index, and returns
    forward ("imports") and reverse ("imported_by") edge maps keyed by
    relative file paths.
    """
    root = Path(project_path).resolve()

    # normalized (extension-free) path -> real relative path
    known = {}
    for dirpath, dirnames, filenames in os.walk(root):
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS and not d.startswith('.')]

        for name in filenames:
            full = Path(dirpath) / name
            if full.suffix.lower() not in EXTRACTORS:
                continue
            rel = str(full.relative_to(root))
            stem = rel.rsplit('.', 1)[0]
            known[stem] = rel
            # "pkg/index" is also importable as plain "pkg"
            if stem.endswith('/index'):
                known[stem[:-6]] = rel

    forward = defaultdict(set)   # importer -> files it imports
    backward = defaultdict(set)  # imported file -> its importers

    for stem, rel in known.items():
        full = root / rel
        extractor = EXTRACTORS.get(full.suffix.lower())
        if extractor is None:
            continue

        for imp in extractor(full, root):
            # Resolve the raw import against the index: bare path first,
            # then the index / __init__ conventions.
            for candidate in (imp, imp + '/index', imp + '/__init__'):
                if candidate in known:
                    target = known[candidate]
                    if target != rel:  # no self-imports
                        forward[rel].add(target)
                        backward[target].add(rel)
                    break

    return {
        "files": {rel: stem for stem, rel in known.items()},
        "imports": {src: sorted(dsts) for src, dsts in forward.items()},
        "imported_by": {dst: sorted(srcs) for dst, srcs in backward.items()},
    }

detect_clusters

detect_clusters(graph: dict, min_cluster: int = 2) -> list[dict]

Detect clusters of tightly connected files using simple community detection.

Uses a greedy approach: start from each unvisited file, expand to include files that share the most import connections with the current cluster.

Source code in Tools/analyzers/boundary_detector.py
def detect_clusters(graph: dict, min_cluster: int = 2) -> list[dict]:
    """
    Detect clusters of tightly connected files using simple community detection.

    Uses a greedy approach: start from each unvisited file, expand to include
    files that share the most import connections with the current cluster.
    """
    imports = graph["imports"]
    imported_by = graph["imported_by"]

    # Treat every import edge as undirected for connectivity purposes.
    neighbors = defaultdict(set)
    for src, dsts in imports.items():
        for dst in dsts:
            neighbors[src].add(dst)
            neighbors[dst].add(src)

    every_node = set(imports) | set(imported_by)
    seen = set()
    clusters = []

    # Seed expansion from the best-connected nodes first.
    for seed in sorted(every_node, key=lambda n: len(neighbors[n]), reverse=True):
        if seed in seen:
            continue

        # Breadth-first walk over the connected component containing `seed`.
        members = set()
        frontier = [seed]
        while frontier:
            current = frontier.pop(0)
            if current in seen:
                continue
            seen.add(current)
            members.add(current)
            frontier.extend(n for n in neighbors[current] if n not in seen)

        if len(members) < min_cluster:
            continue

        # Longest directory prefix shared by every member path.
        split_paths = [m.split('/') for m in members]
        shared = []
        for segment_group in zip(*split_paths):
            if len(set(segment_group)) != 1:
                break
            shared.append(segment_group[0])

        # Tally imports that stay inside the cluster vs. those that leave it.
        inside_edges = 0
        deps_out = set()
        dependents_out = set()
        for member in members:
            for target in imports.get(member, []):
                if target in members:
                    inside_edges += 1
                else:
                    deps_out.add(target)
            for importer in imported_by.get(member, []):
                if importer not in members:
                    dependents_out.add(importer)

        clusters.append({
            "id": len(clusters),
            "files": sorted(members),
            "file_count": len(members),
            "common_prefix": '/'.join(shared) if shared else None,
            "internal_edges": inside_edges,
            "cohesion": round(inside_edges / max(len(members), 1), 2),
            "external_deps": sorted(deps_out),
            "external_dep_count": len(deps_out),
            "external_dependents": sorted(dependents_out),
            "external_dependent_count": len(dependents_out),
        })

    # Largest clusters first (ids keep discovery order).
    clusters.sort(key=lambda c: c["file_count"], reverse=True)
    return clusters

find_bridge_files

find_bridge_files(graph: dict) -> list[dict]

Find files that connect multiple clusters (high betweenness).

Source code in Tools/analyzers/boundary_detector.py
def find_bridge_files(graph: dict) -> list[dict]:
    """Find files that connect multiple clusters (high betweenness)."""
    imports = graph["imports"]
    imported_by = graph["imported_by"]

    results = []

    for node in set(imports) | set(imported_by):
        incoming = set(imported_by.get(node, []))
        outgoing = set(imports.get(node, []))
        linked = incoming | outgoing

        # Collect the top-level (or two-level) directories the neighbors
        # live in; a file touching two or more distinct ones is a bridge.
        touched_dirs = set()
        for neighbor in linked:
            segments = neighbor.split('/')
            if len(segments) > 1:
                touched_dirs.add(segments[0] if len(segments) <= 2 else '/'.join(segments[:2]))

        if len(touched_dirs) >= 2:
            results.append({
                "file": node,
                "connects_dirs": sorted(touched_dirs),
                "imported_by_count": len(incoming),
                "imports_count": len(outgoing),
                "total_connections": len(linked),
            })

    # Most-connected first; cap the report at 30 bridges.
    results.sort(key=lambda entry: entry["total_connections"], reverse=True)
    return results[:30]

find_orphans

find_orphans(graph: dict, all_source_files: set) -> list[str]

Find files with no import relationships (easy to extract/remove).

Source code in Tools/analyzers/boundary_detector.py
def find_orphans(graph: dict, all_source_files: set) -> list[str]:
    """Find files with no import relationships (easy to extract/remove)."""
    linked = set(graph["imports"]) | set(graph["imported_by"])
    return sorted(path for path in all_source_files if path not in linked)

Interface Extractor

analyzers.interface_extractor

Interface Extractor - Extract public API surfaces from codebases.

Answers: "What does this module/package expose?" without reading implementations. An AI agent uses this to understand boundaries - what can be imported, called, or composed from each part of a project.

Extracts
  • Public function signatures (with types)
  • Public class interfaces (public methods, no internals)
  • Exported types, interfaces, enums
  • Module-level all / export declarations
  • Package-level re-exports

Supports: Python, JavaScript/TypeScript, Rust, Go, C#

Output: JSON (default) or markdown (--markdown)

Usage

python interface_extractor.py python interface_extractor.py --output interfaces.json python interface_extractor.py src/core/ --markdown

Functions

extract_interfaces

extract_interfaces(project_path: str) -> dict

Extract all public interfaces from a project.

Source code in Tools/analyzers/interface_extractor.py
def extract_interfaces(project_path: str) -> dict:
    """Extract all public interfaces from a project.

    Walks the tree (pruning ignored directories), runs the per-language
    extractor on every recognized file, and returns the interfaces keyed
    by relative path plus a per-language summary.
    """
    root = Path(project_path).resolve()

    modules = {}
    per_language = defaultdict(int)

    for dirpath, dirnames, filenames in os.walk(root):
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS and not d.startswith('.')]

        for name in sorted(filenames):
            path = Path(dirpath) / name
            mapping = LANG_MAP.get(path.suffix.lower())
            if mapping is None:
                continue

            lang, extractor = mapping
            interface = extractor(path)
            if not interface:
                continue

            interface["lang"] = lang
            modules[str(path.relative_to(root))] = interface
            per_language[lang] += 1

    return {
        "project": root.name,
        "root": str(root),
        "summary": {
            "modules_with_interfaces": len(modules),
            "by_language": dict(per_language),
        },
        "modules": modules,
    }

extract_python_interface

extract_python_interface(filepath: Path) -> Optional[dict]

Extract public interface from a Python file.

Source code in Tools/analyzers/interface_extractor.py
def extract_python_interface(filepath: Path) -> Optional[dict]:
    """Extract the public interface from a Python file.

    Collects ``__all__`` exports, public classes (with their public
    methods, class/static methods and properties), public functions,
    annotation-based type aliases, and UPPER_CASE constants.

    Args:
        filepath: Path to the ``.py`` file to parse.

    Returns:
        A dict containing only the non-empty sections among ``exports``,
        ``classes``, ``functions``, ``types`` and ``constants`` (an empty
        interface yields ``{}``), or ``None`` if the file cannot be parsed.
    """
    try:
        content = filepath.read_text(encoding='utf-8', errors='ignore')
        tree = ast.parse(content)
    except (SyntaxError, UnicodeDecodeError):
        return None

    interface = {
        "exports": [],
        "classes": [],
        "functions": [],
        "types": [],
        "constants": [],
    }

    # Honour __all__ when present: it is the canonical export list and
    # also filters which top-level names are included below.
    all_names = None
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.Assign):
            for target in node.targets:
                if isinstance(target, ast.Name) and target.id == '__all__':
                    if isinstance(node.value, (ast.List, ast.Tuple)):
                        all_names = set()
                        for elt in node.value.elts:
                            if isinstance(elt, ast.Constant) and isinstance(elt.value, str):
                                all_names.add(elt.value)
                    interface["exports"] = [
                        {"name": n} for n in sorted(all_names)
                    ] if all_names else []

    for node in ast.iter_child_nodes(tree):
        # Skip single-underscore (private) names; dunder names pass through.
        name = getattr(node, 'name', None)
        if name and name.startswith('_') and not name.startswith('__'):
            continue

        # If __all__ exists, only include names it lists.
        if all_names is not None and name and name not in all_names:
            continue

        if isinstance(node, ast.ClassDef):
            cls_info = {
                "name": node.name,
                "bases": [ast.unparse(b) for b in node.bases],
                "decorators": [ast.unparse(d) for d in node.decorator_list],
                "methods": [],
                "class_methods": [],
                "properties": [],
                "line": node.lineno,
            }

            # First line of the class docstring, if any.
            if (node.body and isinstance(node.body[0], ast.Expr)
                    and isinstance(node.body[0].value, ast.Constant)
                    and isinstance(node.body[0].value.value, str)):
                doc = node.body[0].value.value.strip()
                cls_info["doc"] = doc.split('\n')[0]

            for item in node.body:
                if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    # __init__ is the only private-looking name kept.
                    if item.name.startswith('_') and item.name != '__init__':
                        continue

                    sig = _python_func_sig(item)
                    deco_names = {d.id for d in item.decorator_list
                                  if isinstance(d, ast.Name)}

                    if 'property' in deco_names:
                        cls_info["properties"].append(sig)
                    elif deco_names & {'classmethod', 'staticmethod'}:
                        # Both are callable without an instance, so they
                        # share the class_methods bucket.
                        cls_info["class_methods"].append(sig)
                    else:
                        cls_info["methods"].append(sig)

            interface["classes"].append(cls_info)

        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            sig = _python_func_sig(node)

            # First line of the function docstring, if any.
            if (node.body and isinstance(node.body[0], ast.Expr)
                    and isinstance(node.body[0].value, ast.Constant)
                    and isinstance(node.body[0].value.value, str)):
                doc = node.body[0].value.value.strip()
                sig["doc"] = doc.split('\n')[0]

            interface["functions"].append(sig)

        elif isinstance(node, ast.Assign):
            for target in node.targets:
                if isinstance(target, ast.Name) and target.id.isupper():
                    try:
                        val = ast.unparse(node.value)
                        # Skip very long values: they are noise, not API.
                        if len(val) < 120:
                            interface["constants"].append({
                                "name": target.id,
                                "value": val,
                                "line": node.lineno,
                            })
                    except Exception:
                        pass

        # Annotation-style type aliases (typing.TypeAlias / TypeVar).
        elif isinstance(node, ast.AnnAssign):
            if isinstance(node.target, ast.Name) and node.annotation:
                # ast.Name stores its identifier in .id (the previous
                # `node.target.name` hasattr fallback was dead code).
                alias_name = node.target.id
                ann = ast.unparse(node.annotation)
                if 'TypeAlias' in ann or 'TypeVar' in ann:
                    val = ast.unparse(node.value) if node.value else ""
                    interface["types"].append({
                        "name": alias_name,
                        "definition": val[:100],
                        "line": node.lineno,
                    })

    # Remove empty sections so callers can test truthiness per key.
    return {k: v for k, v in interface.items() if v}

extract_js_ts_interface

extract_js_ts_interface(filepath: Path) -> Optional[dict]

Extract public interface from a JS/TS file.

Source code in Tools/analyzers/interface_extractor.py
def extract_js_ts_interface(filepath: Path) -> Optional[dict]:
    """Extract the exported surface of a JavaScript/TypeScript file.

    Scans line by line with regexes for exported functions, arrow
    functions, classes, interfaces, type aliases, enums, constants and
    re-export statements.  Returns only the non-empty sections, or
    ``None`` if the file cannot be read.
    """
    try:
        source = filepath.read_text(encoding='utf-8', errors='ignore')
    except Exception:
        return None

    # Compile every line pattern once, outside the scan loop.
    func_re = re.compile(
        r'^export\s+(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*'
        r'(?:<[^>]*>)?\s*\(([^)]*)\)(?:\s*:\s*([^\{]+))?'
    )
    arrow_re = re.compile(
        r'^export\s+(?:default\s+)?(?:const|let)\s+(\w+)\s*'
        r'(?::\s*[^=]+)?\s*=\s*(?:async\s+)?'
        r'(?:\(([^)]*)\)|(\w+))\s*(?::\s*([^\s=>]+))?\s*=>'
    )
    class_re = re.compile(
        r'^export\s+(?:default\s+)?(?:abstract\s+)?class\s+(\w+)'
        r'(?:\s+extends\s+([\w.]+))?'
        r'(?:\s+implements\s+([\w.,\s]+))?'
    )
    iface_re = re.compile(
        r'^export\s+(?:default\s+)?interface\s+(\w+)'
        r'(?:\s+extends\s+([\w.,\s]+))?'
    )
    type_re = re.compile(r'^export\s+type\s+(\w+)(?:<[^>]*>)?\s*=\s*(.+)')
    enum_re = re.compile(r'^export\s+(?:const\s+)?enum\s+(\w+)')
    const_re = re.compile(r'^export\s+const\s+(\w+)\s*(?::\s*([^=]+))?\s*=\s*(.+)')
    reexport_re = re.compile(r'^export\s*\{([^}]+)\}\s*(?:from\s*[\'"]([^\'"]+)[\'"])?')
    star_re = re.compile(r'^export\s*\*\s*(?:as\s+(\w+)\s+)?from\s*[\'"]([^\'"]+)[\'"]')

    collected = {
        "exports": [],
        "classes": [],
        "functions": [],
        "types": [],
        "constants": [],
    }

    for lineno, text in enumerate(source.split('\n'), 1):
        # Exported (possibly async/default) function declarations.
        match = func_re.match(text)
        if match:
            collected["functions"].append({
                "name": match.group(1),
                "args_raw": match.group(2).strip()[:100],
                "returns": match.group(3).strip() if match.group(3) else None,
                "line": lineno,
            })
            continue

        # Exported arrow functions bound to const/let.
        match = arrow_re.match(text)
        if match:
            collected["functions"].append({
                "name": match.group(1),
                "args_raw": (match.group(2) or match.group(3) or "").strip()[:100],
                "line": lineno,
            })
            continue

        # Exported classes, with extends/implements clauses.
        match = class_re.match(text)
        if match:
            collected["classes"].append({
                "name": match.group(1),
                "extends": match.group(2),
                "implements": [x.strip() for x in match.group(3).split(',')] if match.group(3) else [],
                "line": lineno,
            })
            continue

        # Exported TypeScript interfaces.
        match = iface_re.match(text)
        if match:
            collected["types"].append({
                "name": match.group(1),
                "kind": "interface",
                "extends": [x.strip() for x in match.group(2).split(',')] if match.group(2) else [],
                "line": lineno,
            })
            continue

        # Exported type aliases.
        match = type_re.match(text)
        if match:
            collected["types"].append({
                "name": match.group(1),
                "kind": "type",
                "definition": match.group(2).strip()[:100],
                "line": lineno,
            })
            continue

        # Exported enums (including `const enum`).
        match = enum_re.match(text)
        if match:
            collected["types"].append({
                "name": match.group(1),
                "kind": "enum",
                "line": lineno,
            })
            continue

        # Exported constants with optional type annotation.
        match = const_re.match(text)
        if match:
            collected["constants"].append({
                "name": match.group(1),
                "type": match.group(2).strip() if match.group(2) else None,
                "value": match.group(3).strip()[:80],
                "line": lineno,
            })
            continue

        # `export { a, b as c } from '...'`; keep the exported alias.
        match = reexport_re.match(text)
        if match:
            exported_names = [part.strip().split(' as ')[-1].strip()
                              for part in match.group(1).split(',')]
            origin = match.group(2)
            for exported in exported_names:
                if exported:
                    collected["exports"].append({
                        "name": exported,
                        "from": origin,
                        "line": lineno,
                    })

        # `export * from '...'` / `export * as ns from '...'`.
        match = star_re.match(text)
        if match:
            collected["exports"].append({
                "name": match.group(1) or "*",
                "from": match.group(2),
                "line": lineno,
            })

    # Drop empty sections.
    return {k: v for k, v in collected.items() if v}

extract_rust_interface

extract_rust_interface(filepath: Path) -> Optional[dict]

Extract public interface from a Rust file.

Source code in Tools/analyzers/interface_extractor.py
def extract_rust_interface(filepath: Path) -> Optional[dict]:
    """Extract the public (``pub``) interface from a Rust source file:
    functions, structs, traits, enums and type aliases.

    Returns only the non-empty sections, or ``None`` if the file cannot
    be read.
    """
    try:
        source = filepath.read_text(encoding='utf-8', errors='ignore')
    except Exception:
        return None

    # One pattern per item kind; all require a leading `pub`, optionally
    # scoped (e.g. `pub(crate)`).
    fn_re = re.compile(
        r'^pub(?:\([^)]*\))?\s+(?:async\s+)?(?:unsafe\s+)?fn\s+(\w+)'
        r'\s*(?:<[^>]*>)?\s*\(([^)]*)\)(?:\s*->\s*(.+?))?(?:\s*(?:where|{))'
    )
    struct_re = re.compile(r'^pub(?:\([^)]*\))?\s+struct\s+(\w+)')
    trait_re = re.compile(r'^pub(?:\([^)]*\))?\s+trait\s+(\w+)')
    enum_re = re.compile(r'^pub(?:\([^)]*\))?\s+enum\s+(\w+)')
    alias_re = re.compile(r'^pub(?:\([^)]*\))?\s+type\s+(\w+)(?:<[^>]*>)?\s*=\s*(.+);')

    found = {
        "functions": [],
        "structs": [],
        "traits": [],
        "enums": [],
        "types": [],
    }

    for lineno, text in enumerate(source.split('\n'), 1):
        match = fn_re.match(text)
        if match:
            found["functions"].append({
                "name": match.group(1),
                "args_raw": match.group(2).strip()[:100],
                "returns": match.group(3).strip() if match.group(3) else None,
                "line": lineno,
            })

        match = struct_re.match(text)
        if match:
            found["structs"].append({"name": match.group(1), "line": lineno})

        match = trait_re.match(text)
        if match:
            found["traits"].append({"name": match.group(1), "line": lineno})

        match = enum_re.match(text)
        if match:
            found["enums"].append({"name": match.group(1), "line": lineno})

        match = alias_re.match(text)
        if match:
            found["types"].append({
                "name": match.group(1),
                "definition": match.group(2).strip()[:100],
                "line": lineno,
            })

    # Drop empty sections.
    return {k: v for k, v in found.items() if v}

extract_go_interface

extract_go_interface(filepath: Path) -> Optional[dict]

Extract public interface from a Go file (exported = capitalized).

Source code in Tools/analyzers/interface_extractor.py
def extract_go_interface(filepath: Path) -> Optional[dict]:
    """Extract the exported interface from a Go file.

    Go exports by capitalisation, so only identifiers starting with an
    upper-case letter are collected.  Returns only the non-empty
    sections (plus the package name), or ``None`` on read failure.
    """
    try:
        source = filepath.read_text(encoding='utf-8', errors='ignore')
    except Exception:
        return None

    found = {
        "functions": [],
        "structs": [],
        "interfaces": [],
        "methods": [],
        "types": [],
    }

    source_lines = source.split('\n')

    # The package declaration names the package this file belongs to.
    package_name = None
    for text in source_lines:
        pkg_match = re.match(r'^package\s+(\w+)', text)
        if pkg_match:
            package_name = pkg_match.group(1)
            break

    func_re = re.compile(r'^func\s+([A-Z]\w*)\s*\(([^)]*)\)(?:\s*(?:\(([^)]*)\)|(\S+)))?')
    method_re = re.compile(
        r'^func\s+\(\w+\s+\*?(\w+)\)\s+([A-Z]\w*)\s*\(([^)]*)\)'
        r'(?:\s*(?:\(([^)]*)\)|(\S+)))?'
    )
    struct_re = re.compile(r'^type\s+([A-Z]\w*)\s+struct\b')
    iface_re = re.compile(r'^type\s+([A-Z]\w*)\s+interface\b')

    for lineno, text in enumerate(source_lines, 1):
        # Exported top-level functions.
        match = func_re.match(text)
        if match:
            found["functions"].append({
                "name": match.group(1),
                "args_raw": match.group(2).strip()[:100],
                "returns": (match.group(3) or match.group(4) or "").strip()[:60] or None,
                "line": lineno,
            })

        # Exported methods (with receiver).
        match = method_re.match(text)
        if match:
            found["methods"].append({
                "receiver": match.group(1),
                "name": match.group(2),
                "args_raw": match.group(3).strip()[:100],
                "returns": (match.group(4) or match.group(5) or "").strip()[:60] or None,
                "line": lineno,
            })

        match = struct_re.match(text)
        if match:
            found["structs"].append({"name": match.group(1), "line": lineno})

        match = iface_re.match(text)
        if match:
            found["interfaces"].append({"name": match.group(1), "line": lineno})

    summary = {k: v for k, v in found.items() if v}
    if package_name and summary:
        summary["package"] = package_name
    return summary

Dependency Analyzer

analyzers.dependency_analyzer

Dependency Analyzer - Multi-language dependency detection.

Maps both internal and external dependencies for a project. Answers: "What does this project depend on?" and "What depends on what internally?"

Supports: Python, JavaScript/TypeScript, Rust, Go, C#, Java

For internal import graphs and boundary detection, see boundary_detector.py. This tool focuses on: - External/third-party dependency enumeration - Internal import graph (per-file) - Most imported/importing modules - Dependency counts by category (stdlib, third-party, local)

Output: JSON (default) or markdown (--markdown)

Usage

python dependency_analyzer.py python dependency_analyzer.py --output deps.json python dependency_analyzer.py --markdown

Functions

analyze_dependencies

analyze_dependencies(project_path: str) -> dict

Auto-detect language and analyze dependencies.

Source code in Tools/analyzers/dependency_analyzer.py
def analyze_dependencies(project_path: str) -> dict:
    """Auto-detect language and analyze dependencies.

    Runs each per-language analyzer whose marker is present (any ``.py``
    file, ``package.json``, ``Cargo.toml``, ``go.mod``) and summarises
    the union of external dependencies found across them.

    Args:
        project_path: Path to the project root.

    Returns:
        A dict with ``project``, ``root``, the list of per-language
        ``analyses`` and a ``summary`` block.
    """
    project = Path(project_path).resolve()

    results = {
        "project": project.name,
        "root": str(project),
        "analyses": [],
    }

    # Detect and run appropriate analyzers.  next() stops at the first
    # .py file; the previous `list(rglob(...))[:1]` materialised the
    # entire recursive listing just to test for emptiness.
    if next(project.rglob("*.py"), None) is not None:
        results["analyses"].append(analyze_python_deps(project))

    if (project / "package.json").exists():
        results["analyses"].append(analyze_node_deps(project))

    if (project / "Cargo.toml").exists():
        results["analyses"].append(analyze_rust_deps(project))

    if (project / "go.mod").exists():
        results["analyses"].append(analyze_go_deps(project))

    # Union of external deps: each analyzer reports them under a
    # different key, hence the four lookups.
    all_third_party = set()
    for analysis in results["analyses"]:
        all_third_party.update(analysis.get("third_party", []))
        all_third_party.update(analysis.get("used_packages", []))
        all_third_party.update(analysis.get("dependencies", {}).keys())
        all_third_party.update(analysis.get("requires", {}).keys())

    results["summary"] = {
        "languages": [a["language"] for a in results["analyses"]],
        "total_external_deps": len(all_third_party),
    }

    return results

analyze_python_deps

analyze_python_deps(project: Path) -> dict

Analyze Python project dependencies.

Source code in Tools/analyzers/dependency_analyzer.py
def analyze_python_deps(project: Path) -> dict:
    """Analyze Python project dependencies.

    Classifies every import as stdlib, third-party, or local (a package
    defined inside the project), builds per-file import lists, and ranks
    the most-imported and most-importing modules.

    Args:
        project: Path to the project root.

    Returns:
        A dict with sorted dependency lists, per-file imports, and the
        top-20 "most_imported" / "most_importing" rankings.
    """
    stdlib = get_stdlib_modules()

    # Any directory containing __init__.py counts as a local package.
    project_packages = set()
    for path in project.rglob("__init__.py"):
        if not any(skip in str(path) for skip in SKIP_DIRS):
            project_packages.add(path.parent.name)

    all_third_party = set()
    all_stdlib = set()
    all_local = set()
    file_imports = {}
    import_graph = defaultdict(set)
    imported_by = defaultdict(set)

    for filepath in project.rglob("*.py"):
        if any(skip in str(filepath) for skip in SKIP_DIRS):
            continue

        try:
            content = filepath.read_text(encoding='utf-8', errors='ignore')
            tree = ast.parse(content)
        except (SyntaxError, UnicodeDecodeError):
            continue  # skip unparsable files rather than failing the scan

        rel_path = str(filepath.relative_to(project))
        file_third = set()
        file_local = set()
        file_stdlib = set()

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    # Classify by the top-level package name only.
                    mod = alias.name.split('.')[0]
                    if mod in stdlib:
                        file_stdlib.add(mod)
                    elif mod in project_packages:
                        file_local.add(mod)
                    else:
                        file_third.add(mod)

            elif isinstance(node, ast.ImportFrom):
                if node.level > 0:
                    # Relative import: always local to the project.
                    if node.module:
                        file_local.add(f".{node.module}")
                elif node.module:
                    mod = node.module.split('.')[0]
                    if mod in stdlib:
                        file_stdlib.add(mod)
                    elif mod in project_packages:
                        file_local.add(mod)
                    else:
                        file_third.add(mod)

        all_third_party.update(file_third)
        all_stdlib.update(file_stdlib)
        all_local.update(file_local)

        if file_third or file_local:
            file_imports[rel_path] = {
                "third_party": sorted(file_third),
                "local": sorted(file_local),
            }

        # Dotted module name derived from path components, independent of
        # OS path separator.  (The old rel_path.replace('/', '.')
        # .replace('.py', '') broke on Windows backslashes and mangled
        # any path containing ".py" mid-string.)
        module_name = '.'.join(filepath.relative_to(project).with_suffix('').parts)
        for imp in file_third | file_local:
            import_graph[module_name].add(imp)
            imported_by[imp].add(module_name)

    return {
        "language": "python",
        "files_analyzed": len(file_imports),
        "third_party": sorted(all_third_party),
        "stdlib": sorted(all_stdlib),
        "local": sorted(all_local),
        "file_imports": file_imports,
        "most_imported": sorted(
            [(mod, len(deps)) for mod, deps in imported_by.items()],
            key=lambda x: x[1], reverse=True
        )[:20],
        "most_importing": sorted(
            [(mod, len(deps)) for mod, deps in import_graph.items()],
            key=lambda x: x[1], reverse=True
        )[:20],
    }

analyze_node_deps

analyze_node_deps(project: Path) -> dict

Analyze Node.js/TypeScript project dependencies.

Source code in Tools/analyzers/dependency_analyzer.py
def analyze_node_deps(project: Path) -> dict:
    """Analyze Node.js/TypeScript project dependencies.

    Cross-references the dependencies declared in package.json with the
    packages actually imported/required by source files, reporting both
    unused declarations and undeclared usages.
    """
    declared = {}
    declared_dev = {}

    manifest = project / "package.json"
    if manifest.exists():
        try:
            parsed = json.loads(manifest.read_text())
            declared = parsed.get("dependencies", {})
            declared_dev = parsed.get("devDependencies", {})
        except Exception:
            # Unreadable/invalid package.json: fall back to scan-only mode.
            pass

    used = set()
    per_file = {}

    # Both ESM import/export-from and CommonJS require() forms.
    import_patterns = (
        r"(?:import|export)\s+.*?from\s+['\"]([^'\"]+)['\"]",
        r"require\s*\(\s*['\"]([^'\"]+)['\"]\s*\)",
    )

    for suffix in ('.js', '.jsx', '.mjs', '.ts', '.tsx'):
        for source_file in project.rglob(f"*{suffix}"):
            if any(skip in str(source_file) for skip in SKIP_DIRS):
                continue

            try:
                text = source_file.read_text(encoding='utf-8', errors='ignore')
            except Exception:
                continue

            external = set()
            relative = set()

            for pattern in import_patterns:
                for found in re.finditer(pattern, text):
                    spec = found.group(1)
                    if spec.startswith('.') or spec.startswith('/'):
                        relative.add(spec)
                        continue
                    # Reduce the specifier to its package name; scoped
                    # packages keep the "@scope/name" prefix.
                    if spec.startswith('@'):
                        segments = spec.split('/')
                        package = '/'.join(segments[:2]) if len(segments) >= 2 else spec
                    else:
                        package = spec.split('/')[0]
                    external.add(package)
                    used.add(package)

            if external or relative:
                per_file[str(source_file.relative_to(project))] = {
                    "external": sorted(external),
                    "local_count": len(relative),
                }

    declared_all = set(declared) | set(declared_dev)

    return {
        "language": "javascript/typescript",
        "files_analyzed": len(per_file),
        "declared_deps": declared,
        "declared_dev_deps": declared_dev,
        "used_packages": sorted(used),
        "unused_declared": sorted(declared_all - used),
        "undeclared_used": sorted(used - declared_all),
        "file_imports": per_file,
    }

analyze_rust_deps

analyze_rust_deps(project: Path) -> dict

Analyze Rust project dependencies from Cargo.toml.

Source code in Tools/analyzers/dependency_analyzer.py
def analyze_rust_deps(project: Path) -> dict:
    """Analyze Rust project dependencies from Cargo.toml.

    Parses the [dependencies], [dev-dependencies] and [build-dependencies]
    sections with a lightweight line scanner (no TOML library), and scans
    .rs files for ``use`` / ``extern crate`` statements to list crates
    actually referenced in code.

    Args:
        project: Path to the project root (containing Cargo.toml).

    Returns:
        A dict with the declared dependency maps and the sorted list of
        ``used_crates``.
    """
    deps = {}
    dev_deps = {}
    build_deps = {}

    cargo = project / "Cargo.toml"
    if cargo.exists():
        try:
            content = cargo.read_text()

            # Track which [section] we are inside while scanning lines.
            current_section = None
            for line in content.split('\n'):
                line = line.strip()

                # Skip blanks and full-line comments; previously a line
                # like `# serde = "1"` was mis-parsed as a dependency
                # named "# serde".
                if not line or line.startswith('#'):
                    continue

                m = re.match(r'^\[([^\]]+)\]', line)
                if m:
                    current_section = m.group(1)
                    continue

                if '=' in line and current_section:
                    parts = line.split('=', 1)
                    name = parts[0].strip()
                    val = parts[1].strip().strip('"\'')

                    if current_section == "dependencies":
                        deps[name] = val
                    elif current_section == "dev-dependencies":
                        dev_deps[name] = val
                    elif current_section == "build-dependencies":
                        build_deps[name] = val
        except Exception:
            pass  # best-effort: unreadable manifest yields empty maps

    # Scan for use / extern crate statements in source files.
    used_crates = set()
    for filepath in project.rglob("*.rs"):
        if any(skip in str(filepath) for skip in SKIP_DIRS):
            continue
        try:
            content = filepath.read_text(encoding='utf-8', errors='ignore')
            for m in re.finditer(r'(?:use|extern crate)\s+(\w+)', content):
                crate = m.group(1)
                # Path keywords and built-in roots are not external crates.
                if crate not in ('crate', 'self', 'super', 'std', 'core', 'alloc'):
                    used_crates.add(crate)
        except Exception:
            pass

    return {
        "language": "rust",
        "dependencies": deps,
        "dev_dependencies": dev_deps,
        "build_dependencies": build_deps,
        "used_crates": sorted(used_crates),
    }

analyze_go_deps

analyze_go_deps(project: Path) -> dict

Analyze Go project dependencies from go.mod.

Source code in Tools/analyzers/dependency_analyzer.py
def analyze_go_deps(project: Path) -> dict:
    """Analyze Go project dependencies from go.mod.

    Reads the module path, Go version, and required modules in both the
    block form ``require ( ... )`` and single-line ``require`` entries.
    """
    module_name = ""
    go_version = ""
    requires = {}

    mod_file = project / "go.mod"
    if mod_file.exists():
        try:
            text = mod_file.read_text()

            module_match = re.search(r'^module\s+(\S+)', text, re.MULTILINE)
            if module_match:
                module_name = module_match.group(1)
            version_match = re.search(r'^go\s+(\S+)', text, re.MULTILINE)
            if version_match:
                go_version = version_match.group(1)

            # Scan line by line, tracking whether we are inside a
            # require ( ... ) block.
            inside_block = False
            for raw in text.split('\n'):
                entry = raw.strip()
                if entry.startswith('require ('):
                    inside_block = True
                elif inside_block and entry == ')':
                    inside_block = False
                elif inside_block:
                    fields = entry.split()
                    if len(fields) >= 2:
                        requires[fields[0]] = fields[1]
                elif entry.startswith('require '):
                    fields = entry.split()
                    if len(fields) >= 3:
                        requires[fields[1]] = fields[2]
        except Exception:
            # Best effort: report whatever was parsed before the failure.
            pass

    return {
        "language": "go",
        "module": module_name,
        "go_version": go_version,
        "requires": requires,
    }

Structure Analyzer

analyzers.structure_analyzer

Structure Analyzer

Analyzes the directory structure of a project and produces a summary report.

Usage

python structure_analyzer.py <project_path> [--output <file>]

Example

python structure_analyzer.py ../Ship_Yard/_intake/semantic-kernel

Functions

analyze_structure

analyze_structure(project_path: str) -> dict

Analyze the structure of a project directory.

Source code in Tools/analyzers/structure_analyzer.py
def analyze_structure(project_path: str) -> dict:
    """Analyze the structure of a project directory.

    Walks the tree (skipping hidden files/dirs and common build/VCS
    directories) and summarises file counts, sizes, extensions, the
    top-level layout, the largest files and the deepest directory.

    Raises:
        FileNotFoundError: if *project_path* does not exist.
    """
    root_path = Path(project_path)
    if not root_path.exists():
        raise FileNotFoundError(f"Project path not found: {project_path}")

    ignored = {
        'node_modules', '__pycache__', 'venv', '.venv', 'dist', 'build',
        '.git', '.hg', '.svn'
    }

    report = {
        "total_files": 0,
        "total_dirs": 0,
        "total_size_bytes": 0,
        "file_types": defaultdict(lambda: {"count": 0, "size": 0}),
        "top_level_dirs": [],
        "largest_files": [],
        "deepest_path": {"path": "", "depth": 0},
    }

    collected = []

    for current, subdirs, filenames in os.walk(root_path):
        # Prune hidden and ignored directories in place so the walk
        # never descends into them.
        subdirs[:] = [d for d in subdirs
                      if not d.startswith('.') and d not in ignored]

        relative = Path(current).relative_to(root_path)
        level = len(relative.parts)

        if level > report["deepest_path"]["depth"]:
            report["deepest_path"] = {"path": str(relative), "depth": level}

        report["total_dirs"] += len(subdirs)

        # The root iteration records the top-level directory names.
        if relative == Path('.'):
            report["top_level_dirs"] = sorted(subdirs)

        for filename in filenames:
            if filename.startswith('.'):
                continue  # hidden files are excluded from every statistic

            full = Path(current) / filename
            report["total_files"] += 1

            try:
                nbytes = full.stat().st_size
            except (OSError, PermissionError):
                nbytes = 0  # unreadable file still counts, with size 0

            report["total_size_bytes"] += nbytes

            suffix = full.suffix.lower() or "(no extension)"
            bucket = report["file_types"][suffix]
            bucket["count"] += 1
            bucket["size"] += nbytes

            collected.append({
                "path": str(full.relative_to(root_path)),
                "size": nbytes,
            })

    report["largest_files"] = sorted(
        collected, key=lambda entry: entry["size"], reverse=True
    )[:20]

    # Convert the defaultdict to a plain dict for clean serialisation.
    report["file_types"] = dict(report["file_types"])

    return report

Platform Detector

analyzers.platform_detector

Platform Detector - Detect programming platforms in projects

Based on Microsoft Oryx detection patterns. Auto-classifies projects by scanning for marker files.

Usage

python platform_detector.py <project_path> [--json] [--output <file>]

Example

python platform_detector.py ../Ship_Yard/_intake/semantic-kernel python platform_detector.py ../Projects/my-app --json

Functions

detect_all_platforms

detect_all_platforms(project_path: Path) -> DetectionResult

Detect all platforms in a project.

Parameters:

Name Type Description Default
project_path Path

Path to the project

required

Returns:

Type Description
DetectionResult

DetectionResult with all detected platforms

Source code in Tools/analyzers/platform_detector.py
def detect_all_platforms(project_path: Path) -> DetectionResult:
    """
    Detect all platforms in a project.

    Args:
        project_path: Path to the project

    Returns:
        DetectionResult with all detected platforms
    """
    resolved = project_path.resolve()

    outcome = DetectionResult(
        project_path=str(resolved),
        project_name=resolved.name
    )

    # Probe every configured platform and keep the positive detections.
    for platform_name, platform_config in PLATFORM_CONFIG.items():
        hit = detect_platform(resolved, platform_name, platform_config)
        if hit:
            outcome.platforms.append(hit)

    # More than one detected platform marks the project multi-platform.
    outcome.is_multi_platform = len(outcome.platforms) > 1

    return outcome

detect_platform

detect_platform(project_path: Path, platform: str, config: dict) -> Optional[PlatformDetection]

Detect if a specific platform is present in the project.

Parameters:

Name Type Description Default
project_path Path

Path to the project

required
platform str

Platform name

required
config dict

Platform configuration

required

Returns:

Type Description
Optional[PlatformDetection]

PlatformDetection if found, None otherwise

Source code in Tools/analyzers/platform_detector.py
def detect_platform(
    project_path: Path,
    platform: str,
    config: dict
) -> Optional[PlatformDetection]:
    """
    Detect if a specific platform is present in the project.

    Args:
        project_path: Path to the project
        platform: Platform name
        config: Platform configuration

    Returns:
        PlatformDetection if found, None otherwise
    """
    markers = []

    # Collect marker files; entries containing "*" are glob patterns.
    for candidate in config["marker_files"]:
        if "*" in candidate:
            hits = list(project_path.glob(candidate))
            if hits:
                # Keep at most three matches as evidence.
                markers.extend(str(h.relative_to(project_path)) for h in hits[:3])
        elif (project_path / candidate).exists():
            markers.append(candidate)

    if not markers:
        return None

    # Probe each configured version file until one yields a version.
    detected_version = None
    version_origin = None

    for version_file, method in config.get("version_files", []):
        candidate_path = project_path / version_file

        # A glob pattern resolves to its first match, if any.
        if "*" in version_file:
            hits = list(project_path.glob(version_file))
            if not hits:
                continue
            candidate_path = hits[0]

        if not candidate_path.exists():
            continue

        try:
            detected_version = extract_version(candidate_path, method)
        except Exception:
            continue  # unreadable/unparsable version file: try the next
        if detected_version:
            version_origin = str(candidate_path.relative_to(project_path))
            break

    # Multiple markers raise confidence from "medium" to "high".
    return PlatformDetection(
        platform=platform,
        version=detected_version,
        confidence="high" if len(markers) > 1 else "medium",
        marker_files=markers,
        version_source=version_origin
    )