Arena¶

The core orchestration component where agents compete for leadership.

Arena¶

`orc.arena.arena.Arena` ¶

The Arena where agents compete for leadership.

Example

```python
arena = Arena(
    agents=[DataAgent(), ReportAgent(), AnalyticsAgent()],
    judge=LLMJudge(llm),
    config=ArenaConfig(challenge_probability=0.3),
)

# Process a task - may trigger a trial
result = await arena.process("Analyze Q4 sales data")

# Check who leads which domain
warlord = arena.get_warlord("data")
print(f"Data domain Warlord: {warlord}")
```

Source code in orc/arena/arena.py

class Arena:
    """
    The Arena where agents compete for leadership.

    Every domain has at most one current "warlord": the agent that executes
    tasks for that domain. When a task is processed, another agent claiming
    the same domain may challenge the warlord; both then attempt the task in
    a Trial, and the outcome updates reputations and, possibly, leadership.

    Example:
        arena = Arena(
            agents=[DataAgent(), ReportAgent(), AnalyticsAgent()],
            judge=LLMJudge(llm),
            config=ArenaConfig(challenge_probability=0.3),
        )

        # Process a task - may trigger a trial
        result = await arena.process("Analyze Q4 sales data")

        # Check who leads which domain
        warlord = arena.get_warlord("data")
        print(f"Data domain Warlord: {warlord}")
    """

    # Reputation gained by the winner (and lost by the loser) of a trial.
    _SUCCESSION_REPUTATION_STEP = 0.1  # challenger dethrones the warlord
    _DEFENSE_REPUTATION_STEP = 0.05  # warlord keeps the domain

    def __init__(
        self,
        agents: List[Agent],
        judge: Judge,
        config: Optional[ArenaConfig] = None,
        reputation_store: Optional[ReputationStore] = None,
        # Hooks
        on_challenge: Optional[Callable[[str, str, str], None]] = None,
        on_succession: Optional[Callable[[str, str, str], None]] = None,
        on_trial_complete: Optional[Callable[[Verdict], None]] = None,
    ):
        """
        Initialize the Arena.

        Args:
            agents: List of agents that can compete. Agents are keyed by
                ``agent.name``; a later agent with the same name replaces
                an earlier one.
            judge: Judge to evaluate trial outcomes.
            config: Arena configuration. Defaults to ``ArenaConfig()``.
            reputation_store: Optional persistent storage for reputation.
            on_challenge: Hook called as ``(warlord, challenger, domain)``
                when a challenge is issued.
            on_succession: Hook called as ``(old_warlord, new_warlord,
                domain)`` when leadership changes.
            on_trial_complete: Hook called with the trial's verdict when a
                trial completes.
        """
        self.judge = judge
        self.config = config or ArenaConfig()
        self.reputation_store = reputation_store

        # Hooks
        self._on_challenge = on_challenge
        self._on_succession = on_succession
        self._on_trial_complete = on_trial_complete

        # Initialize agent states: every claimed domain starts at the
        # configured default reputation.
        self._agents: Dict[str, AgentState] = {}
        for agent in agents:
            self._agents[agent.name] = AgentState(
                agent=agent,
                reputation={d: self.config.default_reputation for d in self._get_domains(agent)},
            )

        # Domain -> current warlord name
        self._warlords: Dict[str, str] = {}

        # Initialize warlords (first agent claiming each domain)
        self._initialize_warlords()

        # Trial history (contested tasks only; see process()).
        self._trial_history: List[TrialResult] = []

    def _get_domains(self, agent: Agent) -> List[str]:
        """Return the agent's ``domains``, or its ``capabilities`` if it has none."""
        if hasattr(agent, "domains"):
            return agent.domains
        # Fallback: use capabilities as domains
        return agent.capabilities

    def _initialize_warlords(self):
        """Set initial warlords first-come-first-served, in agent insertion order."""
        for name, state in self._agents.items():
            for domain in self._get_domains(state.agent):
                if domain not in self._warlords:
                    self._warlords[domain] = name
                    state.is_warlord.add(domain)

    async def process(
        self,
        task: str,
        domain: Optional[str] = None,
        context: Optional[Dict[str, Any]] = None,
    ) -> TrialResult:
        """
        Process a task through the arena.

        This may trigger a trial if:
        1. Multiple agents claim the domain
        2. A challenger decides to challenge
        3. Challenge conditions are met

        Args:
            task: The task to execute.
            domain: Optional domain hint. If not provided, will be inferred.
            context: Optional execution context. It is copied, so the
                caller's dict is never modified. A ``task_id`` is added to
                the copy when the caller did not supply one.

        Returns:
            TrialResult with the outcome. When nobody challenges, the result
            has ``was_challenged=False`` and ``verdict=None`` and is not
            added to the trial history.
        """
        # Copy before adding task_id: the original only mutated the caller's
        # dict when it was non-empty, which was both surprising and uneven.
        context = dict(context) if context else {}
        context.setdefault("task_id", str(uuid.uuid4()))

        # Determine domain
        if not domain:
            domain = await self._infer_domain(task)

        # Get current warlord, electing one if the domain is vacant
        warlord_name = self._warlords.get(domain)
        if not warlord_name:
            warlord_name = await self._elect_warlord(domain)

        warlord_state = self._agents[warlord_name]

        # Check for challengers
        challenger = await self._find_challenger(task, domain, warlord_name)

        if not challenger:
            # No challenge - warlord executes
            task_result = await warlord_state.agent.process_task(task, context)

            return TrialResult(
                task=task,
                domain=domain,
                winner=warlord_name,
                winner_result=task_result,
                was_challenged=False,
                verdict=None,
            )

        # TRIAL BY COMBAT!
        if self._on_challenge:
            self._on_challenge(warlord_name, challenger, domain)

        trial = Trial(
            task=task,
            domain=domain,
            warlord=warlord_state.agent,
            challenger=self._agents[challenger].agent,
            judge=self.judge,
            context=context,
            timeout=self.config.trial_timeout_seconds,
            parallel=self.config.parallel_trial_execution,
        )

        result = await trial.execute()
        self._trial_history.append(result)

        # Update leadership
        await self._process_trial_result(result, domain, warlord_name, challenger)

        if self._on_trial_complete:
            self._on_trial_complete(result.verdict)

        return result

    async def _infer_domain(self, task: str) -> str:
        """
        Infer the domain from the task description.

        Simple heuristic: a domain scores one point per occurrence of its
        name in the task, plus one point for every capability of an agent
        claiming that domain that appears in the task. The best-scoring
        domain wins; ties go to the domain seen first.
        """
        task_lower = task.lower()

        # Direct mentions of each known domain.
        scores: Dict[str, int] = {
            domain: task_lower.count(domain.lower()) for domain in self._warlords
        }

        # Capability matches are scanned once. The scan used to sit inside
        # the domain loop, so every hit was counted once per known domain
        # and swamped the direct domain-name matches.
        for state in self._agents.values():
            hits = sum(1 for cap in state.agent.capabilities if cap.lower() in task_lower)
            if hits:
                for d in self._get_domains(state.agent):
                    scores[d] = scores.get(d, 0) + hits

        if scores:
            return max(scores, key=scores.get)

        # Fallback: nothing is known about any domain
        return "general"

    async def _elect_warlord(self, domain: str) -> str:
        """
        Elect a warlord for a domain with no current leader.

        Candidates are the agents claiming the domain, or every agent when
        nobody claims it. The candidate with the highest reputation in the
        domain wins (first one on ties).

        Raises:
            ValueError: If no agents are registered at all.
        """
        candidates = [
            name for name, state in self._agents.items()
            if domain in self._get_domains(state.agent)
        ]

        if not candidates:
            # No one claims this domain - consider every agent
            candidates = list(self._agents)

        if not candidates:
            # Same exception type max() would raise, with a usable message.
            raise ValueError(
                f"Cannot elect a warlord for domain {domain!r}: no agents registered"
            )

        # Agents without a score count as the configured default, matching
        # every other reputation lookup in this class.
        default_rep = self.config.default_reputation
        best = max(candidates, key=lambda n: self._agents[n].reputation.get(domain, default_rep))
        self._warlords[domain] = best
        self._agents[best].is_warlord.add(domain)
        return best

    async def _find_challenger(
        self,
        task: str,
        domain: str,
        warlord_name: str,
    ) -> Optional[str]:
        """
        Find an agent willing to challenge the warlord.

        An agent qualifies when it claims the domain, is outside its
        challenge cooldown, meets the minimum reputation, and its strategy
        (plus the base probability roll) says to challenge. One qualifying
        agent is then drawn at random, weighted by reputation.

        Returns:
            The challenger's name, or None if nobody challenges.
        """
        candidates = []

        for name, state in self._agents.items():
            if name == warlord_name:
                continue

            # Check if agent claims this domain
            if domain not in self._get_domains(state.agent):
                continue

            # Check cooldown
            last_challenge = state.last_challenge_time.get(domain)
            if last_challenge:
                elapsed = (datetime.now(timezone.utc) - last_challenge).total_seconds()
                if elapsed < self.config.challenge_cooldown_seconds:
                    continue

            # Check minimum reputation
            rep = state.reputation.get(domain, self.config.default_reputation)
            if rep < self.config.min_reputation_to_challenge:
                continue

            # Check challenge strategy (agents without one always challenge)
            strategy = getattr(state.agent, "challenge_strategy", AlwaysChallenge())
            if await self._should_challenge(strategy, state, domain, warlord_name):
                candidates.append(name)

        if not candidates:
            return None

        # Random selection weighted by reputation
        weights = [
            self._agents[n].reputation.get(domain, self.config.default_reputation)
            for n in candidates
        ]
        if sum(weights) <= 0:
            # random.choices rejects an all-zero weight vector; pick uniformly.
            return random.choice(candidates)

        return random.choices(candidates, weights=weights, k=1)[0]

    async def _should_challenge(
        self,
        strategy: ChallengeStrategy,
        state: AgentState,
        domain: str,
        warlord_name: str,
    ) -> bool:
        """
        Check if an agent should challenge based on strategy.

        The base probability roll comes first; only if it passes is the
        agent's strategy consulted with both reputations.
        """
        # Base probability check. random.random() is in [0.0, 1.0), so ">="
        # makes probability 0 mean "never" and 1 mean "always".
        if random.random() >= self.config.challenge_probability:
            return False

        # Strategy check
        warlord_rep = self._agents[warlord_name].reputation.get(
            domain, self.config.default_reputation
        )
        challenger_rep = state.reputation.get(domain, self.config.default_reputation)

        return strategy.should_challenge(
            domain=domain,
            warlord_name=warlord_name,
            warlord_reputation=warlord_rep,
            challenger_reputation=challenger_rep,
        )

    def _apply_trial_outcome(
        self,
        winner_state: AgentState,
        loser_state: AgentState,
        domain: str,
        step: float,
    ) -> None:
        """Record a win/loss and move both reputations by ``step``, clamped to [0, 1]."""
        default_rep = self.config.default_reputation

        winner_state.trial_wins[domain] = winner_state.trial_wins.get(domain, 0) + 1
        loser_state.trial_losses[domain] = loser_state.trial_losses.get(domain, 0) + 1

        winner_state.reputation[domain] = min(
            1.0, winner_state.reputation.get(domain, default_rep) + step
        )
        loser_state.reputation[domain] = max(
            0.0, loser_state.reputation.get(domain, default_rep) - step
        )

    async def _process_trial_result(
        self,
        result: TrialResult,
        domain: str,
        warlord_name: str,
        challenger_name: str,
    ):
        """
        Process trial result and update leadership.

        Updates win/loss counts and reputations, transfers the domain on a
        challenger win, tracks consecutive defenses (forcing a rotation at
        the configured limit), and persists both reputations if a store is
        configured.
        """
        warlord_state = self._agents[warlord_name]
        challenger_state = self._agents[challenger_name]

        # The cooldown clock starts after every trial, win or lose.
        challenger_state.last_challenge_time[domain] = datetime.now(timezone.utc)

        if result.winner == challenger_name:
            # SUCCESSION!
            self._apply_trial_outcome(
                challenger_state, warlord_state, domain, self._SUCCESSION_REPUTATION_STEP
            )

            # Transfer leadership
            warlord_state.is_warlord.discard(domain)
            warlord_state.consecutive_defenses[domain] = 0
            challenger_state.is_warlord.add(domain)
            challenger_state.consecutive_defenses[domain] = 0
            self._warlords[domain] = challenger_name

            if self._on_succession:
                self._on_succession(warlord_name, challenger_name, domain)

        else:
            # Warlord defends!
            self._apply_trial_outcome(
                warlord_state, challenger_state, domain, self._DEFENSE_REPUTATION_STEP
            )

            # Track consecutive defenses
            warlord_state.consecutive_defenses[domain] = (
                warlord_state.consecutive_defenses.get(domain, 0) + 1
            )
            warlord_state.last_defense_time[domain] = datetime.now(timezone.utc)

            # Check for forced rotation
            if warlord_state.consecutive_defenses[domain] >= self.config.max_consecutive_defenses:
                await self._force_rotation(domain, warlord_name)

        # Persist reputation if store available.
        # NOTE(review): the value passed is the offset from the default
        # reputation, not this trial's change - confirm that this is what
        # ReputationStore.update_reputation expects.
        if self.reputation_store:
            await self.reputation_store.update_reputation(
                challenger_name,
                domain,
                challenger_state.reputation[domain] - self.config.default_reputation,
            )
            await self.reputation_store.update_reputation(
                warlord_name,
                domain,
                warlord_state.reputation[domain] - self.config.default_reputation,
            )

    async def _force_rotation(self, domain: str, current_warlord: str):
        """
        Force leadership rotation after too many consecutive defenses.

        Leadership passes to the highest-reputation agent, other than the
        current warlord, that claims the domain. If there is no such agent,
        nothing changes.
        """
        default_rep = self.config.default_reputation
        candidates = [
            (name, state.reputation.get(domain, default_rep))
            for name, state in self._agents.items()
            if name != current_warlord and domain in self._get_domains(state.agent)
        ]

        if candidates:
            new_warlord = max(candidates, key=lambda x: x[1])[0]
            self._agents[current_warlord].is_warlord.discard(domain)
            self._agents[current_warlord].consecutive_defenses[domain] = 0
            self._agents[new_warlord].is_warlord.add(domain)
            # Start the new reign with a clean defense streak, as the
            # succession path does.
            self._agents[new_warlord].consecutive_defenses[domain] = 0
            self._warlords[domain] = new_warlord

            if self._on_succession:
                self._on_succession(current_warlord, new_warlord, domain)

    # Public API

    def get_warlord(self, domain: str) -> Optional[str]:
        """Get the current Warlord's name for a domain, or None if vacant."""
        return self._warlords.get(domain)

    def get_reputation(self, agent_name: str, domain: str) -> float:
        """
        Get an agent's reputation for a domain.

        Returns 0.0 for an unknown agent, and the configured default for a
        known agent with no score in the domain.
        """
        if agent_name not in self._agents:
            return 0.0
        return self._agents[agent_name].reputation.get(domain, self.config.default_reputation)

    def get_leaderboard(self, domain: str, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Get the reputation leaderboard for a domain.

        Returns:
            Up to ``limit`` dicts with keys ``agent``, ``reputation``,
            ``wins``, ``losses`` and ``is_warlord``, for agents claiming the
            domain, sorted by reputation (highest first).
        """
        scores = [
            {
                "agent": name,
                "reputation": state.reputation.get(domain, self.config.default_reputation),
                "wins": state.trial_wins.get(domain, 0),
                "losses": state.trial_losses.get(domain, 0),
                "is_warlord": domain in state.is_warlord,
            }
            for name, state in self._agents.items()
            if domain in self._get_domains(state.agent)
        ]

        return sorted(scores, key=lambda x: x["reputation"], reverse=True)[:limit]

    def get_trial_history(self, limit: int = 50) -> List[TrialResult]:
        """
        Get recent trial history.

        Returns:
            The last ``limit`` trial results, oldest first. A non-positive
            ``limit`` yields an empty list.
        """
        # [-0:] would slice the whole list, so handle limit <= 0 explicitly.
        if limit <= 0:
            return []
        return self._trial_history[-limit:]

    def register_agent(self, agent: Agent):
        """
        Register a new agent in the arena.

        Registering a name that already exists replaces that agent's state
        (reputation and counters start fresh), but any domain the name
        currently leads stays flagged on the new state.
        """
        state = AgentState(
            agent=agent,
            reputation={d: self.config.default_reputation for d in self._get_domains(agent)},
        )
        # Keep is_warlord in step with _warlords. Otherwise a re-registered
        # warlord could not be cleanly unregistered later, leaving a
        # dangling entry in _warlords.
        for domain, holder in self._warlords.items():
            if holder == agent.name:
                state.is_warlord.add(domain)
        self._agents[agent.name] = state

    def unregister_agent(self, agent_name: str):
        """
        Remove an agent from the arena.

        Domains the agent led become vacant; a new warlord is elected the
        next time a task for that domain is processed. Unknown names are
        ignored.
        """
        if agent_name in self._agents:
            state = self._agents[agent_name]
            # Remove from warlord positions
            for domain in list(state.is_warlord):
                if self._warlords.get(domain) == agent_name:
                    del self._warlords[domain]
            del self._agents[agent_name]

`__init__(agents, judge, config=None, reputation_store=None, on_challenge=None, on_succession=None, on_trial_complete=None)` ¶

Initialize the Arena.

Parameters:

Name	Type	Description	Default
`agents`	`List[Agent]`	List of agents that can compete.	required
`judge`	`Judge`	Judge to evaluate trial outcomes.	required
`config`	`Optional[ArenaConfig]`	Arena configuration.	`None`
`reputation_store`	`Optional[ReputationStore]`	Optional persistent storage for reputation.	`None`
`on_challenge`	`Optional[Callable[[str, str, str], None]]`	Hook called when a challenge is issued.	`None`
`on_succession`	`Optional[Callable[[str, str, str], None]]`	Hook called when leadership changes.	`None`
`on_trial_complete`	`Optional[Callable[[Verdict], None]]`	Hook called when a trial completes.	`None`

Source code in orc/arena/arena.py

def __init__(
    self,
    agents: List[Agent],
    judge: Judge,
    config: Optional[ArenaConfig] = None,
    reputation_store: Optional[ReputationStore] = None,
    # Hooks
    on_challenge: Optional[Callable[[str, str, str], None]] = None,
    on_succession: Optional[Callable[[str, str, str], None]] = None,
    on_trial_complete: Optional[Callable[[Verdict], None]] = None,
):
    """
    Set up an Arena with its contenders, judge and optional hooks.

    Args:
        agents: Agents that can compete; they are stored by ``agent.name``.
        judge: Judge that evaluates trial outcomes.
        config: Arena configuration; a default ``ArenaConfig()`` is used
            when this is omitted.
        reputation_store: Optional persistent storage for reputation.
        on_challenge: Hook invoked when a challenge is issued.
        on_succession: Hook invoked when leadership changes.
        on_trial_complete: Hook invoked when a trial completes.
    """
    # Collaborators and settings
    self.judge = judge
    self.config = config or ArenaConfig()
    self.reputation_store = reputation_store

    # Optional callbacks
    self._on_challenge = on_challenge
    self._on_succession = on_succession
    self._on_trial_complete = on_trial_complete

    # One AgentState per contender; every claimed domain starts at the
    # configured default reputation.
    self._agents: Dict[str, AgentState] = {}
    for contender in agents:
        starting_reputation = {
            claimed: self.config.default_reputation
            for claimed in self._get_domains(contender)
        }
        self._agents[contender.name] = AgentState(
            agent=contender,
            reputation=starting_reputation,
        )

    # Domain -> name of the agent currently leading it, seeded with the
    # first agent that claims each domain.
    self._warlords: Dict[str, str] = {}
    self._initialize_warlords()

    # Results of every trial run so far
    self._trial_history: List[TrialResult] = []

`get_leaderboard(domain, limit=10)` ¶

Get the reputation leaderboard for a domain.

Source code in orc/arena/arena.py

def get_leaderboard(self, domain: str, limit: int = 10) -> List[Dict[str, Any]]:
    """
    Build the reputation ranking for one domain.

    Only agents claiming ``domain`` are listed. Each row carries the keys
    ``agent``, ``reputation``, ``wins``, ``losses`` and ``is_warlord``; rows
    are ordered by reputation, highest first, and cut off after ``limit``.
    """
    rows: List[Dict[str, Any]] = []
    for agent_name, agent_state in self._agents.items():
        # Skip agents that do not compete in this domain.
        if domain not in self._get_domains(agent_state.agent):
            continue
        rows.append(
            {
                "agent": agent_name,
                "reputation": agent_state.reputation.get(
                    domain, self.config.default_reputation
                ),
                "wins": agent_state.trial_wins.get(domain, 0),
                "losses": agent_state.trial_losses.get(domain, 0),
                "is_warlord": domain in agent_state.is_warlord,
            }
        )

    # Stable sort: agents with equal reputation keep registration order.
    rows.sort(key=lambda row: row["reputation"], reverse=True)
    return rows[:limit]

`get_reputation(agent_name, domain)` ¶

Get an agent's reputation for a domain.

Source code in orc/arena/arena.py

def get_reputation(self, agent_name: str, domain: str) -> float:
    """
    Look up an agent's reputation in a domain.

    Unknown agents score 0.0. A known agent with no recorded score for the
    domain gets the configured default reputation.
    """
    if agent_name in self._agents:
        recorded = self._agents[agent_name].reputation
        return recorded.get(domain, self.config.default_reputation)
    return 0.0

`get_trial_history(limit=50)` ¶

Get recent trial history.

Source code in orc/arena/arena.py

def get_trial_history(self, limit: int = 50) -> List[TrialResult]:
    """
    Get recent trial history.

    Args:
        limit: Maximum number of results to return.

    Returns:
        The last ``limit`` trial results, oldest first. A non-positive
        ``limit`` yields an empty list.
    """
    # [-0:] would slice the whole list, so handle limit <= 0 explicitly.
    if limit <= 0:
        return []
    return self._trial_history[-limit:]

`get_warlord(domain)` ¶

Get the current Warlord for a domain.

Source code in orc/arena/arena.py

def get_warlord(self, domain: str) -> Optional[str]:
    """Return the name of the agent currently leading ``domain``, or None if it has no leader."""
    try:
        return self._warlords[domain]
    except KeyError:
        # Domain is unknown or currently vacant.
        return None

`process(task, domain=None, context=None)` `async` ¶

Process a task through the arena.

This may trigger a trial if:

1. Multiple agents claim the domain
2. A challenger decides to challenge
3. Challenge conditions are met

Parameters:

Name	Type	Description	Default
`task`	`str`	The task to execute.	required
`domain`	`Optional[str]`	Optional domain hint. If not provided, will be inferred.	`None`
`context`	`Optional[Dict[str, Any]]`	Optional execution context.	`None`

Returns:

Type	Description
`TrialResult`	TrialResult with the outcome.

Source code in orc/arena/arena.py

async def process(
    self,
    task: str,
    domain: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> TrialResult:
    """
    Process a task through the arena.

    This may trigger a trial if:
    1. Multiple agents claim the domain
    2. A challenger decides to challenge
    3. Challenge conditions are met

    Args:
        task: The task to execute.
        domain: Optional domain hint. If not provided, will be inferred.
        context: Optional execution context. It is copied, so the caller's
            dict is never modified. A ``task_id`` is added to the copy when
            the caller did not supply one.

    Returns:
        TrialResult with the outcome. When nobody challenges, the result
        has ``was_challenged=False`` and ``verdict=None`` and is not added
        to the trial history.
    """
    # Copy before adding task_id: the original only mutated the caller's
    # dict when it was non-empty, which was both surprising and uneven.
    context = dict(context) if context else {}
    context.setdefault("task_id", str(uuid.uuid4()))

    # Determine domain
    if not domain:
        domain = await self._infer_domain(task)

    # Get current warlord, electing one if the domain is vacant
    warlord_name = self._warlords.get(domain)
    if not warlord_name:
        warlord_name = await self._elect_warlord(domain)

    warlord_state = self._agents[warlord_name]

    # Check for challengers
    challenger = await self._find_challenger(task, domain, warlord_name)

    if not challenger:
        # No challenge - warlord executes
        task_result = await warlord_state.agent.process_task(task, context)

        return TrialResult(
            task=task,
            domain=domain,
            winner=warlord_name,
            winner_result=task_result,
            was_challenged=False,
            verdict=None,
        )

    # TRIAL BY COMBAT!
    if self._on_challenge:
        self._on_challenge(warlord_name, challenger, domain)

    trial = Trial(
        task=task,
        domain=domain,
        warlord=warlord_state.agent,
        challenger=self._agents[challenger].agent,
        judge=self.judge,
        context=context,
        timeout=self.config.trial_timeout_seconds,
        parallel=self.config.parallel_trial_execution,
    )

    result = await trial.execute()
    self._trial_history.append(result)

    # Update leadership
    await self._process_trial_result(result, domain, warlord_name, challenger)

    if self._on_trial_complete:
        self._on_trial_complete(result.verdict)

    return result

`register_agent(agent)` ¶

Register a new agent in the arena.

Source code in orc/arena/arena.py

def register_agent(self, agent: Agent):
    """
    Register a new agent in the arena.

    Each domain the agent claims starts at the configured default
    reputation. Registering a name that already exists replaces that
    agent's state, but any domain the name currently leads stays flagged on
    the new state.
    """
    state = AgentState(
        agent=agent,
        reputation={d: self.config.default_reputation for d in self._get_domains(agent)},
    )
    # Keep is_warlord in step with _warlords. Otherwise a re-registered
    # warlord could not be cleanly unregistered later, leaving a dangling
    # entry in _warlords.
    for domain, holder in self._warlords.items():
        if holder == agent.name:
            state.is_warlord.add(domain)
    self._agents[agent.name] = state

`unregister_agent(agent_name)` ¶

Remove an agent from the arena.

Source code in orc/arena/arena.py

def unregister_agent(self, agent_name: str):
    """
    Drop an agent from the arena.

    Any domain the agent is flagged as leading, and that still maps to it,
    is vacated. Names that are not registered are ignored.
    """
    if agent_name not in self._agents:
        return

    departing = self._agents[agent_name]
    # Iterate over a snapshot of the led domains while vacating them.
    for led_domain in tuple(departing.is_warlord):
        if self._warlords.get(led_domain) == agent_name:
            self._warlords.pop(led_domain)

    self._agents.pop(agent_name)

ArenaConfig¶

`orc.arena.arena.ArenaConfig` `dataclass` ¶

Configuration for the Arena.

Source code in orc/arena/arena.py

@dataclass
class ArenaConfig:
    """
    Configuration for the Arena.

    Every field has a default, so ``ArenaConfig()`` is a usable
    configuration. Fields are grouped by the part of the arena they tune.
    """

    # Challenge settings
    # Base probability of challenge on domain overlap; Arena rolls
    # random.random() against it before consulting an agent's strategy.
    challenge_probability: float = 0.3
    # Minimum reputation to challenge; agents below it are skipped.
    min_reputation_to_challenge: float = 0.2
    # Seconds an agent must wait before challenging in the same domain again.
    # NOTE(review): Arena stamps the challenge time after every trial, so the
    # cooldown applies to winning challengers too, not only after a loss.
    challenge_cooldown_seconds: int = 300

    # Leadership settings
    # Minimum trial wins to become Warlord.
    # NOTE(review): not referenced by the Arena code shown on this page -
    # confirm where (or whether) it is enforced.
    min_trials_for_leadership: int = 1
    # Reputation decay per hour without defense.
    # NOTE(review): not referenced by the Arena code shown on this page -
    # confirm where (or whether) it is applied.
    leadership_decay_rate: float = 0.01
    # Force rotation once a warlord reaches this many consecutive defenses.
    max_consecutive_defenses: int = 10

    # Trial settings
    # Timeout in seconds, passed to Trial as ``timeout``.
    trial_timeout_seconds: int = 300
    # Execute trial attempts in parallel; passed to Trial as ``parallel``.
    parallel_trial_execution: bool = True

    # Defaults
    # Starting reputation for new agents, and the fallback for any agent
    # with no recorded score in a domain.
    default_reputation: float = 0.5

Trial¶

`orc.arena.trial.Trial` ¶

Executes a trial between a Warlord and Challenger.

Example

```python
trial = Trial(
    task="Analyze Q4 sales data",
    domain="data",
    warlord=data_agent,
    challenger=analytics_agent,
    judge=llm_judge,
)

result = await trial.execute()
print(f"Winner: {result.winner}")
```

Source code in orc/arena/trial.py

class Trial:
    """
    Executes a trial between a Warlord and Challenger.

    Both agents attempt the same task (in parallel or one after the other),
    a Judge compares the two submissions, and the winner's result becomes
    the result of the task. Ties go to the defending Warlord.

    Example:
        trial = Trial(
            task="Analyze Q4 sales data",
            domain="data",
            warlord=data_agent,
            challenger=analytics_agent,
            judge=llm_judge,
        )

        result = await trial.execute()
        print(f"Winner: {result.winner}")
    """

    def __init__(
        self,
        task: str,
        domain: str,
        warlord: Agent,
        challenger: Agent,
        judge: Judge,
        context: Optional[Dict[str, Any]] = None,
        timeout: int = 300,
        parallel: bool = True,
    ):
        """
        Initialize a trial.

        Args:
            task: The task to execute.
            domain: The domain being contested.
            warlord: The current Warlord agent.
            challenger: The challenging agent.
            judge: Judge to evaluate outcomes.
            context: Execution context shared by both agents; each agent
                receives its own copy extended with trial-specific keys.
            timeout: Timeout for each agent's execution (seconds).
            parallel: Whether to execute agents in parallel.
        """
        self.task = task
        self.domain = domain
        self.warlord = warlord
        self.challenger = challenger
        self.judge = judge
        self.context = context or {}
        self.timeout = timeout
        self.parallel = parallel
        # Unique id, also used to derive each agent's task_id.
        self.trial_id = str(uuid.uuid4())

    async def execute(self) -> TrialResult:
        """
        Execute the trial.

        Both agents attempt the task. The Judge evaluates the results
        and determines a winner.

        Returns:
            TrialResult with the outcome, including both agents' results,
            the verdict, and the wall-clock duration in milliseconds.
        """
        start_time = datetime.now(timezone.utc)

        # Build contexts for each agent. The trial-specific keys override
        # any same-named keys in the shared context.
        warlord_context = {
            **self.context,
            "task_id": f"{self.trial_id}_warlord",
            "trial_id": self.trial_id,
            "role": "warlord",
        }
        challenger_context = {
            **self.context,
            "task_id": f"{self.trial_id}_challenger",
            "trial_id": self.trial_id,
            "role": "challenger",
        }

        # Execute both agents
        if self.parallel:
            warlord_result, challenger_result = await self._execute_parallel(
                warlord_context, challenger_context
            )
        else:
            warlord_result = await self._execute_single(self.warlord, warlord_context)
            challenger_result = await self._execute_single(self.challenger, challenger_context)

        # Build submissions for judge
        submissions = [
            Submission(
                agent=self.warlord.name,
                result=warlord_result,
                latency_ms=warlord_result.duration_ms,
            ),
            Submission(
                agent=self.challenger.name,
                result=challenger_result,
                latency_ms=challenger_result.duration_ms,
            ),
        ]

        # Judge evaluates
        verdict = await self.judge.evaluate(self.task, submissions)

        # Determine winner. Ties, and verdicts naming neither contestant,
        # go to the defending warlord.
        winner = verdict.winner
        if verdict.is_tie or winner not in (self.warlord.name, self.challenger.name):
            winner = self.warlord.name
        winner_result = (
            warlord_result if winner == self.warlord.name else challenger_result
        )

        # Calculate duration
        duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)

        return TrialResult(
            task=self.task,
            domain=self.domain,
            winner=winner,
            winner_result=winner_result,
            was_challenged=True,
            verdict=verdict,
            warlord_result=warlord_result,
            challenger_result=challenger_result,
            trial_id=self.trial_id,
            duration_ms=duration_ms,
        )

    async def _execute_parallel(
        self,
        warlord_context: Dict[str, Any],
        challenger_context: Dict[str, Any],
    ) -> tuple[TaskResult, TaskResult]:
        """
        Execute both agents in parallel.

        Returns:
            ``(warlord_result, challenger_result)``. Anything gather hands
            back as an exception object is converted into a failure result
            for that agent.
        """
        results = await asyncio.gather(
            self._execute_single(self.warlord, warlord_context),
            self._execute_single(self.challenger, challenger_context),
            return_exceptions=True,
        )

        warlord_result = results[0]
        challenger_result = results[1]

        # _execute_single already turns every Exception into a failure, so
        # what can still arrive here is a BaseException such as
        # asyncio.CancelledError. Checking only Exception let it through as
        # if it were a TaskResult.
        if isinstance(warlord_result, BaseException):
            warlord_result = TaskResult.failure(
                task_id=warlord_context["task_id"],
                error=str(warlord_result),
            )
        if isinstance(challenger_result, BaseException):
            challenger_result = TaskResult.failure(
                task_id=challenger_context["task_id"],
                error=str(challenger_result),
            )

        return warlord_result, challenger_result

    async def _execute_single(
        self,
        agent: Agent,
        context: Dict[str, Any],
    ) -> TaskResult:
        """
        Execute a single agent with timeout.

        Returns:
            The agent's result, with ``duration_ms`` filled in if the agent
            left it unset. A timeout or any other exception yields a failure
            result carrying the context's ``task_id``.
        """
        start_time = datetime.now(timezone.utc)

        try:
            result = await asyncio.wait_for(
                agent.process_task(self.task, context),
                timeout=self.timeout,
            )

            # Add duration if not set
            if result.duration_ms is None:
                duration_ms = int(
                    (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
                )
                # TaskResult is frozen, so we need to create a new one
                result = TaskResult(
                    task_id=result.task_id,
                    outcome=result.outcome,
                    data=result.data,
                    metadata=result.metadata,
                    should_continue=result.should_continue,
                    skip_reason=result.skip_reason,
                    timestamp=result.timestamp,
                    duration_ms=duration_ms,
                    error_message=result.error_message,
                )

            return result

        except asyncio.TimeoutError:
            return TaskResult.failure(
                task_id=context["task_id"],
                error=f"Timeout after {self.timeout}s",
            )
        except Exception as e:
            # A trial must produce a result for both sides, so an agent
            # error is recorded as a failure rather than propagated.
            return TaskResult.failure(
                task_id=context["task_id"],
                error=str(e),
            )

`__init__(task, domain, warlord, challenger, judge, context=None, timeout=300, parallel=True)` ¶

Initialize a trial.

Parameters:

Name	Type	Description	Default
`task`	`str`	The task to execute.	required
`domain`	`str`	The domain being contested.	required
`warlord`	`Agent`	The current Warlord agent.	required
`challenger`	`Agent`	The challenging agent.	required
`judge`	`Judge`	Judge to evaluate outcomes.	required
`context`	`Optional[Dict[str, Any]]`	Execution context.	`None`
`timeout`	`int`	Timeout for each agent's execution (seconds).	`300`
`parallel`	`bool`	Whether to execute agents in parallel.	`True`

Source code in orc/arena/trial.py

def __init__(
    self,
    task: str,
    domain: str,
    warlord: Agent,
    challenger: Agent,
    judge: Judge,
    context: Optional[Dict[str, Any]] = None,
    timeout: int = 300,
    parallel: bool = True,
):
    """
    Set up a trial between a Warlord and a challenger.

    Args:
        task: The task both agents will attempt.
        domain: The domain whose leadership is contested.
        warlord: The agent currently holding the domain.
        challenger: The agent contesting the domain.
        judge: Judge used to evaluate the outcomes.
        context: Execution context; an empty dict is stored when this
            is ``None`` or otherwise falsy.
        timeout: Per-agent execution timeout, in seconds.
        parallel: Whether the two agents execute in parallel.
    """
    # What is being contested.
    self.task, self.domain = task, domain
    # Who takes part and who decides.
    self.warlord, self.challenger, self.judge = warlord, challenger, judge
    # How the agents are run.
    self.context = context if context else {}
    self.timeout, self.parallel = timeout, parallel
    # Every trial gets its own freshly generated identifier.
    self.trial_id = str(uuid.uuid4())

`execute()` `async` ¶

Execute the trial.

Both agents attempt the task. The Judge evaluates the results and determines a winner.

Returns:

Type	Description
`TrialResult`	TrialResult with the outcome.

Source code in orc/arena/trial.py

async def execute(self) -> TrialResult:
    """
    Run the trial and report its outcome.

    The warlord and the challenger each attempt the task, either in
    parallel or one after the other depending on ``self.parallel``.
    Their results are handed to the judge as submissions. The winner
    named by the judge is accepted only when the verdict is not a tie
    and the name belongs to one of the two contestants; in every other
    case the warlord keeps the win.

    Returns:
        TrialResult with the outcome.
    """
    began = datetime.now(timezone.utc)

    # Each agent sees the shared context plus its own task id and role.
    ctx_for_warlord = {
        **self.context,
        "task_id": f"{self.trial_id}_warlord",
        "trial_id": self.trial_id,
        "role": "warlord",
    }
    ctx_for_challenger = {
        **self.context,
        "task_id": f"{self.trial_id}_challenger",
        "trial_id": self.trial_id,
        "role": "challenger",
    }

    if not self.parallel:
        # Sequential mode: the warlord runs first, then the challenger.
        from_warlord = await self._execute_single(self.warlord, ctx_for_warlord)
        from_challenger = await self._execute_single(
            self.challenger, ctx_for_challenger
        )
    else:
        from_warlord, from_challenger = await self._execute_parallel(
            ctx_for_warlord, ctx_for_challenger
        )

    # One submission per contestant, warlord first.
    contestants = (
        (self.warlord, from_warlord),
        (self.challenger, from_challenger),
    )
    submissions = [
        Submission(
            agent=contestant.name,
            result=outcome,
            latency_ms=outcome.duration_ms,
        )
        for contestant, outcome in contestants
    ]

    verdict = await self.judge.evaluate(self.task, submissions)

    # Accept the judge's pick only for a decisive verdict naming a
    # contestant; ties and unknown names go to the defending warlord.
    winner = verdict.winner
    decisive = not verdict.is_tie and winner in (
        self.warlord.name,
        self.challenger.name,
    )
    if not decisive:
        winner = self.warlord.name

    if winner == self.warlord.name:
        winner_result = from_warlord
    else:
        winner_result = from_challenger

    # Wall-clock duration of the whole trial, judging included.
    elapsed = datetime.now(timezone.utc) - began
    duration_ms = int(elapsed.total_seconds() * 1000)

    return TrialResult(
        task=self.task,
        domain=self.domain,
        winner=winner,
        winner_result=winner_result,
        was_challenged=True,
        verdict=verdict,
        warlord_result=from_warlord,
        challenger_result=from_challenger,
        trial_id=self.trial_id,
        duration_ms=duration_ms,
    )

TrialResult¶

`orc.arena.trial.TrialResult` `dataclass` ¶

Result of a trial between two agents.

Attributes:

Name	Type	Description
`task`	`str`	The task that was executed
`domain`	`str`	The domain being contested
`winner`	`str`	Name of the winning agent
`winner_result`	`TaskResult`	TaskResult from the winner
`was_challenged`	`bool`	Whether this was a contested trial
`verdict`	`Optional[Verdict]`	Judge's verdict (if challenged)
`warlord_result`	`Optional[TaskResult]`	TaskResult from the warlord
`challenger_result`	`Optional[TaskResult]`	TaskResult from the challenger
`trial_id`	`str`	Unique trial identifier
`timestamp`	`datetime`	When the trial completed
`duration_ms`	`Optional[int]`	Total trial duration in milliseconds

Source code in orc/arena/trial.py

@dataclass
class TrialResult:
    """
    Outcome record for a trial between two agents.

    Attributes:
        task: The task that was executed
        domain: The domain being contested
        winner: Name of the winning agent
        winner_result: TaskResult produced by the winner
        was_challenged: Whether this was a contested trial
        verdict: Judge's verdict (if challenged)
        warlord_result: TaskResult produced by the warlord
        challenger_result: TaskResult produced by the challenger
        trial_id: Unique trial identifier (a random UUID string by default)
        timestamp: UTC time, defaulting to when this record is created
        duration_ms: Total trial duration in milliseconds
    """

    # Required fields.
    task: str
    domain: str
    winner: str
    winner_result: TaskResult
    was_challenged: bool
    verdict: Optional[Verdict]

    # Optional fields.
    warlord_result: Optional[TaskResult] = None
    challenger_result: Optional[TaskResult] = None
    trial_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    duration_ms: Optional[int] = None

    @property
    def data(self) -> Any:
        """Return the winner's ``data``, or ``None`` if the winner result is falsy."""
        if not self.winner_result:
            return None
        return self.winner_result.data

    def to_dict(self) -> Dict[str, Any]:
        """Build a plain dictionary of this result for logging/storage."""

        def dump(part: Any) -> Any:
            # Nested parts serialize themselves; falsy ones become None.
            return part.to_dict() if part else None

        payload: Dict[str, Any] = {
            "trial_id": self.trial_id,
            "task": self.task,
            "domain": self.domain,
            "winner": self.winner,
            "was_challenged": self.was_challenged,
            "verdict": dump(self.verdict),
            "warlord_result": dump(self.warlord_result),
            "challenger_result": dump(self.challenger_result),
            "timestamp": self.timestamp.isoformat(),
            "duration_ms": self.duration_ms,
        }
        return payload

`data` `property` ¶

Get the winning result's data.

`to_dict()` ¶

Convert to dictionary for logging/storage.

Source code in orc/arena/trial.py

def to_dict(self) -> Dict[str, Any]:
    """Build a plain dictionary of this result for logging/storage."""

    def dump(part: Any) -> Any:
        # Nested parts serialize themselves; falsy ones become None.
        return part.to_dict() if part else None

    payload: Dict[str, Any] = {
        "trial_id": self.trial_id,
        "task": self.task,
        "domain": self.domain,
        "winner": self.winner,
        "was_challenged": self.was_challenged,
        "verdict": dump(self.verdict),
        "warlord_result": dump(self.warlord_result),
        "challenger_result": dump(self.challenger_result),
        "timestamp": self.timestamp.isoformat(),
        "duration_ms": self.duration_ms,
    }
    return payload

Arena¶

Arena¶

orc.arena.arena.Arena ¶

Process a task - may trigger a trial¶

Check who leads which domain¶

__init__(agents, judge, config=None, reputation_store=None, on_challenge=None, on_succession=None, on_trial_complete=None) ¶

get_leaderboard(domain, limit=10) ¶

get_reputation(agent_name, domain) ¶

get_trial_history(limit=50) ¶

get_warlord(domain) ¶

process(task, domain=None, context=None) async ¶

register_agent(agent) ¶

unregister_agent(agent_name) ¶

ArenaConfig¶

orc.arena.arena.ArenaConfig dataclass ¶

Trial¶

orc.arena.trial.Trial ¶

__init__(task, domain, warlord, challenger, judge, context=None, timeout=300, parallel=True) ¶

execute() async ¶

TrialResult¶

orc.arena.trial.TrialResult dataclass ¶

data property ¶

to_dict() ¶

`orc.arena.arena.Arena` ¶

`__init__(agents, judge, config=None, reputation_store=None, on_challenge=None, on_succession=None, on_trial_complete=None)` ¶

`get_leaderboard(domain, limit=10)` ¶

`get_reputation(agent_name, domain)` ¶

`get_trial_history(limit=50)` ¶

`get_warlord(domain)` ¶

`process(task, domain=None, context=None)` `async` ¶

`register_agent(agent)` ¶

`unregister_agent(agent_name)` ¶

`orc.arena.arena.ArenaConfig` `dataclass` ¶

`orc.arena.trial.Trial` ¶

`__init__(task, domain, warlord, challenger, judge, context=None, timeout=300, parallel=True)` ¶

`execute()` `async` ¶

`orc.arena.trial.TrialResult` `dataclass` ¶

`data` `property` ¶

`to_dict()` ¶