Skip to content

Arena

The core orchestration component where agents compete for leadership.


Arena

orc.arena.arena.Arena

The Arena where agents compete for leadership.

Example:

    arena = Arena(
        agents=[DataAgent(), ReportAgent(), AnalyticsAgent()],
        judge=LLMJudge(llm),
        config=ArenaConfig(challenge_probability=0.3),
    )

    # Process a task - may trigger a trial
    result = await arena.process("Analyze Q4 sales data")

    # Check who leads which domain
    warlord = arena.get_warlord("data")
    print(f"Data domain Warlord: {warlord}")

Source code in orc/arena/arena.py
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
class Arena:
    """
    The Arena where agents compete for leadership.

    The arena tracks one leader (the "warlord") per domain. For each task it
    looks for a challenger among the other agents that claim the task's
    domain. If one steps forward, a ``Trial`` is run and its outcome updates
    trial counts, reputations and, possibly, the domain's leadership.
    Otherwise the current warlord simply executes the task.

    Example:
        arena = Arena(
            agents=[DataAgent(), ReportAgent(), AnalyticsAgent()],
            judge=LLMJudge(llm),
            config=ArenaConfig(challenge_probability=0.3),
        )

        # Process a task - may trigger a trial
        result = await arena.process("Analyze Q4 sales data")

        # Check who leads which domain
        warlord = arena.get_warlord("data")
        print(f"Data domain Warlord: {warlord}")
    """

    def __init__(
        self,
        agents: List[Agent],
        judge: Judge,
        config: Optional[ArenaConfig] = None,
        reputation_store: Optional[ReputationStore] = None,
        # Hooks
        on_challenge: Optional[Callable[[str, str, str], None]] = None,
        on_succession: Optional[Callable[[str, str, str], None]] = None,
        on_trial_complete: Optional[Callable[[Verdict], None]] = None,
    ):
        """
        Initialize the Arena.

        Args:
            agents: List of agents that can compete. Agents are keyed by
                ``agent.name``; a later agent with the same name replaces
                an earlier one.
            judge: Judge to evaluate trial outcomes.
            config: Arena configuration. A default ``ArenaConfig()`` is used
                when omitted.
            reputation_store: Optional persistent storage for reputation.
            on_challenge: Hook called as ``(warlord, challenger, domain)``
                when a challenge is issued.
            on_succession: Hook called as ``(old_warlord, new_warlord,
                domain)`` when leadership changes.
            on_trial_complete: Hook called with the verdict when a trial
                completes.
        """
        self.judge = judge
        self.config = config or ArenaConfig()
        self.reputation_store = reputation_store

        # Hooks
        self._on_challenge = on_challenge
        self._on_succession = on_succession
        self._on_trial_complete = on_trial_complete

        # Initialize agent states: every claimed domain starts at the
        # configured default reputation.
        self._agents: Dict[str, AgentState] = {}
        for agent in agents:
            self._agents[agent.name] = AgentState(
                agent=agent,
                reputation={d: self.config.default_reputation for d in self._get_domains(agent)},
            )

        # Domain -> current warlord name
        self._warlords: Dict[str, str] = {}

        # Initialize warlords (first agent claiming each domain)
        self._initialize_warlords()

        # Trial history (only challenged tasks are recorded)
        self._trial_history: List[TrialResult] = []

    def _get_domains(self, agent: Agent) -> List[str]:
        """Return the agent's ``domains``, or its ``capabilities`` if it has none."""
        if hasattr(agent, "domains"):
            return agent.domains
        # Fallback: use capabilities as domains
        return agent.capabilities

    def _initialize_warlords(self):
        """Give each domain to the first registered agent that claims it."""
        for name, state in self._agents.items():
            for domain in self._get_domains(state.agent):
                if domain not in self._warlords:
                    self._warlords[domain] = name
                    state.is_warlord.add(domain)

    async def process(
        self,
        task: str,
        domain: Optional[str] = None,
        context: Optional[Dict[str, Any]] = None,
    ) -> TrialResult:
        """
        Process a task through the arena.

        A trial is run when ``_find_challenger`` returns a challenger for
        the task's domain; otherwise the current warlord executes the task
        unopposed.

        Args:
            task: The task to execute.
            domain: Optional domain hint. If not provided, will be inferred.
            context: Optional execution context. A non-empty dict is used
                as-is and receives a ``"task_id"`` entry if it lacks one.

        Returns:
            TrialResult with the outcome. ``was_challenged`` is False and
            ``verdict`` is None when no trial took place.

        Raises:
            ValueError: If a warlord must be elected but no agents are
                registered.
        """
        context = context or {}
        # Keep a caller-supplied task_id; otherwise mint a fresh one.
        context["task_id"] = context.get("task_id", str(uuid.uuid4()))

        # Determine domain
        if not domain:
            domain = await self._infer_domain(task)

        # Get current warlord
        warlord_name = self._warlords.get(domain)
        if not warlord_name:
            # No warlord - elect one
            warlord_name = await self._elect_warlord(domain)

        warlord_state = self._agents[warlord_name]

        # Check for challengers
        challenger = await self._find_challenger(task, domain, warlord_name)

        if challenger:
            # TRIAL BY COMBAT!
            if self._on_challenge:
                self._on_challenge(warlord_name, challenger, domain)

            trial = Trial(
                task=task,
                domain=domain,
                warlord=warlord_state.agent,
                challenger=self._agents[challenger].agent,
                judge=self.judge,
                context=context,
                timeout=self.config.trial_timeout_seconds,
                parallel=self.config.parallel_trial_execution,
            )

            result = await trial.execute()
            self._trial_history.append(result)

            # Update leadership
            await self._process_trial_result(result, domain, warlord_name, challenger)

            if self._on_trial_complete:
                self._on_trial_complete(result.verdict)

            return result

        else:
            # No challenge - warlord executes
            task_result = await warlord_state.agent.process_task(task, context)

            return TrialResult(
                task=task,
                domain=domain,
                winner=warlord_name,
                winner_result=task_result,
                was_challenged=False,
                verdict=None,
            )

    async def _infer_domain(self, task: str) -> str:
        """
        Infer the domain from the task description.

        Scoring is a simple keyword heuristic:

        * one point for each agent capability that occurs in the task,
          credited to every domain of the agent that owns the capability;
        * one point for each occurrence of a led domain's name in the task.

        The highest-scoring domain wins; ties go to the domain scored
        first. ``"general"`` is returned when no domain has a warlord yet.
        """
        if not self._warlords:
            # Nothing to score against.
            return "general"

        task_lower = task.lower()
        scores: Dict[str, int] = {}

        # Capability matches are independent of the domain being scored, so
        # they are counted exactly once. (Counting them inside the
        # per-domain loop would multiply them by the number of domains.)
        for state in self._agents.values():
            for cap in state.agent.capabilities:
                if cap.lower() in task_lower:
                    for d in self._get_domains(state.agent):
                        scores[d] = scores.get(d, 0) + 1

        # Direct mentions of the domain name.
        for domain in self._warlords:
            scores[domain] = scores.get(domain, 0) + task_lower.count(domain.lower())

        return max(scores, key=scores.get)

    async def _elect_warlord(self, domain: str) -> str:
        """
        Elect a warlord for a domain with no current leader.

        Candidates are the agents claiming the domain, or every registered
        agent when nobody claims it. The candidate with the highest
        reputation in the domain wins (first one on ties).

        Raises:
            ValueError: If no agents are registered at all.
        """
        candidates = [
            name for name, state in self._agents.items()
            if domain in self._get_domains(state.agent)
        ]

        if not candidates:
            # No one claims this domain - assign to highest reputation agent
            candidates = list(self._agents.keys())

        if not candidates:
            raise ValueError(
                f"Cannot elect a warlord for domain {domain!r}: no agents are registered"
            )

        # Pick highest reputation
        best = max(candidates, key=lambda n: self._agents[n].reputation.get(domain, 0))
        self._warlords[domain] = best
        self._agents[best].is_warlord.add(domain)
        return best

    async def _find_challenger(
        self,
        task: str,
        domain: str,
        warlord_name: str,
    ) -> Optional[str]:
        """
        Find an agent willing to challenge the warlord.

        An agent is eligible when it is not the warlord, claims the domain,
        is past its challenge cooldown, meets the minimum reputation and its
        challenge strategy agrees. One eligible agent is then drawn at
        random, weighted by reputation in the domain.

        Args:
            task: The task being processed (not used by the selection).
            domain: Domain being contested.
            warlord_name: Name of the current warlord.

        Returns:
            The challenger's name, or None when nobody challenges.
        """
        candidates = []

        for name, state in self._agents.items():
            if name == warlord_name:
                continue

            # Check if agent claims this domain
            if domain not in self._get_domains(state.agent):
                continue

            # Check cooldown
            last_challenge = state.last_challenge_time.get(domain)
            if last_challenge:
                elapsed = (datetime.now(timezone.utc) - last_challenge).total_seconds()
                if elapsed < self.config.challenge_cooldown_seconds:
                    continue

            # Check minimum reputation
            rep = state.reputation.get(domain, self.config.default_reputation)
            if rep < self.config.min_reputation_to_challenge:
                continue

            # Check challenge strategy (agents without one always challenge)
            strategy = getattr(state.agent, "challenge_strategy", AlwaysChallenge())
            if await self._should_challenge(strategy, state, domain, warlord_name):
                candidates.append(name)

        if not candidates:
            return None

        # Random selection weighted by reputation
        weights = [
            self._agents[n].reputation.get(domain, self.config.default_reputation)
            for n in candidates
        ]
        total = sum(weights)
        if total == 0:
            # All weights are zero: fall back to a uniform pick.
            return random.choice(candidates)

        r = random.random() * total
        cumulative = 0
        for name, weight in zip(candidates, weights):
            cumulative += weight
            if r <= cumulative:
                return name

        # Guard against floating-point shortfall in the cumulative sum.
        return candidates[-1]

    async def _should_challenge(
        self,
        strategy: ChallengeStrategy,
        state: AgentState,
        domain: str,
        warlord_name: str,
    ) -> bool:
        """
        Check if an agent should challenge based on strategy.

        The agent first has to pass a random gate that succeeds with
        probability ``config.challenge_probability``; only then is its
        strategy consulted with both parties' reputations.
        """
        # Base probability check. random.random() is in [0.0, 1.0), so
        # ">=" makes a probability of 0.0 never pass and 1.0 always pass.
        if random.random() >= self.config.challenge_probability:
            return False

        # Strategy check
        warlord_rep = self._agents[warlord_name].reputation.get(
            domain, self.config.default_reputation
        )
        challenger_rep = state.reputation.get(domain, self.config.default_reputation)

        return strategy.should_challenge(
            domain=domain,
            warlord_name=warlord_name,
            warlord_reputation=warlord_rep,
            challenger_reputation=challenger_rep,
        )

    async def _process_trial_result(
        self,
        result: TrialResult,
        domain: str,
        warlord_name: str,
        challenger_name: str,
    ):
        """
        Process trial result and update leadership.

        Records the challenge time, updates win/loss counts and reputations
        (clamped to [0.0, 1.0]), transfers leadership when the challenger
        won, forces a rotation when the warlord has defended too many times
        in a row, and finally persists both reputations if a store is set.
        """
        warlord_state = self._agents[warlord_name]
        challenger_state = self._agents[challenger_name]
        # Starting point for an agent with no recorded reputation in this
        # domain, consistent with every other lookup in this class.
        default_rep = self.config.default_reputation

        # Update challenge time (starts the challenger's cooldown)
        challenger_state.last_challenge_time[domain] = datetime.now(timezone.utc)

        if result.winner == challenger_name:
            # SUCCESSION!
            # Update trial counts
            challenger_state.trial_wins[domain] = challenger_state.trial_wins.get(domain, 0) + 1
            warlord_state.trial_losses[domain] = warlord_state.trial_losses.get(domain, 0) + 1

            # Update reputation
            challenger_state.reputation[domain] = min(
                1.0, challenger_state.reputation.get(domain, default_rep) + 0.1
            )
            warlord_state.reputation[domain] = max(
                0.0, warlord_state.reputation.get(domain, default_rep) - 0.1
            )

            # Transfer leadership
            warlord_state.is_warlord.discard(domain)
            warlord_state.consecutive_defenses[domain] = 0
            challenger_state.is_warlord.add(domain)
            challenger_state.consecutive_defenses[domain] = 0
            self._warlords[domain] = challenger_name

            if self._on_succession:
                self._on_succession(warlord_name, challenger_name, domain)

        else:
            # Warlord defends!
            warlord_state.trial_wins[domain] = warlord_state.trial_wins.get(domain, 0) + 1
            challenger_state.trial_losses[domain] = challenger_state.trial_losses.get(domain, 0) + 1

            # Update reputation (a defense moves scores half as far as a succession)
            warlord_state.reputation[domain] = min(
                1.0, warlord_state.reputation.get(domain, default_rep) + 0.05
            )
            challenger_state.reputation[domain] = max(
                0.0, challenger_state.reputation.get(domain, default_rep) - 0.05
            )

            # Track consecutive defenses
            warlord_state.consecutive_defenses[domain] = (
                warlord_state.consecutive_defenses.get(domain, 0) + 1
            )
            warlord_state.last_defense_time[domain] = datetime.now(timezone.utc)

            # Check for forced rotation
            if warlord_state.consecutive_defenses[domain] >= self.config.max_consecutive_defenses:
                # Hand the domain to the best-reputed other claimant
                await self._force_rotation(domain, warlord_name)

        # Persist reputation if store available.
        # NOTE(review): the value passed is the offset from the default
        # reputation, not this trial's change - confirm this matches what
        # ReputationStore.update_reputation expects.
        if self.reputation_store:
            await self.reputation_store.update_reputation(
                challenger_name,
                domain,
                challenger_state.reputation[domain] - default_rep,
            )
            await self.reputation_store.update_reputation(
                warlord_name,
                domain,
                warlord_state.reputation[domain] - default_rep,
            )

    async def _force_rotation(self, domain: str, current_warlord: str):
        """
        Force leadership rotation after too many consecutive defenses.

        Leadership passes to the highest-reputation agent, other than the
        current warlord, that claims the domain. Nothing changes when no
        such agent exists.
        """
        candidates = [
            (name, state.reputation.get(domain, 0))
            for name, state in self._agents.items()
            if name != current_warlord and domain in self._get_domains(state.agent)
        ]

        if candidates:
            new_warlord = max(candidates, key=lambda x: x[1])[0]
            self._agents[current_warlord].is_warlord.discard(domain)
            self._agents[current_warlord].consecutive_defenses[domain] = 0
            self._agents[new_warlord].is_warlord.add(domain)
            self._warlords[domain] = new_warlord

            if self._on_succession:
                self._on_succession(current_warlord, new_warlord, domain)

    # Public API

    def get_warlord(self, domain: str) -> Optional[str]:
        """Get the current Warlord for a domain, or None if it has none."""
        return self._warlords.get(domain)

    def get_reputation(self, agent_name: str, domain: str) -> float:
        """
        Get an agent's reputation for a domain.

        Returns 0.0 for an unknown agent, and the configured default
        reputation for a known agent with no score in the domain.
        """
        if agent_name not in self._agents:
            return 0.0
        return self._agents[agent_name].reputation.get(domain, self.config.default_reputation)

    def get_leaderboard(self, domain: str, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Get the reputation leaderboard for a domain.

        Returns:
            Up to ``limit`` dicts with keys ``agent``, ``reputation``,
            ``wins``, ``losses`` and ``is_warlord``, one per agent claiming
            the domain, ordered by reputation from highest to lowest.
        """
        scores = [
            {
                "agent": name,
                "reputation": state.reputation.get(domain, self.config.default_reputation),
                "wins": state.trial_wins.get(domain, 0),
                "losses": state.trial_losses.get(domain, 0),
                "is_warlord": domain in state.is_warlord,
            }
            for name, state in self._agents.items()
            if domain in self._get_domains(state.agent)
        ]

        return sorted(scores, key=lambda x: x["reputation"], reverse=True)[:limit]

    def get_trial_history(self, limit: int = 50) -> List[TrialResult]:
        """
        Get recent trial history.

        Args:
            limit: Maximum number of most recent trials to return.

        Returns:
            The last ``limit`` trial results, oldest first. An empty list
            when ``limit`` is zero or negative.
        """
        # A plain [-limit:] slice would return everything for limit == 0
        # (since -0 == 0) and drop the *first* entries for negative limits.
        if limit <= 0:
            return []
        return self._trial_history[-limit:]

    def register_agent(self, agent: Agent):
        """
        Register a new agent in the arena.

        The agent starts at the default reputation in each of its domains.
        No warlord is assigned here; a domain without a leader gets one the
        next time ``process`` handles a task for it.
        """
        self._agents[agent.name] = AgentState(
            agent=agent,
            reputation={d: self.config.default_reputation for d in self._get_domains(agent)},
        )

    def unregister_agent(self, agent_name: str):
        """
        Remove an agent from the arena.

        Every domain the agent currently leads is left without a warlord.
        Unknown names are ignored.
        """
        if agent_name not in self._agents:
            return

        # Derive the led domains from the authoritative domain -> warlord
        # map rather than the agent's own is_warlord set, which is reset
        # when an agent is registered again under the same name.
        led_domains = [
            domain for domain, leader in self._warlords.items() if leader == agent_name
        ]
        for domain in led_domains:
            del self._warlords[domain]
        del self._agents[agent_name]

__init__(agents, judge, config=None, reputation_store=None, on_challenge=None, on_succession=None, on_trial_complete=None)

Initialize the Arena.

Parameters:

Name Type Description Default
agents List[Agent]

List of agents that can compete.

required
judge Judge

Judge to evaluate trial outcomes.

required
config Optional[ArenaConfig]

Arena configuration.

None
reputation_store Optional[ReputationStore]

Optional persistent storage for reputation.

None
on_challenge Optional[Callable[[str, str, str], None]]

Hook called when a challenge is issued.

None
on_succession Optional[Callable[[str, str, str], None]]

Hook called when leadership changes.

None
on_trial_complete Optional[Callable[[Verdict], None]]

Hook called when a trial completes.

None
Source code in orc/arena/arena.py
def __init__(
    self,
    agents: List[Agent],
    judge: Judge,
    config: Optional[ArenaConfig] = None,
    reputation_store: Optional[ReputationStore] = None,
    # Hooks
    on_challenge: Optional[Callable[[str, str, str], None]] = None,
    on_succession: Optional[Callable[[str, str, str], None]] = None,
    on_trial_complete: Optional[Callable[[Verdict], None]] = None,
):
    """
    Set up the arena's collaborators, agent records and initial leaders.

    Args:
        agents: Agents that can compete; stored under ``agent.name``.
        judge: Judge used to evaluate trial outcomes.
        config: Arena configuration; a default ``ArenaConfig()`` is used
            when none is given.
        reputation_store: Optional persistent storage for reputation.
        on_challenge: Hook called when a challenge is issued.
        on_succession: Hook called when leadership changes.
        on_trial_complete: Hook called when a trial completes.
    """
    # Collaborators
    self.judge = judge
    self.config = config if config else ArenaConfig()
    self.reputation_store = reputation_store

    # Optional event hooks
    self._on_challenge, self._on_succession, self._on_trial_complete = (
        on_challenge,
        on_succession,
        on_trial_complete,
    )

    # One state record per agent; every claimed domain starts at the
    # configured default reputation.
    self._agents: Dict[str, AgentState] = {}
    for contender in agents:
        starting_scores = dict.fromkeys(
            self._get_domains(contender), self.config.default_reputation
        )
        self._agents[contender.name] = AgentState(
            agent=contender,
            reputation=starting_scores,
        )

    # Completed trials, oldest first
    self._trial_history: List[TrialResult] = []

    # Domain -> name of the agent currently leading it, seeded with the
    # first agent that claims each domain.
    self._warlords: Dict[str, str] = {}
    self._initialize_warlords()

get_leaderboard(domain, limit=10)

Get the reputation leaderboard for a domain.

Source code in orc/arena/arena.py
def get_leaderboard(self, domain: str, limit: int = 10) -> List[Dict[str, Any]]:
    """
    Rank the agents claiming ``domain`` by reputation, best first.

    Args:
        domain: Domain whose leaderboard is requested.
        limit: Maximum number of rows to return.

    Returns:
        A list of dicts with the keys ``agent``, ``reputation``, ``wins``,
        ``losses`` and ``is_warlord``. Agents without a recorded reputation
        in the domain are listed at the configured default.
    """
    baseline = self.config.default_reputation
    rows: List[Dict[str, Any]] = []

    for agent_name, record in self._agents.items():
        # Only agents that claim the domain appear on its board.
        if domain not in self._get_domains(record.agent):
            continue
        rows.append(
            {
                "agent": agent_name,
                "reputation": record.reputation.get(domain, baseline),
                "wins": record.trial_wins.get(domain, 0),
                "losses": record.trial_losses.get(domain, 0),
                "is_warlord": domain in record.is_warlord,
            }
        )

    rows.sort(key=lambda row: row["reputation"], reverse=True)
    return rows[:limit]

get_reputation(agent_name, domain)

Get an agent's reputation for a domain.

Source code in orc/arena/arena.py
def get_reputation(self, agent_name: str, domain: str) -> float:
    """
    Look up an agent's reputation in a domain.

    Returns 0.0 when the agent is not registered, and the configured
    default reputation when it is registered but has no score recorded
    for the domain.
    """
    try:
        record = self._agents[agent_name]
    except KeyError:
        return 0.0
    return record.reputation.get(domain, self.config.default_reputation)

get_trial_history(limit=50)

Get recent trial history.

Source code in orc/arena/arena.py
def get_trial_history(self, limit: int = 50) -> List[TrialResult]:
    """
    Get recent trial history.

    Args:
        limit: Maximum number of most recent trials to return.

    Returns:
        The last ``limit`` trial results, oldest first. An empty list when
        ``limit`` is zero or negative.
    """
    # A plain [-limit:] slice would return the whole history for
    # limit == 0 (since -0 == 0) and drop the *first* entries for
    # negative limits.
    if limit <= 0:
        return []
    return self._trial_history[-limit:]

get_warlord(domain)

Get the current Warlord for a domain.

Source code in orc/arena/arena.py
def get_warlord(self, domain: str) -> Optional[str]:
    """Return the name of the agent leading ``domain``, or None if it has no leader."""
    leaders = self._warlords
    return leaders[domain] if domain in leaders else None

process(task, domain=None, context=None) async

Process a task through the arena.

This may trigger a trial if:

1. Multiple agents claim the domain
2. A challenger decides to challenge
3. Challenge conditions are met

Parameters:

Name Type Description Default
task str

The task to execute.

required
domain Optional[str]

Optional domain hint. If not provided, will be inferred.

None
context Optional[Dict[str, Any]]

Optional execution context.

None

Returns:

Type Description
TrialResult

TrialResult with the outcome.

Source code in orc/arena/arena.py
async def process(
    self,
    task: str,
    domain: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> TrialResult:
    """
    Route a task to its domain's warlord, holding a trial if challenged.

    The domain is inferred from the task when not given, and a warlord is
    elected if the domain has none. If another agent then challenges, both
    agents attempt the task in a ``Trial`` and leadership is updated from
    the result; otherwise the warlord executes the task alone.

    Args:
        task: The task to execute.
        domain: Optional domain hint. If not provided, will be inferred.
        context: Optional execution context. A non-empty dict is used
            as-is and receives a ``"task_id"`` entry if it lacks one.

    Returns:
        TrialResult with the outcome. For an unchallenged task
        ``was_challenged`` is False and ``verdict`` is None.
    """
    context = context if context else {}
    context.setdefault("task_id", str(uuid.uuid4()))

    # Resolve the domain, then whoever currently leads it.
    if not domain:
        domain = await self._infer_domain(task)

    leader = self._warlords.get(domain) or await self._elect_warlord(domain)
    leader_state = self._agents[leader]

    rival = await self._find_challenger(task, domain, leader)

    if not rival:
        # No challenge - warlord executes
        unopposed = await leader_state.agent.process_task(task, context)
        return TrialResult(
            task=task,
            domain=domain,
            winner=leader,
            winner_result=unopposed,
            was_challenged=False,
            verdict=None,
        )

    # TRIAL BY COMBAT!
    if self._on_challenge:
        self._on_challenge(leader, rival, domain)

    contest = Trial(
        task=task,
        domain=domain,
        warlord=leader_state.agent,
        challenger=self._agents[rival].agent,
        judge=self.judge,
        context=context,
        timeout=self.config.trial_timeout_seconds,
        parallel=self.config.parallel_trial_execution,
    )
    outcome = await contest.execute()
    self._trial_history.append(outcome)

    # Update leadership before notifying listeners.
    await self._process_trial_result(outcome, domain, leader, rival)

    if self._on_trial_complete:
        self._on_trial_complete(outcome.verdict)

    return outcome

register_agent(agent)

Register a new agent in the arena.

Source code in orc/arena/arena.py
def register_agent(self, agent: Agent):
    """
    Add an agent to the arena under ``agent.name``.

    The agent starts at the configured default reputation in each of its
    domains. An existing record with the same name is replaced.
    """
    initial_scores = dict.fromkeys(
        self._get_domains(agent), self.config.default_reputation
    )
    self._agents[agent.name] = AgentState(agent=agent, reputation=initial_scores)

unregister_agent(agent_name)

Remove an agent from the arena.

Source code in orc/arena/arena.py
def unregister_agent(self, agent_name: str):
    """
    Remove an agent from the arena.

    Every domain the agent currently leads is left without a warlord.
    Unknown names are ignored.

    Args:
        agent_name: Name the agent was registered under.
    """
    if agent_name not in self._agents:
        return

    # Derive the led domains from the authoritative domain -> warlord map
    # rather than the agent's own is_warlord set: that set is reset when
    # an agent is registered again under the same name, which would leave
    # dangling warlord entries pointing at a removed agent.
    led_domains = [
        domain for domain, leader in self._warlords.items() if leader == agent_name
    ]
    for domain in led_domains:
        del self._warlords[domain]
    del self._agents[agent_name]

ArenaConfig

orc.arena.arena.ArenaConfig dataclass

Configuration for the Arena.

Source code in orc/arena/arena.py
@dataclass
class ArenaConfig:
    """
    Configuration for the Arena.

    Groups the tunables the Arena reads when deciding whether a challenge
    happens, how long a leader may stay in place, and how trials are run.
    """

    # Challenge settings
    # Chance that an eligible agent passes the random gate before its
    # challenge strategy is consulted.
    challenge_probability: float = 0.3  # Base probability of challenge on domain overlap
    # Agents whose reputation in the domain is below this cannot challenge.
    min_reputation_to_challenge: float = 0.2  # Minimum reputation to challenge
    # Seconds an agent must wait after its last challenge in a domain. The
    # Arena records the challenge time after every trial, won or lost.
    challenge_cooldown_seconds: int = 300  # Cooldown after a challenge

    # Leadership settings
    # NOTE(review): min_trials_for_leadership and leadership_decay_rate are
    # not read by the Arena code shown in this document - confirm where (or
    # whether) they are applied.
    min_trials_for_leadership: int = 1  # Minimum trial wins to become Warlord
    leadership_decay_rate: float = 0.01  # Reputation decay per hour without defense
    # Once a warlord reaches this many successful defenses in a row, the
    # domain is handed to the best-reputed other claimant.
    max_consecutive_defenses: int = 10  # Force rotation after N defenses

    # Trial settings
    # Passed to Trial as its per-agent execution timeout.
    trial_timeout_seconds: int = 300  # Timeout for trial execution
    parallel_trial_execution: bool = True  # Execute trial attempts in parallel

    # Defaults
    # Also used as the fallback wherever an agent has no recorded
    # reputation for a domain.
    default_reputation: float = 0.5  # Starting reputation for new agents

Trial

orc.arena.trial.Trial

Executes a trial between a Warlord and Challenger.

Example:

    trial = Trial(
        task="Analyze Q4 sales data",
        domain="data",
        warlord=data_agent,
        challenger=analytics_agent,
        judge=llm_judge,
    )

    result = await trial.execute()
    print(f"Winner: {result.winner}")

Source code in orc/arena/trial.py
class Trial:
    """
    Executes a trial between a Warlord and Challenger.

    Both agents attempt the same task, each under its own timeout, and a
    Judge picks the winner. A failed or timed-out attempt is turned into a
    failure result rather than aborting the trial.

    Example:
        trial = Trial(
            task="Analyze Q4 sales data",
            domain="data",
            warlord=data_agent,
            challenger=analytics_agent,
            judge=llm_judge,
        )

        result = await trial.execute()
        print(f"Winner: {result.winner}")
    """

    def __init__(
        self,
        task: str,
        domain: str,
        warlord: Agent,
        challenger: Agent,
        judge: Judge,
        context: Optional[Dict[str, Any]] = None,
        timeout: int = 300,
        parallel: bool = True,
    ):
        """
        Initialize a trial.

        Args:
            task: The task to execute.
            domain: The domain being contested.
            warlord: The current Warlord agent.
            challenger: The challenging agent.
            judge: Judge to evaluate outcomes.
            context: Execution context shared by both agents; each agent
                receives its own copy with trial-specific keys added.
            timeout: Timeout for each agent's execution (seconds).
            parallel: Whether to execute agents in parallel.
        """
        self.task = task
        self.domain = domain
        self.warlord = warlord
        self.challenger = challenger
        self.judge = judge
        self.context = context or {}
        self.timeout = timeout
        self.parallel = parallel
        self.trial_id = str(uuid.uuid4())

    async def execute(self) -> TrialResult:
        """
        Execute the trial.

        Both agents attempt the task. The Judge evaluates the results
        and determines a winner. The warlord keeps the win on a tie, or
        when the verdict names neither participant.

        Returns:
            TrialResult with the outcome, both agents' results, the
            verdict and the trial's total duration in milliseconds.
        """
        start_time = datetime.now(timezone.utc)

        # Build contexts for each agent: the shared context plus a
        # role-specific task_id, the trial_id and the agent's role.
        warlord_context = {
            **self.context,
            "task_id": f"{self.trial_id}_warlord",
            "trial_id": self.trial_id,
            "role": "warlord",
        }
        challenger_context = {
            **self.context,
            "task_id": f"{self.trial_id}_challenger",
            "trial_id": self.trial_id,
            "role": "challenger",
        }

        # Execute both agents
        if self.parallel:
            warlord_result, challenger_result = await self._execute_parallel(
                warlord_context, challenger_context
            )
        else:
            warlord_result = await self._execute_single(self.warlord, warlord_context)
            challenger_result = await self._execute_single(self.challenger, challenger_context)

        # Build submissions for judge
        submissions = [
            Submission(
                agent=self.warlord.name,
                result=warlord_result,
                latency_ms=warlord_result.duration_ms,
            ),
            Submission(
                agent=self.challenger.name,
                result=challenger_result,
                latency_ms=challenger_result.duration_ms,
            ),
        ]

        # Judge evaluates
        verdict = await self.judge.evaluate(self.task, submissions)

        # Determine winner (ties go to the defending warlord)
        winner = verdict.winner
        if verdict.is_tie or winner not in (self.warlord.name, self.challenger.name):
            winner = self.warlord.name
        winner_result = (
            warlord_result if winner == self.warlord.name else challenger_result
        )

        # Calculate duration
        duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)

        return TrialResult(
            task=self.task,
            domain=self.domain,
            winner=winner,
            winner_result=winner_result,
            was_challenged=True,
            verdict=verdict,
            warlord_result=warlord_result,
            challenger_result=challenger_result,
            trial_id=self.trial_id,
            duration_ms=duration_ms,
        )

    async def _execute_parallel(
        self,
        warlord_context: Dict[str, Any],
        challenger_context: Dict[str, Any],
    ) -> tuple[TaskResult, TaskResult]:
        """
        Execute both agents in parallel.

        Returns:
            ``(warlord_result, challenger_result)``. Anything an attempt
            raised, including cancellation of that attempt, is converted
            into a failure result for that agent.
        """
        warlord_result, challenger_result = await asyncio.gather(
            self._execute_single(self.warlord, warlord_context),
            self._execute_single(self.challenger, challenger_context),
            return_exceptions=True,
        )

        # _execute_single already converts ordinary exceptions, so what
        # can still arrive here is a BaseException such as
        # asyncio.CancelledError from a cancelled attempt. Checking only
        # for Exception would let it through as if it were a TaskResult.
        if isinstance(warlord_result, BaseException):
            warlord_result = TaskResult.failure(
                task_id=warlord_context["task_id"],
                # CancelledError usually has an empty message; fall back
                # to the exception's type name so the error is not blank.
                error=str(warlord_result) or type(warlord_result).__name__,
            )
        if isinstance(challenger_result, BaseException):
            challenger_result = TaskResult.failure(
                task_id=challenger_context["task_id"],
                error=str(challenger_result) or type(challenger_result).__name__,
            )

        return warlord_result, challenger_result

    async def _execute_single(
        self,
        agent: Agent,
        context: Dict[str, Any],
    ) -> TaskResult:
        """
        Execute a single agent with timeout.

        Returns:
            The agent's result, with ``duration_ms`` filled in from the
            measured wall-clock time when the agent left it unset. A
            timeout or any other exception yields a failure result
            carrying the context's ``task_id``.
        """
        start_time = datetime.now(timezone.utc)

        try:
            result = await asyncio.wait_for(
                agent.process_task(self.task, context),
                timeout=self.timeout,
            )

            # Add duration if not set
            if result.duration_ms is None:
                duration_ms = int(
                    (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
                )
                # TaskResult is frozen, so we need to create a new one
                result = TaskResult(
                    task_id=result.task_id,
                    outcome=result.outcome,
                    data=result.data,
                    metadata=result.metadata,
                    should_continue=result.should_continue,
                    skip_reason=result.skip_reason,
                    timestamp=result.timestamp,
                    duration_ms=duration_ms,
                    error_message=result.error_message,
                )

            return result

        except asyncio.TimeoutError:
            return TaskResult.failure(
                task_id=context["task_id"],
                error=f"Timeout after {self.timeout}s",
            )
        except Exception as e:
            # A misbehaving agent loses its attempt instead of aborting
            # the whole trial.
            return TaskResult.failure(
                task_id=context["task_id"],
                error=str(e),
            )

__init__(task, domain, warlord, challenger, judge, context=None, timeout=300, parallel=True)

Initialize a trial.

Parameters:

Name Type Description Default
task str

The task to execute.

required
domain str

The domain being contested.

required
warlord Agent

The current Warlord agent.

required
challenger Agent

The challenging agent.

required
judge Judge

Judge to evaluate outcomes.

required
context Optional[Dict[str, Any]]

Execution context.

None
timeout int

Timeout for each agent's execution (seconds).

300
parallel bool

Whether to execute agents in parallel.

True
Source code in orc/arena/trial.py
def __init__(
    self,
    task: str,
    domain: str,
    warlord: Agent,
    challenger: Agent,
    judge: Judge,
    context: Optional[Dict[str, Any]] = None,
    timeout: int = 300,
    parallel: bool = True,
):
    """
    Set up a trial between a defending warlord and a challenger.

    Args:
        task: The task both agents will be asked to perform.
        domain: The domain whose leadership is being contested.
        warlord: The agent currently holding the domain.
        challenger: The agent contesting the domain.
        judge: Judge used to evaluate the two outcomes.
        context: Extra execution context; a falsy value (``None`` or an
            empty mapping) is replaced by a fresh empty dict.
        timeout: Per-agent execution timeout, in seconds.
        parallel: Whether the two agents are executed in parallel.
    """
    # Every trial gets its own random identifier.
    self.trial_id = str(uuid.uuid4())

    # What is being contested.
    self.task = task
    self.domain = domain

    # Who takes part, and who decides.
    self.warlord = warlord
    self.challenger = challenger
    self.judge = judge

    # How the trial is run.
    self.context = context if context else {}
    self.timeout = timeout
    self.parallel = parallel

execute() async

Execute the trial.

Both agents attempt the task. The Judge evaluates the results and determines a winner.

Returns:

Type Description
TrialResult

TrialResult with the outcome.

Source code in orc/arena/trial.py
async def execute(self) -> TrialResult:
    """
    Run the trial and report its outcome.

    The warlord and the challenger each attempt the task (in parallel
    or one after the other, depending on ``self.parallel``). Their
    results are handed to the judge as submissions. The warlord keeps
    the win when the verdict is a tie or names neither participant.

    Returns:
        TrialResult describing the winner, both results and the verdict.
    """
    started_at = datetime.now(timezone.utc)

    def context_for(role):
        # Shared context first, then the per-role keys layered on top.
        return {
            **self.context,
            "task_id": f"{self.trial_id}_{role}",
            "trial_id": self.trial_id,
            "role": role,
        }

    warlord_context = context_for("warlord")
    challenger_context = context_for("challenger")

    # Run both agents.
    if not self.parallel:
        warlord_result = await self._execute_single(self.warlord, warlord_context)
        challenger_result = await self._execute_single(self.challenger, challenger_context)
    else:
        warlord_result, challenger_result = await self._execute_parallel(
            warlord_context, challenger_context
        )

    # One submission per participant, warlord first.
    participants = (
        (self.warlord, warlord_result),
        (self.challenger, challenger_result),
    )
    submissions = [
        Submission(
            agent=agent.name,
            result=outcome,
            latency_ms=outcome.duration_ms,
        )
        for agent, outcome in participants
    ]

    verdict = await self.judge.evaluate(self.task, submissions)

    # Ties, and verdicts naming an unknown agent, go to the defending warlord.
    defender_name = self.warlord.name
    winner = verdict.winner
    if verdict.is_tie or winner not in (defender_name, self.challenger.name):
        winner = defender_name

    if winner == defender_name:
        winner_result = warlord_result
    else:
        winner_result = challenger_result

    # Wall-clock duration of the whole trial, in milliseconds.
    elapsed = datetime.now(timezone.utc) - started_at
    duration_ms = int(elapsed.total_seconds() * 1000)

    return TrialResult(
        task=self.task,
        domain=self.domain,
        winner=winner,
        winner_result=winner_result,
        was_challenged=True,
        verdict=verdict,
        warlord_result=warlord_result,
        challenger_result=challenger_result,
        trial_id=self.trial_id,
        duration_ms=duration_ms,
    )

TrialResult

orc.arena.trial.TrialResult dataclass

Result of a trial between two agents.

Attributes:

Name Type Description
task str

The task that was executed

domain str

The domain being contested

winner str

Name of the winning agent

winner_result TaskResult

TaskResult from the winner

was_challenged bool

Whether this was a contested trial

verdict Optional[Verdict]

Judge's verdict (if challenged)

warlord_result Optional[TaskResult]

TaskResult from the warlord

challenger_result Optional[TaskResult]

TaskResult from the challenger

trial_id str

Unique trial identifier

timestamp datetime

When the trial completed

duration_ms Optional[int]

Total trial duration in milliseconds

Source code in orc/arena/trial.py
@dataclass
class TrialResult:
    """
    Outcome of a trial between two agents.

    Attributes:
        task: The task that was executed
        domain: The domain being contested
        winner: Name of the winning agent
        winner_result: TaskResult produced by the winner
        was_challenged: Whether this was a contested trial
        verdict: Judge's verdict (if challenged)
        warlord_result: TaskResult produced by the warlord
        challenger_result: TaskResult produced by the challenger
        trial_id: Unique trial identifier (random UUID string by default)
        timestamp: When the result was created (current UTC time by default)
        duration_ms: Total trial duration in milliseconds
    """

    task: str
    domain: str
    winner: str
    winner_result: TaskResult
    was_challenged: bool
    verdict: Optional[Verdict]
    warlord_result: Optional[TaskResult] = None
    challenger_result: Optional[TaskResult] = None
    trial_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    duration_ms: Optional[int] = None

    @property
    def data(self) -> Any:
        """Return the winning result's data, or None if that result is falsy."""
        if not self.winner_result:
            return None
        return self.winner_result.data

    def to_dict(self) -> Dict[str, Any]:
        """Build a plain-dict representation for logging/storage."""
        record: Dict[str, Any] = {
            "trial_id": self.trial_id,
            "task": self.task,
            "domain": self.domain,
            "winner": self.winner,
            "was_challenged": self.was_challenged,
        }
        # Optional nested parts are serialised via their own to_dict();
        # a falsy part is recorded as None.
        for key in ("verdict", "warlord_result", "challenger_result"):
            part = getattr(self, key)
            record[key] = part.to_dict() if part else None
        record["timestamp"] = self.timestamp.isoformat()
        record["duration_ms"] = self.duration_ms
        return record

data property

Get the winning result's data.

to_dict()

Convert to dictionary for logging/storage.

Source code in orc/arena/trial.py
def to_dict(self) -> Dict[str, Any]:
    """Build a plain-dict representation for logging/storage.

    The verdict and the two per-agent results are serialised through
    their own ``to_dict()``; any of them that is falsy is stored as None.
    The timestamp is emitted in ISO 8601 form.
    """
    def dump(part):
        # Nested objects serialise themselves; absent ones become None.
        return part.to_dict() if part else None

    record: Dict[str, Any] = {}
    for name in ("trial_id", "task", "domain", "winner", "was_challenged"):
        record[name] = getattr(self, name)
    record["verdict"] = dump(self.verdict)
    record["warlord_result"] = dump(self.warlord_result)
    record["challenger_result"] = dump(self.challenger_result)
    record["timestamp"] = self.timestamp.isoformat()
    record["duration_ms"] = self.duration_ms
    return record