Harness

Spawn external coding agents (Claude Code, Codex, Gemini, OpenCode) from inside agent loops with budget caps, schema-bound output, and retries.

The building block for driving external coding agents from inside your loop. app.harness(prompt, schema=..., provider=...) spawns Claude Code, Codex, Gemini, or OpenCode as a managed subprocess (or SDK call), waits for structured output, validates it against your schema, and hands back a rich result with cost and retry metadata.

Without harness, you would manage subprocess plumbing, JSON repair, retry-with-backoff, schema validation, output-file cleanup, and per-provider quirks yourself. With harness, those become one call you can wrap in a loop, a tournament, or an adversarial pattern.

The shape

from pydantic import BaseModel
from agentfield import Agent, HarnessConfig

# Output schema handed to app.harness(..., schema=ReviewResult) below.
class ReviewResult(BaseModel):
    findings: list[str]
    # Plain str: the allowed values listed here are a convention, not enforced by the type.
    severity: str  # "low" | "medium" | "high"

# Agent-wide harness defaults: each app.harness(...) call starts from these
# values and only needs to pass what differs.
_reviewer_defaults = HarnessConfig(
    provider="claude-code",
    model="sonnet",
    permission_mode="plan",
    max_turns=12,
    max_budget_usd=0.50,
    tools=["Read", "Grep", "Glob"],
)

app = Agent(node_id="reviewer", harness_config=_reviewer_defaults)

@app.reasoner()
async def review_diff(diff: str) -> dict:
    """Review a diff through the agent's default harness.

    Returns ``{"ok": False, "error": ...}`` when the run fails or produces no
    schema-valid output; otherwise the validated review together with the
    run's cost, turn count and session id.
    """
    result = await app.harness(
        f"Review this diff. Be precise, no fluff.\n\n{diff}",
        schema=ReviewResult,
    )

    if result.is_error:
        return {"ok": False, "error": result.error_message}

    # parsed is None when schema validation fell through every repair layer,
    # so guard before calling model_dump() on it.
    if result.parsed is None:
        return {"ok": False, "error": result.error_message or "no schema-valid output"}

    return {
        "ok": True,
        "review": result.parsed.model_dump(),
        "cost_usd": result.cost_usd,
        "num_turns": result.num_turns,
        "session_id": result.session_id,
    }

import { Agent, type HarnessConfig } from '@agentfield/sdk';
import { z } from 'zod';

// Zod schema passed as `schema` to app.harness(...) below; severity is
// restricted to the three listed levels.
const ReviewResult = z.object({
  findings: z.array(z.string()),
  severity: z.enum(['low', 'medium', 'high']),
});

// Defaults pinned on the agent — every app.harness(...) call inherits them.
const harnessConfig: HarnessConfig = {
  provider: 'claude-code',
  model: 'sonnet',
  permissionMode: 'plan',
  maxTurns: 12,
  maxBudgetUsd: 0.5,
  tools: ['Read', 'Grep', 'Glob'],
};

// The agent carries the defaults above under the node id 'reviewer'.
const app = new Agent({ nodeId: 'reviewer', harnessConfig });

/**
 * Reviews a diff through the agent's default harness.
 *
 * Resolves to `{ ok: false, error }` when the run fails or yields no
 * schema-valid output; otherwise to the validated review plus cost, turn
 * count and session id.
 */
app.reasoner('review_diff', async (input: { diff: string }) => {
  const result = await app.harness(
    `Review this diff. Be precise, no fluff.\n\n${input.diff}`,
    { schema: ReviewResult },
  );

  if (result.isError) {
    return { ok: false, error: result.errorMessage };
  }

  // parsed is null when validation fell through every repair layer; without
  // this guard ReviewResult.parse(...) would throw instead of reporting it.
  if (result.parsed == null) {
    return { ok: false, error: result.errorMessage ?? 'no schema-valid output' };
  }

  return {
    ok: true,
    review: ReviewResult.parse(result.parsed),
    costUsd: result.costUsd,
    numTurns: result.numTurns,
    sessionId: result.sessionId,
  };
});

package main

import (
    "context"
    "fmt"

    "github.com/Agent-Field/agentfield/sdk/go/agent"
    "github.com/Agent-Field/agentfield/sdk/go/harness"
)

// ReviewResult is the structured output the harness call in reviewDiff
// decodes into; the JSON tags give the field names used in the output.
type ReviewResult struct {
    Findings []string `json:"findings"`
    Severity string   `json:"severity"`
}

// newReviewer builds the "reviewer" agent with Claude Code harness defaults:
// the sonnet model, plan permission mode and at most 12 turns.
//
// NOTE(review): unlike the Python and TypeScript examples, no budget cap or
// tool allow-list is pinned here — confirm whether that is intentional.
func newReviewer() (*agent.Agent, error) {
    defaults := &agent.HarnessConfig{
        Provider:       "claude-code",
        Model:          "sonnet",
        PermissionMode: "plan",
        MaxTurns:       12,
    }
    cfg := agent.Config{
        NodeID:        "reviewer",
        HarnessConfig: defaults,
    }
    return agent.New(cfg)
}

// reviewDiff asks the agent's harness to review diff and decodes the
// structured answer into a ReviewResult.
//
// It returns an error when the JSON schema cannot be derived, when the
// harness call itself fails, or when the run ends in a terminal error.
func reviewDiff(ctx context.Context, app *agent.Agent, diff string) (*ReviewResult, error) {
    var out ReviewResult

    // Surface schema-derivation failures instead of running with a bad schema.
    schema, err := harness.StructToJSONSchema(out)
    if err != nil {
        return nil, fmt.Errorf("build review schema: %w", err)
    }

    result, err := app.Harness(ctx,
        "Review this diff. Be precise, no fluff.\n\n"+diff,
        schema, &out,
        harness.Options{}, // empty — inherits HarnessConfig from the agent
    )
    if err != nil {
        return nil, err
    }
    if result.IsError {
        return nil, fmt.Errorf("harness: %s", result.ErrorMessage)
    }
    return &out, nil
}

HarnessConfig — defaults on the Agent

Pin defaults on the agent constructor; override per call only for the dimensions that genuinely vary by task.

Field	Type	Default	What it does
`provider`	string	required	`"claude-code"` / `"codex"` / `"gemini"` / `"opencode"`.
`model`	string	`"sonnet"`	Provider-specific model identifier — e.g. `"sonnet"`, `"gpt-5-codex"`, `"gemini-2.5-pro"`, `"qwen/qwen3-coder"`.
`max_turns`	int	`30`	Hard cap on agent iterations.
`max_budget_usd`	float	`null`	Cost ceiling. Provider aborts when total spend would exceed this.
`max_retries`	int	`3`	Retry attempts for transient errors (rate-limit, 5xx, connection reset).
`initial_delay` / `max_delay` / `backoff_factor`	float	`1.0` / `30.0` / `2.0`	Exponential-backoff knobs for retries.
`tools`	string[]	`["Read","Write","Edit","Bash","Glob","Grep"]`	Allowed tool names — gates what the coding agent may invoke.
`permission_mode`	string	`null`	`"plan"` (plan-first, then execute) or `"auto"` (bypass per-step prompts).
`system_prompt`	string	`null`	Custom system prompt prepended to the loop.
`env`	dict	`{}`	Extra environment variables forwarded to the subprocess.
`cwd`	string	working dir	Working directory the coding agent treats as the repo root.
`project_dir`	string	`null`	`opencode` only — maps to `--dir`. When set, `cwd` is used only for output-file placement.
`codex_bin` / `gemini_bin` / `opencode_bin`	string	binary name	Override CLI paths when the binary is not on `$PATH`.

HarnessResult — what comes back

Field	Type	What it is
`result` / `text`	string	Raw agent response (last message text).
`parsed`	model / null	Validated schema instance when `schema=` was passed. `null` if validation fell through all repair layers.
`is_error` / `error_message`	bool / string	True when the run failed terminally. Inspect `error_message` for the diagnosis.
`cost_usd`	float / null	Total cost reported by the provider. `null` when the provider does not surface cost.
`num_turns`	int	Iterations the agent took. Useful for cost monitoring and tuning `max_turns`.
`session_id`	string	Provider session identifier — pass back in `resume_session_id` to continue a multi-turn run.
`messages`	list	Full message stream (provider-specific shape). Inspect for debugging.

Schema-bound output

Pass a Pydantic model (Python), Zod schema (TypeScript), or Go struct as schema=. The runner injects an OUTPUT REQUIREMENTS suffix telling the agent to write JSON to .agentfield_output.json, then reads, repairs, validates, and returns result.parsed as a validated instance. Three recovery layers run before declaring failure:

Parse the output file directly.
Cosmetic repair — strip markdown fences, trailing commas, repair truncated braces.
One-shot AI repair — re-emit the same content as valid JSON conforming to the schema (no tools, no exploration; cheap reformatting only).

After that, the run is retried up to max_retries times. The output file is cleaned up automatically.

Provider switching is a one-field flip

The four providers are interchangeable through provider=. Same loop code, different worker:

# Plan with Claude (careful reasoner), execute with Codex (fast implementer).
plan = await app.harness(prompt, provider="claude-code", permission_mode="plan", schema=ChangePlan)
edits = await app.harness(apply, provider="codex",       permission_mode="auto", schema=EditReport)

// Plan with Claude (careful reasoner), execute with Codex (fast implementer).
const plan = await app.harness(prompt, {
  provider: 'claude-code',
  permissionMode: 'plan',
  schema: ChangePlan,
});
const edits = await app.harness(apply, {
  provider: 'codex',
  permissionMode: 'auto',
  schema: EditReport,
});

plan, _ := app.Harness(ctx, prompt, planSchema, &planOut,
    harness.Options{Provider: "claude-code", PermissionMode: "plan"})
edits, _ := app.Harness(ctx, apply, editSchema, &editOut,
    harness.Options{Provider: "codex", PermissionMode: "auto"})

See the four provider docs for the role each one plays best:

Anthropic — Claude Code — the careful reasoner.
OpenAI — Codex — the fast implementer.
Google — Gemini CLI — the long-context worker.
OpenCode — the open-model path.

Common patterns

Retry with provider fallback

When the primary provider hits its budget cap or a rate limit, fall back to a cheaper one without losing the loop's intent.

async def hardened_harness(prompt: str, schema, **opts):
    """Try each provider in order and return the first run with parsed output.

    Raises RuntimeError carrying the last provider's error message when
    every provider fails.
    """
    for provider in ("claude-code", "opencode"):
        result = await app.harness(prompt, schema=schema, provider=provider, **opts)
        # A run only counts when it neither errored nor came back unparsed.
        if result.is_error or result.parsed is None:
            continue
        return result
    raise RuntimeError(f"all providers failed: {result.error_message}")

/**
 * Tries each provider in order and returns the first run that produced
 * parsed output.
 *
 * @throws Error naming the last provider's error message (when one was
 *   reported) if every provider fails.
 */
async function hardenedHarness<T>(prompt: string, schema: T, opts: HarnessOptions = {}) {
  let lastError = '';
  for (const provider of ['claude-code', 'opencode'] as const) {
    const r = await app.harness(prompt, { ...opts, provider, schema });
    // `!= null` rejects both null and undefined parsed output.
    if (!r.isError && r.parsed != null) return r;
    lastError = r.errorMessage ?? lastError;
  }
  // Keep the diagnosis instead of throwing a bare message.
  throw new Error(lastError ? `all providers failed: ${lastError}` : 'all providers failed');
}

// Walk the providers in order and return the first run that succeeds.
for _, provider := range []string{"claude-code", "opencode"} {
    opts := harness.Options{Provider: provider}
    r, err := app.Harness(ctx, prompt, schema, dest, opts)
    if err != nil || r.IsError {
        continue // this provider failed; try the next one
    }
    return r, nil
}
return nil, errors.New("all providers failed")

Tournament

Run multiple providers in parallel against the same prompt and pick the best result with an LLM-as-judge call.

runs = await asyncio.gather(*[
    app.harness(prompt, provider=p, schema=ImplResult, max_budget_usd=0.05)
    for p in ("claude-code", "codex", "opencode")
])

verdict = await app.harness(
    "Pick the best implementation. Score on correctness and minimality.\n\n"
    + format_runs(runs),
    provider="claude-code",
    schema=Verdict,
)
return runs[verdict.parsed.winner_index]

// Fan out: one budget-capped harness run per provider, awaited together.
const runs = await Promise.all(
  (['claude-code', 'codex', 'opencode'] as const).map((p) =>
    app.harness(prompt, { provider: p, schema: ImplResult, maxBudgetUsd: 0.05 }),
  ),
);

// Judge: a single call that picks the winner among the runs.
const verdict = await app.harness(
  `Pick the best implementation.\n\n${formatRuns(runs)}`,
  { provider: 'claude-code', schema: Verdict },
);
// The judge run can itself fail or come back unparsed; guard before indexing.
if (verdict.isError || verdict.parsed == null) {
  throw new Error(`judge failed: ${verdict.errorMessage}`);
}
return runs[verdict.parsed.winnerIndex];

// NOTE(review): unlike the asyncio.gather / Promise.all versions, this loop
// runs the providers one after another, not in parallel.
var runs []*harness.Result
for _, p := range []string{"claude-code", "codex", "opencode"} {
    // Every run is handed the same &implOut — presumably each decode
    // overwrites the previous one, so only the last provider's output
    // survives the loop; confirm and use one destination per run if so.
    // The error is discarded, so r is appended without being checked.
    r, _ := app.Harness(ctx, prompt, implSchema, &implOut,
        harness.Options{Provider: p, MaxBudgetUSD: 0.05})
    runs = append(runs, r)
}
// Judge call over the collected runs; its error is discarded as well.
v, _ := app.Harness(ctx, judgePrompt(runs), verdictSchema, &verdict,
    harness.Options{Provider: "claude-code"})

Adversarial verifier

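One provider proposes a change, a second one verifies it. The verifier's findings are fed into the next attempt, and the loop repeats until the reviewer no longer reports high severity or the attempt cap (three in the examples below) runs out.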

for attempt in range(3):
    impl = await app.harness(prompt, provider="codex", schema=ImplResult)
    if impl.is_error:
        continue

    review = await app.harness(
        f"Find real bugs in this change:\n\n{impl.parsed.diff}",
        provider="claude-code",
        permission_mode="plan",
        schema=ReviewResult,
    )
    if review.parsed.severity in ("low", "medium"):
        return impl  # accepted

    prompt = f"Re-attempt. Previous reviewer findings:\n{review.parsed.findings}\n\n{prompt}"

// Up to three propose/verify rounds: Codex implements, Claude Code reviews.
for (let attempt = 0; attempt < 3; attempt++) {
  const impl = await app.harness(prompt, { provider: 'codex', schema: ImplResult });
  // parsed is null when schema validation failed, so check it before
  // reading .diff below.
  if (impl.isError || impl.parsed == null) continue;

  const review = await app.harness(
    `Find real bugs in this change:\n\n${impl.parsed.diff}`,
    { provider: 'claude-code', permissionMode: 'plan', schema: ReviewResult },
  );
  // No usable verdict: spend another attempt rather than throw on null.
  if (review.isError || review.parsed == null) continue;
  if (review.parsed.severity !== 'high') return impl;

  // Rejected: feed the reviewer's findings into the next attempt's prompt.
  prompt = `Re-attempt. Reviewer findings:\n${review.parsed.findings}\n\n${prompt}`;
}

// Up to three propose/verify rounds: Codex implements, Claude Code reviews.
for attempt := 0; attempt < 3; attempt++ {
    impl, err := app.Harness(ctx, prompt, implSchema, &implOut,
        harness.Options{Provider: "codex"})
    // Check err before touching impl: the result is presumably nil when the
    // call itself fails.
    if err != nil || impl.IsError {
        continue
    }

    review, err := app.Harness(ctx, reviewPrompt(implOut), reviewSchema, &reviewOut,
        harness.Options{Provider: "claude-code", PermissionMode: "plan"})
    // A failed review leaves no trustworthy verdict in reviewOut, so retry.
    // (review must be used: Go rejects declared-but-unused variables.)
    if err != nil || review.IsError {
        continue
    }
    if reviewOut.Severity != "high" {
        return implOut, nil
    }

    // Rejected: fold the reviewer's findings into the next attempt's prompt.
    prompt = retryPrompt(reviewOut, prompt)
}

Authentication

Each provider reads its own credentials from the environment forwarded to the harness subprocess. None of these need to be passed through harness_config.env if they're already in the process environment.

Provider	Required env vars
`claude-code`	`ANTHROPIC_API_KEY` (or Vertex / Bedrock routing via `claude_agent_sdk` config)
`codex`	`OPENAI_API_KEY`, or run `codex login` once for OAuth
`gemini`	`GEMINI_API_KEY`, or run `gemini auth login` once for OAuth
`opencode`	Whatever its `opencode auth login` flow configures — `OPENROUTER_API_KEY`, `OLLAMA_HOST`, etc.