Agent Harness

Complete examples of dispatching multi-turn tasks to tool-equipped external agents

Agent Harness

Dispatch multi-turn tasks to tool-equipped external agents from inside your agent workflows

Under Active Development

.harness() is being implemented on feat/harness-v2 (Epic #208). These examples reflect the planned API and will be verified before going live.

The harness lets your agent delegate multi-turn work to an external agent with full tool access. Instead of a single prompt-in, response-out call, the agent reads files, runs commands, writes output, and iterates until the task is done. Your agent gets back a typed, schema-validated result.

These examples cover the full range: basic code generation, automated code review, multi-provider comparison, composing harness with human approval, and cost-controlled batch processing.


Example 1: Basic Code Generation

An agent that generates a Python utility function, writes it to disk, runs its tests, and returns structured metadata about what was produced.

from agentfield import Agent
from pydantic import BaseModel
from typing import List

# Agent wired to the harness: every app.harness() call in this module runs on
# the provider/limits/tool allowlist declared here unless overridden per call.
app = Agent(
    "code-generator",
    harness_config={
        "provider": "claude-code",  # external agent backend
        "model": "sonnet",
        "max_turns": 15,  # turn ceiling for a harness call
        "max_budget_usd": 2.0,  # spend ceiling for a harness call
        "tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],  # tool allowlist
    },
)


class GeneratedCode(BaseModel):
    """Schema the harness output is validated against (exposed as result.parsed)."""

    filename: str  # implementation file written by the agent
    language: str
    code: str  # full source of the generated function (per the prompt)
    test_filename: str  # test_-prefixed pytest file
    tests_pass: bool  # whether the generated tests passed when run
    num_tests: int


@app.reasoner()
async def generate_utility(description: str, output_dir: str):
    """
    Delegate utility-function generation to the harness agent.

    The external agent writes an implementation and a pytest file under
    ``output_dir``, runs the tests, and reports metadata back, which is
    validated against :class:`GeneratedCode`.
    """
    task = (
        f"Create a Python utility function: {description}. "
        f"Write the implementation to {output_dir}/. "
        f"Name the file after the function (snake_case). "
        f"Write unit tests in a test_ prefixed file in the same directory. "
        f"Run the tests with pytest and confirm they pass. "
        f"Output a JSON object with: filename, language, code (full source), "
        f"test_filename, tests_pass (bool), num_tests (int)."
    )
    run = await app.harness(task, schema=GeneratedCode, cwd=output_dir)

    # Task-level failure: record it on the agent's notes, then report upstream.
    if run.is_error:
        app.note(
            f"Code generation failed: {run.error_message}",
            tags=["error", "harness"],
        )
        return {
            "status": "failed",
            "error": run.error_message,
            "turns_used": run.num_turns,
        }

    artifact = run.parsed
    verdict = "passing" if artifact.tests_pass else "failing"

    app.note(
        f"Generated {artifact.filename} with {artifact.num_tests} tests "
        f"({verdict}). "
        f"Cost: ${run.cost_usd:.4f} over {run.num_turns} turns.",
        tags=["harness", "codegen"],
    )

    return {
        "status": "completed",
        "file": artifact.filename,
        "test_file": artifact.test_filename,
        "tests_pass": artifact.tests_pass,
        "num_tests": artifact.num_tests,
        "cost_usd": run.cost_usd,
        "turns": run.num_turns,
        "duration_ms": run.duration_ms,
    }
import { Agent } from "@agentfield/sdk";
import { z } from "zod";

// Zod schema the harness output is validated against (result.parsed).
const GeneratedCode = z.object({
  filename: z.string(), // implementation file written by the agent
  language: z.string(),
  code: z.string(), // full source of the generated function
  testFilename: z.string(), // test_-prefixed pytest file
  testsPass: z.boolean(), // whether the generated tests passed when run
  numTests: z.number(),
});

// Agent wired to the harness: every app.harness() call here runs on this
// provider with these limits and this tool allowlist.
const app = new Agent("code-generator", {
  harnessConfig: {
    provider: "claude-code", // external agent backend
    model: "sonnet",
    maxTurns: 15, // turn ceiling for a harness call
    maxBudgetUsd: 2.0, // spend ceiling for a harness call
    tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
  },
});

app.reasoner(
  "generateUtility",
  async (
    { description, outputDir }: { description: string; outputDir: string },
    ctx
  ) => {
    const result = await app.harness(
      `Create a Python utility function: ${description}. ` +
        `Write the implementation to ${outputDir}/. ` +
        `Name the file after the function (snake_case). ` +
        `Write unit tests in a test_ prefixed file in the same directory. ` +
        `Run the tests with pytest and confirm they pass. ` +
        `Output a JSON object with: filename, language, code (full source), ` +
        `testFilename, testsPass (bool), numTests (int).`,
      {
        schema: GeneratedCode,
        cwd: outputDir,
      }
    );

    if (result.isError) {
      ctx.note(`Code generation failed: ${result.errorMessage}`, {
        tags: ["error", "harness"],
      });
      return {
        status: "failed",
        error: result.errorMessage,
        turnsUsed: result.numTurns,
      };
    }

    const generated = result.parsed!;

    ctx.note(
      `Generated ${generated.filename} with ${generated.numTests} tests ` +
        `(${generated.testsPass ? "passing" : "failing"}). ` +
        `Cost: $${result.costUsd?.toFixed(4)} over ${result.numTurns} turns.`,
      { tags: ["harness", "codegen"] }
    );

    return {
      status: "completed",
      file: generated.filename,
      testFile: generated.testFilename,
      testsPass: generated.testsPass,
      numTests: generated.numTests,
      costUsd: result.costUsd,
      turns: result.numTurns,
      durationMs: result.durationMs,
    };
  }
);

app.start();
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/agentfield/sdk-go/agentfield"
)

// GeneratedCode is the schema the harness output is unmarshalled into
// (populated via agentfield.WithSchema below).
type GeneratedCode struct {
	Filename     string `json:"filename"` // implementation file written by the agent
	Language     string `json:"language"`
	Code         string `json:"code"` // full source of the generated function
	TestFilename string `json:"test_filename"` // test_-prefixed pytest file
	TestsPass    bool   `json:"tests_pass"` // whether the generated tests passed
	NumTests     int    `json:"num_tests"`
}

// main wires up the code-generator agent and registers the generateUtility
// reasoner, which delegates the work to the harness and returns metadata.
func main() {
	// Harness defaults for every app.Harness call made by this agent.
	app := agentfield.NewAgent("code-generator",
		agentfield.WithHarnessConfig(agentfield.HarnessConfig{
			Provider:     "claude-code",
			Model:        "sonnet",
			MaxTurns:     15,
			MaxBudgetUSD: 2.0,
			Tools:        []string{"Read", "Write", "Edit", "Bash", "Glob", "Grep"},
		}),
	)

	app.Reasoner("generateUtility", func(ctx context.Context, params struct {
		Description string `json:"description"`
		OutputDir   string `json:"output_dir"`
	}) (map[string]any, error) {
		// The JSON keys named in the prompt must match the GeneratedCode
		// struct tags so unmarshalling succeeds.
		prompt := fmt.Sprintf(
			"Create a Python utility function: %s. "+
				"Write the implementation to %s/. "+
				"Name the file after the function (snake_case). "+
				"Write unit tests in a test_ prefixed file in the same directory. "+
				"Run the tests with pytest and confirm they pass. "+
				"Output a JSON object with: filename, language, code (full source), "+
				"test_filename, tests_pass (bool), num_tests (int).",
			params.Description, params.OutputDir,
		)

		// generated is filled in from the agent's JSON output via WithSchema.
		var generated GeneratedCode
		result, err := app.Harness(ctx, prompt,
			agentfield.WithSchema(&generated),
			agentfield.WithCwd(params.OutputDir),
		)
		if err != nil {
			// Transport-level failure (distinct from result.IsError below).
			return nil, fmt.Errorf("harness error: %w", err)
		}

		// Task-level failure reported by the harness itself.
		if result.IsError {
			app.Note(ctx, fmt.Sprintf("Code generation failed: %s", result.ErrorMessage),
				agentfield.WithTags("error", "harness"))
			return map[string]any{
				"status":     "failed",
				"error":      result.ErrorMessage,
				"turns_used": result.NumTurns,
			}, nil
		}

		app.Note(ctx,
			fmt.Sprintf("Generated %s with %d tests (cost: $%.4f, turns: %d)",
				generated.Filename, generated.NumTests, result.CostUSD, result.NumTurns),
			agentfield.WithTags("harness", "codegen"),
		)

		return map[string]any{
			"status":      "completed",
			"file":        generated.Filename,
			"test_file":   generated.TestFilename,
			"tests_pass":  generated.TestsPass,
			"num_tests":   generated.NumTests,
			"cost_usd":    result.CostUSD,
			"turns":       result.NumTurns,
			"duration_ms": result.DurationMs,
		}, nil
	})

	if err := app.Start(); err != nil {
		log.Fatal(err)
	}
}

Example 2: Code Review and Fix

An agent that reviews a file for issues, applies fixes, and reports what changed. The agent reads the source, identifies problems, rewrites the file, and runs the test suite to confirm nothing broke.

from agentfield import Agent
from pydantic import BaseModel
from typing import List, Literal

# Review agent: same tool allowlist as the codegen example, but with higher
# turn and budget ceilings.
app = Agent(
    "code-reviewer",
    harness_config={
        "provider": "claude-code",
        "model": "sonnet",
        "max_turns": 25,
        "max_budget_usd": 4.0,
        "tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
    },
)


class CodeIssue(BaseModel):
    """One issue reported by the review agent."""

    line: int  # line number in the reviewed file
    category: Literal["bug", "style", "performance", "security", "maintainability"]
    description: str  # one-sentence description (per the prompt)
    fixed: bool  # True if the agent applied a fix itself


class ReviewResult(BaseModel):
    """Schema the harness review output is validated against (result.parsed)."""

    file_reviewed: str
    issues_found: List[CodeIssue]
    issues_fixed: int  # count of issues fixed in place
    all_tests_pass: bool  # test-suite status after fixes were applied
    summary: str  # one-paragraph summary (per the prompt)


@app.reasoner()
async def review_and_fix(file_path: str, repo_root: str, focus: List[str]):
    """
    Run an automated review-and-fix pass over one file via the harness.

    The external agent inspects ``file_path``, fixes what it can in place,
    re-runs the test suite from ``repo_root``, and returns a structured
    report (validated against :class:`ReviewResult`) that is summarized
    for the caller.
    """
    # An empty focus list means the agent reviews for every category.
    if focus:
        focus_str = ", ".join(focus)
    else:
        focus_str = "all categories"

    run = await app.harness(
        f"Review the file at {file_path} for code issues. "
        f"Focus on: {focus_str}. "
        f"For each issue found: record the line number, category, and a one-sentence description. "
        f"Apply fixes directly to the file where you can. "
        f"After fixing, run the test suite from {repo_root} and check if all tests pass. "
        f"Output a JSON object with: file_reviewed, issues_found (list of objects with "
        f"line, category, description, fixed), issues_fixed (count), "
        f"all_tests_pass (bool), summary (one paragraph).",
        schema=ReviewResult,
        cwd=repo_root,
    )

    if run.is_error:
        app.note(
            f"Review failed for {file_path}: {run.error_message}",
            tags=["error", "review"],
        )
        return {
            "status": "failed",
            "file": file_path,
            "error": run.error_message,
        }

    report = run.parsed

    # Issues the agent reported but did not fix need human follow-up.
    pending = []
    for issue in report.issues_found:
        if not issue.fixed:
            pending.append(issue)

    tests_word = "pass" if report.all_tests_pass else "FAIL"
    app.note(
        f"Reviewed {report.file_reviewed}: "
        f"{len(report.issues_found)} issues found, "
        f"{report.issues_fixed} fixed, "
        f"{len(pending)} require manual attention. "
        f"Tests {tests_word}. "
        f"Cost: ${run.cost_usd:.4f}.",
        tags=["review", "harness"],
    )

    return {
        "status": "completed",
        "file": report.file_reviewed,
        "issues_found": len(report.issues_found),
        "issues_fixed": report.issues_fixed,
        "unfixed_issues": [
            {"line": issue.line, "category": issue.category, "description": issue.description}
            for issue in pending
        ],
        "all_tests_pass": report.all_tests_pass,
        "summary": report.summary,
        "cost_usd": run.cost_usd,
        "turns": run.num_turns,
    }
import { Agent } from "@agentfield/sdk";
import { z } from "zod";

// One issue reported by the review agent; `fixed` marks whether the agent
// applied a fix itself.
const CodeIssue = z.object({
  line: z.number(),
  category: z.enum([
    "bug",
    "style",
    "performance",
    "security",
    "maintainability",
  ]),
  description: z.string(),
  fixed: z.boolean(),
});

// Schema the harness review output is validated against (result.parsed).
const ReviewResult = z.object({
  fileReviewed: z.string(),
  issuesFound: z.array(CodeIssue),
  issuesFixed: z.number(),
  allTestsPass: z.boolean(),
  summary: z.string(),
});

// Review agent: same tool allowlist as the codegen example, higher ceilings.
const app = new Agent("code-reviewer", {
  harnessConfig: {
    provider: "claude-code",
    model: "sonnet",
    maxTurns: 25,
    maxBudgetUsd: 4.0,
    tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
  },
});

// Reasoner: runs an automated review of one file via the harness and
// summarizes what was found, fixed, and still outstanding.
app.reasoner(
  "reviewAndFix",
  async (
    {
      filePath,
      repoRoot,
      focus,
    }: { filePath: string; repoRoot: string; focus: string[] },
    ctx
  ) => {
    // An empty focus list means the agent reviews for every category.
    const focusStr = focus.length > 0 ? focus.join(", ") : "all categories";

    const result = await app.harness(
      `Review the file at ${filePath} for code issues. ` +
        `Focus on: ${focusStr}. ` +
        `For each issue found: record the line number, category, and a one-sentence description. ` +
        `Apply fixes directly to the file where you can. ` +
        `After fixing, run the test suite from ${repoRoot} and check if all tests pass. ` +
        `Output a JSON object with: fileReviewed, issuesFound (list of objects with ` +
        `line, category, description, fixed), issuesFixed (count), ` +
        `allTestsPass (bool), summary (one paragraph).`,
      {
        schema: ReviewResult, // harness output is validated against this
        cwd: repoRoot,
      }
    );

    // Task-level failure: record it and report without throwing.
    if (result.isError) {
      ctx.note(`Review failed for ${filePath}: ${result.errorMessage}`, {
        tags: ["error", "review"],
      });
      return { status: "failed", file: filePath, error: result.errorMessage };
    }

    const review = result.parsed!;
    // Issues the agent reported but did not fix need human follow-up.
    const unfixed = review.issuesFound.filter((i) => !i.fixed);

    ctx.note(
      `Reviewed ${review.fileReviewed}: ` +
        `${review.issuesFound.length} issues found, ` +
        `${review.issuesFixed} fixed, ` +
        `${unfixed.length} require manual attention. ` +
        `Tests ${review.allTestsPass ? "pass" : "FAIL"}. ` +
        `Cost: $${result.costUsd?.toFixed(4)}.`,
      { tags: ["review", "harness"] }
    );

    return {
      status: "completed",
      file: review.fileReviewed,
      issuesFound: review.issuesFound.length,
      issuesFixed: review.issuesFixed,
      unfixedIssues: unfixed.map((i) => ({
        line: i.line,
        category: i.category,
        description: i.description,
      })),
      allTestsPass: review.allTestsPass,
      summary: review.summary,
      costUsd: result.costUsd,
      turns: result.numTurns,
    };
  }
);

app.start();
package main

import (
	"context"
	"fmt"
	"log"
	"strings"

	"github.com/agentfield/sdk-go/agentfield"
)

// CodeIssue is one issue reported by the review agent; Fixed marks whether
// the agent applied a fix itself.
type CodeIssue struct {
	Line        int    `json:"line"`
	Category    string `json:"category"` // e.g. "bug", "style", "security"
	Description string `json:"description"`
	Fixed       bool   `json:"fixed"`
}

// ReviewResult is the schema the harness review output is unmarshalled into.
type ReviewResult struct {
	FileReviewed string      `json:"file_reviewed"`
	IssuesFound  []CodeIssue `json:"issues_found"`
	IssuesFixed  int         `json:"issues_fixed"`
	AllTestsPass bool        `json:"all_tests_pass"`
	Summary      string      `json:"summary"`
}

// main wires up the code-reviewer agent and registers the reviewAndFix
// reasoner, which delegates the review to the harness.
func main() {
	app := agentfield.NewAgent("code-reviewer",
		agentfield.WithHarnessConfig(agentfield.HarnessConfig{
			Provider:     "claude-code",
			Model:        "sonnet",
			MaxTurns:     25,
			MaxBudgetUSD: 4.0,
			Tools:        []string{"Read", "Write", "Edit", "Bash", "Glob", "Grep"},
		}),
	)

	app.Reasoner("reviewAndFix", func(ctx context.Context, params struct {
		FilePath string   `json:"file_path"`
		RepoRoot string   `json:"repo_root"`
		Focus    []string `json:"focus"`
	}) (map[string]any, error) {
		// An empty focus list means the agent reviews for every category.
		focusStr := "all categories"
		if len(params.Focus) > 0 {
			focusStr = strings.Join(params.Focus, ", ")
		}

		// JSON keys named in the prompt must match the ReviewResult tags.
		prompt := fmt.Sprintf(
			"Review the file at %s for code issues. "+
				"Focus on: %s. "+
				"For each issue found: record the line number, category, and a one-sentence description. "+
				"Apply fixes directly to the file where you can. "+
				"After fixing, run the test suite from %s and check if all tests pass. "+
				"Output a JSON object with: file_reviewed, issues_found (list of objects with "+
				"line, category, description, fixed), issues_fixed (count), "+
				"all_tests_pass (bool), summary (one paragraph).",
			params.FilePath, focusStr, params.RepoRoot,
		)

		// review is filled in from the agent's JSON output via WithSchema.
		var review ReviewResult
		result, err := app.Harness(ctx, prompt,
			agentfield.WithSchema(&review),
			agentfield.WithCwd(params.RepoRoot),
		)
		if err != nil {
			// Transport-level failure (distinct from result.IsError below).
			return nil, fmt.Errorf("harness error: %w", err)
		}

		// Task-level failure reported by the harness itself.
		if result.IsError {
			app.Note(ctx, fmt.Sprintf("Review failed: %s", result.ErrorMessage),
				agentfield.WithTags("error", "review"))
			return map[string]any{
				"status": "failed",
				"file":   params.FilePath,
				"error":  result.ErrorMessage,
			}, nil
		}

		// Collect issues the agent reported but did not fix; these need
		// human follow-up.
		var unfixed []map[string]any
		for _, issue := range review.IssuesFound {
			if !issue.Fixed {
				unfixed = append(unfixed, map[string]any{
					"line":        issue.Line,
					"category":    issue.Category,
					"description": issue.Description,
				})
			}
		}

		app.Note(ctx,
			fmt.Sprintf("Reviewed %s: %d issues, %d fixed. Tests: %v. Cost: $%.4f",
				review.FileReviewed, len(review.IssuesFound),
				review.IssuesFixed, review.AllTestsPass, result.CostUSD),
			agentfield.WithTags("review", "harness"),
		)

		return map[string]any{
			"status":         "completed",
			"file":           review.FileReviewed,
			"issues_found":   len(review.IssuesFound),
			"issues_fixed":   review.IssuesFixed,
			"unfixed_issues": unfixed,
			"all_tests_pass": review.AllTestsPass,
			"summary":        review.Summary,
			"cost_usd":       result.CostUSD,
			"turns":          result.NumTurns,
		}, nil
	})

	if err := app.Start(); err != nil {
		log.Fatal(err)
	}
}

Example 3: Multi-Provider Comparison

An agent that runs the same task on two different providers in parallel, then compares the results by cost, turn count, and output quality. Useful for benchmarking providers before committing to one for production.

import asyncio
from agentfield import Agent
from pydantic import BaseModel
from typing import Optional

# No provider/model here: this benchmarker passes them per harness() call so
# the same task can run on two different providers (see compare_providers).
app = Agent(
    "provider-benchmarker",
    harness_config={
        "max_turns": 20,
        "max_budget_usd": 3.0,
        "tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
    },
)


class TaskOutput(BaseModel):
    """Per-provider output schema for the benchmarked task."""

    implementation_file: str
    lines_of_code: int
    tests_pass: bool
    approach_summary: str  # one sentence (per the prompt)


class ComparisonReport(BaseModel):
    """Shape of the comparison data assembled by compare_providers."""

    task: str
    provider_a: str  # "claude-code" in this example
    provider_b: str  # "codex" in this example
    provider_a_cost_usd: Optional[float]  # None when that provider failed
    provider_b_cost_usd: Optional[float]
    provider_a_turns: int
    provider_b_turns: int
    provider_a_tests_pass: bool
    provider_b_tests_pass: bool
    provider_a_lines: int
    provider_b_lines: int
    winner_by_cost: str
    winner_by_turns: str
    recommendation: str


@app.reasoner()
async def compare_providers(task_description: str, work_dir: str):
    """
    Run the same task on two providers simultaneously and compare them.

    Compares cost, turn count, and whether tests pass, tolerating the case
    where one (or both) providers fail. Returns a dict shaped like
    :class:`ComparisonReport`.
    """
    prompt = (
        f"{task_description} "
        f"Write the implementation to {work_dir}/. "
        f"Include unit tests and run them. "
        f"Output a JSON object with: implementation_file, lines_of_code (int), "
        f"tests_pass (bool), approach_summary (one sentence)."
    )

    # Run both providers concurrently. return_exceptions=True keeps one
    # provider's failure from cancelling the other call.
    # NOTE(review): both calls share cwd=work_dir, so their output files can
    # collide — consider per-provider subdirectories.
    result_a, result_b = await asyncio.gather(
        app.harness(
            prompt,
            schema=TaskOutput,
            provider="claude-code",
            model="sonnet",
            cwd=work_dir,
        ),
        app.harness(
            prompt,
            schema=TaskOutput,
            provider="codex",
            model="codex-large",
            cwd=work_dir,
        ),
        return_exceptions=True,
    )

    # Handle provider-level failures gracefully: a result may be a raised
    # exception (gather) or a completed call that reports is_error.
    a_ok = not isinstance(result_a, Exception) and not result_a.is_error
    b_ok = not isinstance(result_b, Exception) and not result_b.is_error

    if not a_ok and not b_ok:
        return {
            "status": "both_failed",
            "error_a": str(result_a) if isinstance(result_a, Exception) else result_a.error_message,
            "error_b": str(result_b) if isinstance(result_b, Exception) else result_b.error_message,
        }

    a_cost = result_a.cost_usd if a_ok else None
    b_cost = result_b.cost_usd if b_ok else None
    a_turns = result_a.num_turns if a_ok else 0
    b_turns = result_b.num_turns if b_ok else 0
    a_output = result_a.parsed if a_ok else None
    b_output = result_b.parsed if b_ok else None

    # A missing cost (failed provider) sorts last. Comparing on `cost or 999`
    # would misclassify a genuine $0.00 run as missing and cap real costs.
    a_cost_rank = a_cost if a_cost is not None else float("inf")
    b_cost_rank = b_cost if b_cost is not None else float("inf")
    winner_cost = "claude-code" if a_cost_rank < b_cost_rank else "codex"

    # A failed provider records 0 turns and must not win on turn count.
    if not a_ok:
        winner_turns = "codex"
    elif not b_ok:
        winner_turns = "claude-code"
    else:
        winner_turns = "claude-code" if a_turns < b_turns else "codex"

    # "Better overall" only when one provider wins on both axes; the original
    # comparison was a tautology (winner_cost == winner_cost).
    overall = (
        "claude-code"
        if winner_cost == "claude-code" and winner_turns == "claude-code"
        else "codex"
    )
    # Conditionals are not valid inside an f-string format spec, so the cost
    # strings are formatted up front ("N/A" when the provider failed).
    a_cost_str = f"${a_cost:.4f}" if a_cost is not None else "N/A"
    b_cost_str = f"${b_cost:.4f}" if b_cost is not None else "N/A"
    recommendation = (
        f"{overall} "
        f"performed better on this task. "
        f"claude-code: {a_cost_str}, {a_turns} turns. "
        f"codex: {b_cost_str}, {b_turns} turns."
    )

    app.note(
        f"Comparison complete. {recommendation}",
        tags=["benchmark", "harness"],
    )

    return {
        "status": "completed",
        "task": task_description,
        "provider_a": "claude-code",
        "provider_b": "codex",
        "provider_a_cost_usd": a_cost,
        "provider_b_cost_usd": b_cost,
        "provider_a_turns": a_turns,
        "provider_b_turns": b_turns,
        "provider_a_tests_pass": a_output.tests_pass if a_output else False,
        "provider_b_tests_pass": b_output.tests_pass if b_output else False,
        "provider_a_lines": a_output.lines_of_code if a_output else 0,
        "provider_b_lines": b_output.lines_of_code if b_output else 0,
        "winner_by_cost": winner_cost,
        "winner_by_turns": winner_turns,
        "recommendation": recommendation,
    }
import { Agent } from "@agentfield/sdk";
import { z } from "zod";

// Per-provider output schema for the benchmarked task.
const TaskOutput = z.object({
  implementationFile: z.string(),
  linesOfCode: z.number(),
  testsPass: z.boolean(),
  approachSummary: z.string(), // one sentence (per the prompt)
});

// No provider/model here: they are passed per harness() call so the same
// task can run on two different providers.
const app = new Agent("provider-benchmarker", {
  harnessConfig: {
    maxTurns: 20,
    maxBudgetUsd: 3.0,
    tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
  },
});

// Reasoner: runs the same task on two providers concurrently and compares
// cost, turn count, and test outcomes.
app.reasoner(
  "compareProviders",
  async (
    {
      taskDescription,
      workDir,
    }: { taskDescription: string; workDir: string },
    ctx
  ) => {
    const prompt =
      `${taskDescription} ` +
      `Write the implementation to ${workDir}/. ` +
      `Include unit tests and run them. ` +
      `Output a JSON object with: implementationFile, linesOfCode (int), ` +
      `testsPass (bool), approachSummary (one sentence).`;

    // Run both providers concurrently
    // allSettled (not all) so one provider's rejection doesn't discard the
    // other provider's result.
    const [resultA, resultB] = await Promise.allSettled([
      app.harness(prompt, {
        schema: TaskOutput,
        provider: "claude-code",
        model: "sonnet",
        cwd: workDir,
      }),
      app.harness(prompt, {
        schema: TaskOutput,
        provider: "codex",
        model: "codex-large",
        cwd: workDir,
      }),
    ]);

    // A provider counts as OK only if it settled, reported no task-level
    // error, and produced parseable output.
    const aOk =
      resultA.status === "fulfilled" &&
      !resultA.value.isError &&
      resultA.value.parsed != null;
    const bOk =
      resultB.status === "fulfilled" &&
      !resultB.value.isError &&
      resultB.value.parsed != null;

    if (!aOk && !bOk) {
      // Both failed: report each side's error (rejection or task error).
      return {
        status: "both_failed",
        errorA:
          resultA.status === "rejected"
            ? String(resultA.reason)
            : (resultA.value as any).errorMessage,
        errorB:
          resultB.status === "rejected"
            ? String(resultB.reason)
            : (resultB.value as any).errorMessage,
      };
    }

    const aResult = aOk ? (resultA as PromiseFulfilledResult<any>).value : null;
    const bResult = bOk ? (resultB as PromiseFulfilledResult<any>).value : null;

    const aCost = aResult?.costUsd ?? null;
    const bCost = bResult?.costUsd ?? null;
    const aTurns = aResult?.numTurns ?? 0;
    const bTurns = bResult?.numTurns ?? 0;

    // Missing costs sort last via Infinity so a failed provider can't win
    // on cost.
    const winnerCost =
      (aCost ?? Infinity) < (bCost ?? Infinity) ? "claude-code" : "codex";
    // NOTE(review): a failed provider defaults to 0 turns and would win
    // winnerTurns here — consider guarding on aOk/bOk first.
    const winnerTurns = aTurns < bTurns ? "claude-code" : "codex";

    const recommendation =
      `claude-code: $${aCost?.toFixed(4) ?? "N/A"}, ${aTurns} turns. ` +
      `codex: $${bCost?.toFixed(4) ?? "N/A"}, ${bTurns} turns. ` +
      `Winner by cost: ${winnerCost}. Winner by turns: ${winnerTurns}.`;

    ctx.note(`Comparison complete. ${recommendation}`, {
      tags: ["benchmark", "harness"],
    });

    return {
      status: "completed",
      taskDescription,
      providerA: "claude-code",
      providerB: "codex",
      providerACostUsd: aCost,
      providerBCostUsd: bCost,
      providerATurns: aTurns,
      providerBTurns: bTurns,
      providerATestsPass: aResult?.parsed?.testsPass ?? false,
      providerBTestsPass: bResult?.parsed?.testsPass ?? false,
      providerALines: aResult?.parsed?.linesOfCode ?? 0,
      providerBLines: bResult?.parsed?.linesOfCode ?? 0,
      winnerByCost: winnerCost,
      winnerByTurns: winnerTurns,
      recommendation,
    };
  }
);

app.start();

Parallel Harness Calls

asyncio.gather (Python) and Promise.allSettled (TypeScript) let you run multiple harness calls concurrently. Each call is independent — they don't share state. Use return_exceptions=True in Python so one provider failure doesn't cancel the other. Note that while the calls share no in-process state, concurrent calls pointed at the same cwd can still write to the same files on disk — give each call its own working directory when outputs might collide.


Example 4: Composing with Human-in-the-Loop

An agent that uses the harness to implement a feature, then pauses for human review before the changes are deployed. The agent does the work; a human approves before anything goes to production.

from agentfield import Agent
from pydantic import BaseModel
from typing import List

# Feature work gets the largest ceilings in these examples. permission_mode
# "plan" makes the agent present a plan before writing files (see the
# permission_mode note later in this page).
app = Agent(
    "feature-implementer",
    harness_config={
        "provider": "claude-code",
        "model": "sonnet",
        "max_turns": 40,
        "max_budget_usd": 8.0,
        "tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
        "permission_mode": "plan",  # show a plan before any file writes
    },
)


class ImplementationResult(BaseModel):
    """Schema the harness implementation output is validated against."""

    files_created: List[str]
    files_modified: List[str]
    tests_added: int
    all_tests_pass: bool
    pr_description: str  # markdown suitable for a PR body (per the prompt)
    breaking_changes: bool


@app.reasoner()
async def implement_and_review(
    feature_spec: str,
    repo_root: str,
    review_url_template: str,
    execution_id: str,
):
    """
    Implement a feature via the harness, then gate it on human approval.

    The harness agent writes the change and runs the tests; the workflow
    then pauses until a reviewer approves or rejects the staged diff.
    On approval, returns implementation details for the deploy step; on
    rejection, returns the reviewer's feedback for iteration.

    review_url_template: URL where the reviewer can inspect the diff,
    e.g. "https://review.example.com/features/{execution_id}"
    """
    # Step 1: Implement the feature
    run = await app.harness(
        f"Implement the following feature in the repository at {repo_root}: "
        f"{feature_spec}. "
        f"Follow existing code conventions. "
        f"Add tests for all new behavior. "
        f"Run the full test suite and confirm it passes. "
        f"Do not commit or push — leave changes staged. "
        f"Output a JSON object with: files_created (list), files_modified (list), "
        f"tests_added (int), all_tests_pass (bool), "
        f"pr_description (markdown string suitable for a pull request body), "
        f"breaking_changes (bool).",
        schema=ImplementationResult,
        cwd=repo_root,
    )

    if run.is_error:
        app.note(
            f"Implementation failed: {run.error_message}",
            tags=["error", "harness"],
        )
        return {
            "status": "implementation_failed",
            "error": run.error_message,
            "cost_usd": run.cost_usd,
        }

    change_set = run.parsed

    # Failing tests short-circuit before a human is ever asked to review.
    if not change_set.all_tests_pass:
        app.note(
            "Implementation complete but tests are failing. Returning for retry.",
            tags=["warning", "harness"],
        )
        return {
            "status": "tests_failing",
            "files_created": change_set.files_created,
            "files_modified": change_set.files_modified,
            "cost_usd": run.cost_usd,
        }

    app.note(
        f"Implementation complete: {len(change_set.files_created)} files created, "
        f"{len(change_set.files_modified)} modified. Tests pass. "
        f"Breaking changes: {change_set.breaking_changes}. "
        f"Pausing for human review.",
        tags=["harness", "hitl"],
    )

    # Step 2: Pause for human review
    review_url = review_url_template.format(execution_id=execution_id)
    approval = await app.pause(
        approval_request_id=f"feature-review-{execution_id}",
        approval_request_url=review_url,
        expires_in_hours=72,
    )

    if not approval.approved:
        app.note(
            f"Review rejected. Feedback: {approval.feedback}",
            tags=["hitl", "rejected"],
        )
        return {
            "status": "rejected",
            "feedback": approval.feedback,
            "files_created": change_set.files_created,
            "files_modified": change_set.files_modified,
            "pr_description": change_set.pr_description,
        }

    app.note(
        f"Review approved by {approval.reviewer_id}. Ready to deploy.",
        tags=["hitl", "approved"],
    )

    return {
        "status": "approved",
        "files_created": change_set.files_created,
        "files_modified": change_set.files_modified,
        "tests_added": change_set.tests_added,
        "all_tests_pass": change_set.all_tests_pass,
        "pr_description": change_set.pr_description,
        "breaking_changes": change_set.breaking_changes,
        "approved_by": approval.reviewer_id,
        "implementation_cost_usd": run.cost_usd,
        "implementation_turns": run.num_turns,
    }
import { Agent } from "@agentfield/sdk";
import { z } from "zod";

// Schema the harness implementation output is validated against.
const ImplementationResult = z.object({
  filesCreated: z.array(z.string()),
  filesModified: z.array(z.string()),
  testsAdded: z.number(),
  allTestsPass: z.boolean(),
  prDescription: z.string(), // markdown suitable for a PR body
  breakingChanges: z.boolean(),
});

// Feature work gets the largest ceilings; permissionMode "plan" makes the
// agent present a plan before writing files.
const app = new Agent("feature-implementer", {
  harnessConfig: {
    provider: "claude-code",
    model: "sonnet",
    maxTurns: 40,
    maxBudgetUsd: 8.0,
    tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
    permissionMode: "plan",
  },
});

// Reasoner: implements a feature via the harness, then pauses the workflow
// for human approval before anything is deployed.
app.reasoner(
  "implementAndReview",
  async (
    {
      featureSpec,
      repoRoot,
      reviewUrlTemplate,
      executionId,
    }: {
      featureSpec: string;
      repoRoot: string;
      reviewUrlTemplate: string;
      executionId: string;
    },
    ctx
  ) => {
    // Step 1: implement. The prompt tells the agent to stage changes only.
    const result = await app.harness(
      `Implement the following feature in the repository at ${repoRoot}: ` +
        `${featureSpec}. ` +
        `Follow existing code conventions. ` +
        `Add tests for all new behavior. ` +
        `Run the full test suite and confirm it passes. ` +
        `Do not commit or push — leave changes staged. ` +
        `Output a JSON object with: filesCreated (list), filesModified (list), ` +
        `testsAdded (int), allTestsPass (bool), ` +
        `prDescription (markdown string suitable for a pull request body), ` +
        `breakingChanges (bool).`,
      {
        schema: ImplementationResult,
        cwd: repoRoot,
      }
    );

    if (result.isError) {
      ctx.note(`Implementation failed: ${result.errorMessage}`, {
        tags: ["error", "harness"],
      });
      return {
        status: "implementation_failed",
        error: result.errorMessage,
        costUsd: result.costUsd,
      };
    }

    const impl = result.parsed!;

    // Failing tests short-circuit before a human is ever asked to review.
    if (!impl.allTestsPass) {
      ctx.note("Implementation complete but tests are failing.", {
        tags: ["warning", "harness"],
      });
      return {
        status: "tests_failing",
        filesCreated: impl.filesCreated,
        filesModified: impl.filesModified,
        costUsd: result.costUsd,
      };
    }

    ctx.note(
      `Implementation complete. ${impl.filesCreated.length} files created, ` +
        `${impl.filesModified.length} modified. Tests pass. Pausing for review.`,
      { tags: ["harness", "hitl"] }
    );

    // Step 2: block until the reviewer approves or rejects (72h expiry).
    const reviewUrl = reviewUrlTemplate.replace("{executionId}", executionId);
    const approval = await app.pause({
      approvalRequestId: `feature-review-${executionId}`,
      approvalRequestUrl: reviewUrl,
      expiresInHours: 72,
    });

    if (!approval.approved) {
      ctx.note(`Review rejected. Feedback: ${approval.feedback}`, {
        tags: ["hitl", "rejected"],
      });
      return {
        status: "rejected",
        feedback: approval.feedback,
        filesCreated: impl.filesCreated,
        filesModified: impl.filesModified,
        prDescription: impl.prDescription,
      };
    }

    ctx.note(`Review approved by ${approval.reviewerId}. Ready to deploy.`, {
      tags: ["hitl", "approved"],
    });

    return {
      status: "approved",
      filesCreated: impl.filesCreated,
      filesModified: impl.filesModified,
      testsAdded: impl.testsAdded,
      allTestsPass: impl.allTestsPass,
      prDescription: impl.prDescription,
      breakingChanges: impl.breakingChanges,
      approvedBy: approval.reviewerId,
      implementationCostUsd: result.costUsd,
      implementationTurns: result.numTurns,
    };
  }
);

app.start();

permission_mode: plan

Setting permission_mode: "plan" tells the agent to show its plan before writing any files. This gives you a chance to catch misunderstandings before the agent does work. Combine it with app.pause() for a two-stage review: plan approval, then diff approval.


Example 5: Cost-Controlled Batch Processing

An agent that processes a list of tasks with a shared budget. Each task runs sequentially, tracking cumulative spend. When the budget is exhausted, remaining tasks are skipped and reported.

from agentfield import Agent
from pydantic import BaseModel
from typing import List, Optional

# Agent whose harness defaults apply to every app.harness() call below.
# Note: no "max_budget_usd" here — the batch reasoner caps each call
# individually with the remaining batch budget instead.
app = Agent(
    "batch-processor",
    harness_config={
        "provider": "claude-code",  # external agent backend
        "model": "sonnet",
        "max_turns": 20,  # per-task turn ceiling
        "tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
    },
)


class TaskSpec(BaseModel):
    """One unit of batch work: what to do, which file, and how urgent."""

    task_id: str  # caller-supplied identifier, echoed back in TaskOutcome
    description: str  # natural-language instruction passed to the harness
    target_file: str  # primary file the harness is told to work on
    priority: int  # 1 = highest; tasks run in ascending priority order


class TaskOutcome(BaseModel):
    """Per-task result row collected into the final BatchResult report."""

    task_id: str
    status: str  # "completed", "failed", "skipped"
    cost_usd: Optional[float]  # None when the task was skipped
    turns: Optional[int]  # harness turns used; None when skipped
    error: Optional[str]  # error message; "Budget exhausted" for skips
    tests_pass: Optional[bool]  # None unless the task completed


class BatchResult(BaseModel):
    """Summary of an entire batch run, returned by process_batch."""

    total_tasks: int
    completed: int
    failed: int
    skipped: int
    total_cost_usd: float  # actual spend across all harness calls
    budget_usd: float  # budget the caller provided
    budget_remaining_usd: float  # clamped to >= 0
    outcomes: List[TaskOutcome]  # one entry per task, in execution order


class SingleTaskOutput(BaseModel):
    """Schema the harness is asked to emit for each individual task."""

    tests_pass: bool  # whether the test suite passed after the change
    summary: str  # one-sentence description of what was done


@app.reasoner()
async def process_batch(
    tasks: List[dict],
    repo_root: str,
    budget_usd: float,
):
    """
    Run a batch of harness tasks in priority order under a shared budget.

    Each task is dispatched with the remaining budget as its per-call
    spend cap; once the budget is gone, the rest of the tasks are
    recorded as skipped. Returns a serialized BatchResult describing
    every task's fate and the total cost.
    """
    # Lowest priority number runs first (1 = highest priority).
    ordered = sorted(
        (TaskSpec(**raw) for raw in tasks),
        key=lambda s: s.priority,
    )

    outcomes: List[TaskOutcome] = []
    spent = 0.0

    for spec in ordered:
        remaining_budget = budget_usd - spent

        # Out of money: record a skip so the final report covers every task.
        if remaining_budget <= 0.0:
            app.note(
                f"Budget exhausted (${spent:.4f} / ${budget_usd:.2f}). "
                f"Skipping task {spec.task_id}: {spec.description[:60]}",
                tags=["budget", "skipped"],
            )
            outcomes.append(TaskOutcome(
                task_id=spec.task_id,
                status="skipped",
                cost_usd=None,
                turns=None,
                error="Budget exhausted",
                tests_pass=None,
            ))
            continue

        app.note(
            f"Starting task {spec.task_id} (priority {spec.priority}). "
            f"Budget remaining: ${remaining_budget:.4f}.",
            tags=["batch", "harness"],
        )

        result = await app.harness(
            f"In the repository at {repo_root}, perform the following task: "
            f"{spec.description}. "
            f"The primary file to work on is {spec.target_file}. "
            f"Run the test suite after making changes. "
            f"Output a JSON object with: tests_pass (bool), summary (one sentence).",
            schema=SingleTaskOutput,
            cwd=repo_root,
            max_budget_usd=remaining_budget,  # per-call cap: one task can't eat the batch
        )

        cost_this_task = result.cost_usd or 0.0
        spent += cost_this_task

        if result.is_error:
            app.note(
                f"Task {spec.task_id} failed: {result.error_message}. "
                f"Cost so far: ${spent:.4f}.",
                tags=["error", "batch"],
            )
            outcomes.append(TaskOutcome(
                task_id=spec.task_id,
                status="failed",
                cost_usd=cost_this_task,
                turns=result.num_turns,
                error=result.error_message,
                tests_pass=None,
            ))
        else:
            output = result.parsed
            app.note(
                f"Task {spec.task_id} completed in {result.num_turns} turns "
                f"(${cost_this_task:.4f}). Tests: {'pass' if output.tests_pass else 'FAIL'}.",
                tags=["batch", "harness"],
            )
            outcomes.append(TaskOutcome(
                task_id=spec.task_id,
                status="completed",
                cost_usd=cost_this_task,
                turns=result.num_turns,
                error=None,
                tests_pass=output.tests_pass,
            ))

    # Derive the tallies from the outcome rows rather than counting inline.
    completed = sum(o.status == "completed" for o in outcomes)
    failed = sum(o.status == "failed" for o in outcomes)
    skipped = sum(o.status == "skipped" for o in outcomes)

    report = BatchResult(
        total_tasks=len(ordered),
        completed=completed,
        failed=failed,
        skipped=skipped,
        total_cost_usd=spent,
        budget_usd=budget_usd,
        budget_remaining_usd=max(0.0, budget_usd - spent),
        outcomes=outcomes,
    )

    app.note(
        f"Batch complete: {completed} done, {failed} failed, {skipped} skipped. "
        f"Total cost: ${spent:.4f} / ${budget_usd:.2f}.",
        tags=["batch", "summary"],
    )

    return report.dict()
import { Agent } from "@agentfield/sdk";
import { z } from "zod";

// Zod schema for one unit of batch work, parsed from the caller's raw input.
const TaskSpec = z.object({
  taskId: z.string(),
  description: z.string(),
  targetFile: z.string(),
  priority: z.number(), // 1 = highest; tasks run in ascending priority order
});

// Shape the harness is asked to emit for each individual task.
const SingleTaskOutput = z.object({
  testsPass: z.boolean(),
  summary: z.string(),
});

// Per-task result row in the final batch report.
type TaskOutcome = {
  taskId: string;
  status: "completed" | "failed" | "skipped";
  costUsd: number | null; // null when the task was skipped
  turns: number | null; // null when the task was skipped
  error: string | null; // "Budget exhausted" for skipped tasks
  testsPass: boolean | null; // null unless the task completed
};

// Harness defaults applied to every app.harness() call. Note: no
// maxBudgetUsd here — the batch reasoner caps each call with the
// remaining batch budget instead.
const app = new Agent("batch-processor", {
  harnessConfig: {
    provider: "claude-code",
    model: "sonnet",
    maxTurns: 20,
    tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
  },
});

// Run a batch of harness tasks in priority order under a shared budget.
// Each task is capped at the remaining batch budget; once the budget is
// exhausted, the rest of the tasks are recorded as skipped. Returns a
// full report of completed / failed / skipped tasks and total spend.
app.reasoner(
  "processBatch",
  async (
    {
      tasks,
      repoRoot,
      budgetUsd,
    }: { tasks: unknown[]; repoRoot: string; budgetUsd: number },
    ctx
  ) => {
    // Validate raw inputs, then run highest priority (lowest number) first.
    const taskSpecs = tasks
      .map((t) => TaskSpec.parse(t))
      .sort((a, b) => a.priority - b.priority);

    const outcomes: TaskOutcome[] = [];
    let totalCost = 0;
    let completed = 0;
    let failed = 0;
    let skipped = 0;

    for (const spec of taskSpecs) {
      const remainingBudget = budgetUsd - totalCost;

      // Out of money: record a skip so the final report covers every task.
      if (remainingBudget <= 0) {
        ctx.note(
          `Budget exhausted ($${totalCost.toFixed(4)} / $${budgetUsd.toFixed(2)}). ` +
            `Skipping task ${spec.taskId}.`,
          { tags: ["budget", "skipped"] }
        );
        outcomes.push({
          taskId: spec.taskId,
          status: "skipped",
          costUsd: null,
          turns: null,
          error: "Budget exhausted",
          testsPass: null,
        });
        skipped++;
        continue;
      }

      ctx.note(
        `Starting task ${spec.taskId} (priority ${spec.priority}). ` +
          `Budget remaining: $${remainingBudget.toFixed(4)}.`,
        { tags: ["batch", "harness"] }
      );

      const result = await app.harness(
        `In the repository at ${repoRoot}, perform the following task: ` +
          `${spec.description}. ` +
          `The primary file to work on is ${spec.targetFile}. ` +
          `Run the test suite after making changes. ` +
          `Output a JSON object with: testsPass (bool), summary (one sentence).`,
        {
          schema: SingleTaskOutput,
          cwd: repoRoot,
          maxBudgetUsd: remainingBudget, // per-call cap: one task can't eat the batch
        }
      );

      const taskCost = result.costUsd ?? 0;
      totalCost += taskCost;

      if (result.isError) {
        ctx.note(
          `Task ${spec.taskId} failed: ${result.errorMessage}. ` +
            `Cost so far: $${totalCost.toFixed(4)}.`,
          { tags: ["error", "batch"] }
        );
        outcomes.push({
          taskId: spec.taskId,
          status: "failed",
          // Coalesce to null: TaskOutcome.turns is `number | null`, and
          // numTurns may be absent on an errored result.
          costUsd: taskCost,
          turns: result.numTurns ?? null,
          error: result.errorMessage ?? null,
          testsPass: null,
        });
        failed++;
      } else {
        const output = result.parsed!;
        ctx.note(
          `Task ${spec.taskId} completed in ${result.numTurns} turns ` +
            `($${taskCost.toFixed(4)}). Tests: ${output.testsPass ? "pass" : "FAIL"}.`,
          { tags: ["batch", "harness"] }
        );
        outcomes.push({
          taskId: spec.taskId,
          status: "completed",
          costUsd: taskCost,
          turns: result.numTurns ?? null, // keep the declared `number | null` shape
          error: null,
          testsPass: output.testsPass,
        });
        completed++;
      }
    }

    ctx.note(
      `Batch complete: ${completed} done, ${failed} failed, ${skipped} skipped. ` +
        `Total cost: $${totalCost.toFixed(4)} / $${budgetUsd.toFixed(2)}.`,
      { tags: ["batch", "summary"] }
    );

    return {
      totalTasks: taskSpecs.length,
      completed,
      failed,
      skipped,
      totalCostUsd: totalCost,
      budgetUsd,
      budgetRemainingUsd: Math.max(0, budgetUsd - totalCost),
      outcomes,
    };
  }
);

// Start the agent.
app.start();

Per-Call Budget Caps

Passing max_budget_usd (Python) or maxBudgetUsd (TypeScript) on individual harness calls caps the spend for that specific task. The batch example uses the remaining budget as the per-task cap, so a single expensive task can't consume the entire batch budget.


Key Patterns


Next Steps