Agent Harness
Complete examples of dispatching multi-turn tasks to tool-equipped external agents
Agent Harness
Dispatch multi-turn tasks to tool-equipped external agents from inside your agent workflows
Under Active Development
.harness() is being implemented on feat/harness-v2 (Epic #208).
These examples reflect the planned API and will be verified before going live.
The harness lets your agent delegate multi-turn work to an external agent with full tool access. Instead of a single prompt-in, response-out call, the agent reads files, runs commands, writes output, and iterates until the task is done. Your agent gets back a typed, schema-validated result.
These examples cover the full range: basic code generation, automated code review, multi-provider comparison, composing harness with human approval, and cost-controlled batch processing.
Example 1: Basic Code Generation
An agent that generates a Python utility function, writes it to disk, runs its tests, and returns structured metadata about what was produced.
from agentfield import Agent
from pydantic import BaseModel
from typing import List
app = Agent(
"code-generator",
harness_config={
"provider": "claude-code",
"model": "sonnet",
"max_turns": 15,
"max_budget_usd": 2.0,
"tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
},
)
class GeneratedCode(BaseModel):
filename: str
language: str
code: str
test_filename: str
tests_pass: bool
num_tests: int
@app.reasoner()
async def generate_utility(description: str, output_dir: str):
"""
Asks the agent to write a utility function matching the
description, place it in output_dir, write unit tests alongside it,
and run those tests. Returns structured metadata about the result.
"""
result = await app.harness(
f"Create a Python utility function: {description}. "
f"Write the implementation to {output_dir}/. "
f"Name the file after the function (snake_case). "
f"Write unit tests in a test_ prefixed file in the same directory. "
f"Run the tests with pytest and confirm they pass. "
f"Output a JSON object with: filename, language, code (full source), "
f"test_filename, tests_pass (bool), num_tests (int).",
schema=GeneratedCode,
cwd=output_dir,
)
if result.is_error:
app.note(
f"Code generation failed: {result.error_message}",
tags=["error", "harness"],
)
return {
"status": "failed",
"error": result.error_message,
"turns_used": result.num_turns,
}
generated = result.parsed
app.note(
f"Generated {generated.filename} with {generated.num_tests} tests "
f"({'passing' if generated.tests_pass else 'failing'}). "
f"Cost: ${result.cost_usd:.4f} over {result.num_turns} turns.",
tags=["harness", "codegen"],
)
return {
"status": "completed",
"file": generated.filename,
"test_file": generated.test_filename,
"tests_pass": generated.tests_pass,
"num_tests": generated.num_tests,
"cost_usd": result.cost_usd,
"turns": result.num_turns,
"duration_ms": result.duration_ms,
}
import { Agent } from "@agentfield/sdk";
import { z } from "zod";
const GeneratedCode = z.object({
filename: z.string(),
language: z.string(),
code: z.string(),
testFilename: z.string(),
testsPass: z.boolean(),
numTests: z.number(),
});
const app = new Agent("code-generator", {
harnessConfig: {
provider: "claude-code",
model: "sonnet",
maxTurns: 15,
maxBudgetUsd: 2.0,
tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
},
});
app.reasoner(
"generateUtility",
async (
{ description, outputDir }: { description: string; outputDir: string },
ctx
) => {
const result = await app.harness(
`Create a Python utility function: ${description}. ` +
`Write the implementation to ${outputDir}/. ` +
`Name the file after the function (snake_case). ` +
`Write unit tests in a test_ prefixed file in the same directory. ` +
`Run the tests with pytest and confirm they pass. ` +
`Output a JSON object with: filename, language, code (full source), ` +
`testFilename, testsPass (bool), numTests (int).`,
{
schema: GeneratedCode,
cwd: outputDir,
}
);
if (result.isError) {
ctx.note(`Code generation failed: ${result.errorMessage}`, {
tags: ["error", "harness"],
});
return {
status: "failed",
error: result.errorMessage,
turnsUsed: result.numTurns,
};
}
const generated = result.parsed!;
ctx.note(
`Generated ${generated.filename} with ${generated.numTests} tests ` +
`(${generated.testsPass ? "passing" : "failing"}). ` +
`Cost: $${result.costUsd?.toFixed(4)} over ${result.numTurns} turns.`,
{ tags: ["harness", "codegen"] }
);
return {
status: "completed",
file: generated.filename,
testFile: generated.testFilename,
testsPass: generated.testsPass,
numTests: generated.numTests,
costUsd: result.costUsd,
turns: result.numTurns,
durationMs: result.durationMs,
};
}
);
app.start();
package main
import (
"context"
"fmt"
"log"
"github.com/agentfield/sdk-go/agentfield"
)
type GeneratedCode struct {
Filename string `json:"filename"`
Language string `json:"language"`
Code string `json:"code"`
TestFilename string `json:"test_filename"`
TestsPass bool `json:"tests_pass"`
NumTests int `json:"num_tests"`
}
func main() {
app := agentfield.NewAgent("code-generator",
agentfield.WithHarnessConfig(agentfield.HarnessConfig{
Provider: "claude-code",
Model: "sonnet",
MaxTurns: 15,
MaxBudgetUSD: 2.0,
Tools: []string{"Read", "Write", "Edit", "Bash", "Glob", "Grep"},
}),
)
app.Reasoner("generateUtility", func(ctx context.Context, params struct {
Description string `json:"description"`
OutputDir string `json:"output_dir"`
}) (map[string]any, error) {
prompt := fmt.Sprintf(
"Create a Python utility function: %s. "+
"Write the implementation to %s/. "+
"Name the file after the function (snake_case). "+
"Write unit tests in a test_ prefixed file in the same directory. "+
"Run the tests with pytest and confirm they pass. "+
"Output a JSON object with: filename, language, code (full source), "+
"test_filename, tests_pass (bool), num_tests (int).",
params.Description, params.OutputDir,
)
var generated GeneratedCode
result, err := app.Harness(ctx, prompt,
agentfield.WithSchema(&generated),
agentfield.WithCwd(params.OutputDir),
)
if err != nil {
return nil, fmt.Errorf("harness error: %w", err)
}
if result.IsError {
app.Note(ctx, fmt.Sprintf("Code generation failed: %s", result.ErrorMessage),
agentfield.WithTags("error", "harness"))
return map[string]any{
"status": "failed",
"error": result.ErrorMessage,
"turns_used": result.NumTurns,
}, nil
}
app.Note(ctx,
fmt.Sprintf("Generated %s with %d tests (cost: $%.4f, turns: %d)",
generated.Filename, generated.NumTests, result.CostUSD, result.NumTurns),
agentfield.WithTags("harness", "codegen"),
)
return map[string]any{
"status": "completed",
"file": generated.Filename,
"test_file": generated.TestFilename,
"tests_pass": generated.TestsPass,
"num_tests": generated.NumTests,
"cost_usd": result.CostUSD,
"turns": result.NumTurns,
"duration_ms": result.DurationMs,
}, nil
})
if err := app.Start(); err != nil {
log.Fatal(err)
}
}
Example 2: Code Review and Fix
An agent that reviews a file for issues, applies fixes, and reports what changed. The agent reads the source, identifies problems, rewrites the file, and runs the test suite to confirm nothing broke.
from agentfield import Agent
from pydantic import BaseModel
from typing import List, Literal
app = Agent(
"code-reviewer",
harness_config={
"provider": "claude-code",
"model": "sonnet",
"max_turns": 25,
"max_budget_usd": 4.0,
"tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
},
)
class CodeIssue(BaseModel):
line: int
category: Literal["bug", "style", "performance", "security", "maintainability"]
description: str
fixed: bool
class ReviewResult(BaseModel):
file_reviewed: str
issues_found: List[CodeIssue]
issues_fixed: int
all_tests_pass: bool
summary: str
@app.reasoner()
async def review_and_fix(file_path: str, repo_root: str, focus: List[str]):
"""
Reviews a source file for issues in the specified categories,
applies fixes where possible, runs the test suite, and returns
a structured report of what was found and changed.
"""
focus_str = ", ".join(focus) if focus else "all categories"
result = await app.harness(
f"Review the file at {file_path} for code issues. "
f"Focus on: {focus_str}. "
f"For each issue found: record the line number, category, and a one-sentence description. "
f"Apply fixes directly to the file where you can. "
f"After fixing, run the test suite from {repo_root} and check if all tests pass. "
f"Output a JSON object with: file_reviewed, issues_found (list of objects with "
f"line, category, description, fixed), issues_fixed (count), "
f"all_tests_pass (bool), summary (one paragraph).",
schema=ReviewResult,
cwd=repo_root,
)
if result.is_error:
app.note(
f"Review failed for {file_path}: {result.error_message}",
tags=["error", "review"],
)
return {
"status": "failed",
"file": file_path,
"error": result.error_message,
}
review = result.parsed
unfixed = [i for i in review.issues_found if not i.fixed]
app.note(
f"Reviewed {review.file_reviewed}: "
f"{len(review.issues_found)} issues found, "
f"{review.issues_fixed} fixed, "
f"{len(unfixed)} require manual attention. "
f"Tests {'pass' if review.all_tests_pass else 'FAIL'}. "
f"Cost: ${result.cost_usd:.4f}.",
tags=["review", "harness"],
)
return {
"status": "completed",
"file": review.file_reviewed,
"issues_found": len(review.issues_found),
"issues_fixed": review.issues_fixed,
"unfixed_issues": [
{"line": i.line, "category": i.category, "description": i.description}
for i in unfixed
],
"all_tests_pass": review.all_tests_pass,
"summary": review.summary,
"cost_usd": result.cost_usd,
"turns": result.num_turns,
}
import { Agent } from "@agentfield/sdk";
import { z } from "zod";
const CodeIssue = z.object({
line: z.number(),
category: z.enum([
"bug",
"style",
"performance",
"security",
"maintainability",
]),
description: z.string(),
fixed: z.boolean(),
});
const ReviewResult = z.object({
fileReviewed: z.string(),
issuesFound: z.array(CodeIssue),
issuesFixed: z.number(),
allTestsPass: z.boolean(),
summary: z.string(),
});
const app = new Agent("code-reviewer", {
harnessConfig: {
provider: "claude-code",
model: "sonnet",
maxTurns: 25,
maxBudgetUsd: 4.0,
tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
},
});
app.reasoner(
"reviewAndFix",
async (
{
filePath,
repoRoot,
focus,
}: { filePath: string; repoRoot: string; focus: string[] },
ctx
) => {
const focusStr = focus.length > 0 ? focus.join(", ") : "all categories";
const result = await app.harness(
`Review the file at ${filePath} for code issues. ` +
`Focus on: ${focusStr}. ` +
`For each issue found: record the line number, category, and a one-sentence description. ` +
`Apply fixes directly to the file where you can. ` +
`After fixing, run the test suite from ${repoRoot} and check if all tests pass. ` +
`Output a JSON object with: fileReviewed, issuesFound (list of objects with ` +
`line, category, description, fixed), issuesFixed (count), ` +
`allTestsPass (bool), summary (one paragraph).`,
{
schema: ReviewResult,
cwd: repoRoot,
}
);
if (result.isError) {
ctx.note(`Review failed for ${filePath}: ${result.errorMessage}`, {
tags: ["error", "review"],
});
return { status: "failed", file: filePath, error: result.errorMessage };
}
const review = result.parsed!;
const unfixed = review.issuesFound.filter((i) => !i.fixed);
ctx.note(
`Reviewed ${review.fileReviewed}: ` +
`${review.issuesFound.length} issues found, ` +
`${review.issuesFixed} fixed, ` +
`${unfixed.length} require manual attention. ` +
`Tests ${review.allTestsPass ? "pass" : "FAIL"}. ` +
`Cost: $${result.costUsd?.toFixed(4)}.`,
{ tags: ["review", "harness"] }
);
return {
status: "completed",
file: review.fileReviewed,
issuesFound: review.issuesFound.length,
issuesFixed: review.issuesFixed,
unfixedIssues: unfixed.map((i) => ({
line: i.line,
category: i.category,
description: i.description,
})),
allTestsPass: review.allTestsPass,
summary: review.summary,
costUsd: result.costUsd,
turns: result.numTurns,
};
}
);
app.start();
package main
import (
"context"
"fmt"
"log"
"strings"
"github.com/agentfield/sdk-go/agentfield"
)
type CodeIssue struct {
Line int `json:"line"`
Category string `json:"category"`
Description string `json:"description"`
Fixed bool `json:"fixed"`
}
type ReviewResult struct {
FileReviewed string `json:"file_reviewed"`
IssuesFound []CodeIssue `json:"issues_found"`
IssuesFixed int `json:"issues_fixed"`
AllTestsPass bool `json:"all_tests_pass"`
Summary string `json:"summary"`
}
func main() {
app := agentfield.NewAgent("code-reviewer",
agentfield.WithHarnessConfig(agentfield.HarnessConfig{
Provider: "claude-code",
Model: "sonnet",
MaxTurns: 25,
MaxBudgetUSD: 4.0,
Tools: []string{"Read", "Write", "Edit", "Bash", "Glob", "Grep"},
}),
)
app.Reasoner("reviewAndFix", func(ctx context.Context, params struct {
FilePath string `json:"file_path"`
RepoRoot string `json:"repo_root"`
Focus []string `json:"focus"`
}) (map[string]any, error) {
focusStr := "all categories"
if len(params.Focus) > 0 {
focusStr = strings.Join(params.Focus, ", ")
}
prompt := fmt.Sprintf(
"Review the file at %s for code issues. "+
"Focus on: %s. "+
"For each issue found: record the line number, category, and a one-sentence description. "+
"Apply fixes directly to the file where you can. "+
"After fixing, run the test suite from %s and check if all tests pass. "+
"Output a JSON object with: file_reviewed, issues_found (list of objects with "+
"line, category, description, fixed), issues_fixed (count), "+
"all_tests_pass (bool), summary (one paragraph).",
params.FilePath, focusStr, params.RepoRoot,
)
var review ReviewResult
result, err := app.Harness(ctx, prompt,
agentfield.WithSchema(&review),
agentfield.WithCwd(params.RepoRoot),
)
if err != nil {
return nil, fmt.Errorf("harness error: %w", err)
}
if result.IsError {
app.Note(ctx, fmt.Sprintf("Review failed: %s", result.ErrorMessage),
agentfield.WithTags("error", "review"))
return map[string]any{
"status": "failed",
"file": params.FilePath,
"error": result.ErrorMessage,
}, nil
}
var unfixed []map[string]any
for _, issue := range review.IssuesFound {
if !issue.Fixed {
unfixed = append(unfixed, map[string]any{
"line": issue.Line,
"category": issue.Category,
"description": issue.Description,
})
}
}
app.Note(ctx,
fmt.Sprintf("Reviewed %s: %d issues, %d fixed. Tests: %v. Cost: $%.4f",
review.FileReviewed, len(review.IssuesFound),
review.IssuesFixed, review.AllTestsPass, result.CostUSD),
agentfield.WithTags("review", "harness"),
)
return map[string]any{
"status": "completed",
"file": review.FileReviewed,
"issues_found": len(review.IssuesFound),
"issues_fixed": review.IssuesFixed,
"unfixed_issues": unfixed,
"all_tests_pass": review.AllTestsPass,
"summary": review.Summary,
"cost_usd": result.CostUSD,
"turns": result.NumTurns,
}, nil
})
if err := app.Start(); err != nil {
log.Fatal(err)
}
}
Example 3: Multi-Provider Comparison
An agent that runs the same task on two different providers in parallel, then compares the results by cost, turn count, and output quality. Useful for benchmarking providers before committing to one for production.
import asyncio
from agentfield import Agent
from pydantic import BaseModel
from typing import Optional
app = Agent(
"provider-benchmarker",
harness_config={
"max_turns": 20,
"max_budget_usd": 3.0,
"tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
},
)
class TaskOutput(BaseModel):
implementation_file: str
lines_of_code: int
tests_pass: bool
approach_summary: str
class ComparisonReport(BaseModel):
task: str
provider_a: str
provider_b: str
provider_a_cost_usd: Optional[float]
provider_b_cost_usd: Optional[float]
provider_a_turns: int
provider_b_turns: int
provider_a_tests_pass: bool
provider_b_tests_pass: bool
provider_a_lines: int
provider_b_lines: int
winner_by_cost: str
winner_by_turns: str
recommendation: str
@app.reasoner()
async def compare_providers(task_description: str, work_dir: str):
"""
Runs the same task on two providers simultaneously.
Compares cost, turn count, and whether tests pass.
Returns a structured comparison report.
"""
prompt = (
f"{task_description} "
f"Write the implementation to {work_dir}/. "
f"Include unit tests and run them. "
f"Output a JSON object with: implementation_file, lines_of_code (int), "
f"tests_pass (bool), approach_summary (one sentence)."
)
# Run both providers concurrently
result_a, result_b = await asyncio.gather(
app.harness(
prompt,
schema=TaskOutput,
provider="claude-code",
model="sonnet",
cwd=work_dir,
),
app.harness(
prompt,
schema=TaskOutput,
provider="codex",
model="codex-large",
cwd=work_dir,
),
return_exceptions=True,
)
# Handle provider-level failures gracefully
a_ok = not isinstance(result_a, Exception) and not result_a.is_error
b_ok = not isinstance(result_b, Exception) and not result_b.is_error
if not a_ok and not b_ok:
return {
"status": "both_failed",
"error_a": str(result_a) if isinstance(result_a, Exception) else result_a.error_message,
"error_b": str(result_b) if isinstance(result_b, Exception) else result_b.error_message,
}
a_cost = result_a.cost_usd if a_ok else None
b_cost = result_b.cost_usd if b_ok else None
a_turns = result_a.num_turns if a_ok else 0
b_turns = result_b.num_turns if b_ok else 0
a_output = result_a.parsed if a_ok else None
b_output = result_b.parsed if b_ok else None
winner_cost = "claude-code" if (a_cost or 999) < (b_cost or 999) else "codex"
winner_turns = "claude-code" if a_turns < b_turns else "codex"
recommendation = (
f"{'claude-code' if winner_cost == winner_cost and winner_turns == 'claude-code' else 'codex'} "
f"performed better on this task. "
f"claude-code: ${a_cost:.4f if a_cost else 'N/A'}, {a_turns} turns. "
f"codex: ${b_cost:.4f if b_cost else 'N/A'}, {b_turns} turns."
)
app.note(
f"Comparison complete. {recommendation}",
tags=["benchmark", "harness"],
)
return {
"status": "completed",
"task": task_description,
"provider_a": "claude-code",
"provider_b": "codex",
"provider_a_cost_usd": a_cost,
"provider_b_cost_usd": b_cost,
"provider_a_turns": a_turns,
"provider_b_turns": b_turns,
"provider_a_tests_pass": a_output.tests_pass if a_output else False,
"provider_b_tests_pass": b_output.tests_pass if b_output else False,
"provider_a_lines": a_output.lines_of_code if a_output else 0,
"provider_b_lines": b_output.lines_of_code if b_output else 0,
"winner_by_cost": winner_cost,
"winner_by_turns": winner_turns,
"recommendation": recommendation,
}import { Agent } from "@agentfield/sdk";
import { z } from "zod";
const TaskOutput = z.object({
implementationFile: z.string(),
linesOfCode: z.number(),
testsPass: z.boolean(),
approachSummary: z.string(),
});
const app = new Agent("provider-benchmarker", {
harnessConfig: {
maxTurns: 20,
maxBudgetUsd: 3.0,
tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
},
});
app.reasoner(
  "compareProviders",
  async (
    {
      taskDescription,
      workDir,
    }: { taskDescription: string; workDir: string },
    ctx
  ) => {
    // Prompt shared verbatim by both providers so the comparison is fair.
    const prompt =
      `${taskDescription} ` +
      `Write the implementation to ${workDir}/. ` +
      `Include unit tests and run them. ` +
      `Output a JSON object with: implementationFile, linesOfCode (int), ` +
      `testsPass (bool), approachSummary (one sentence).`;
    // Run both providers concurrently; allSettled keeps one rejection from
    // discarding the other provider's result.
    const [resultA, resultB] = await Promise.allSettled([
      app.harness(prompt, {
        schema: TaskOutput,
        provider: "claude-code",
        model: "sonnet",
        cwd: workDir,
      }),
      app.harness(prompt, {
        schema: TaskOutput,
        provider: "codex",
        model: "codex-large",
        cwd: workDir,
      }),
    ]);
    // A run is usable only if the call fulfilled, did not error, and
    // produced parsed, schema-valid output.
    const aOk =
      resultA.status === "fulfilled" &&
      !resultA.value.isError &&
      resultA.value.parsed != null;
    const bOk =
      resultB.status === "fulfilled" &&
      !resultB.value.isError &&
      resultB.value.parsed != null;
    if (!aOk && !bOk) {
      return {
        status: "both_failed",
        errorA:
          resultA.status === "rejected"
            ? String(resultA.reason)
            : (resultA.value as any).errorMessage,
        errorB:
          resultB.status === "rejected"
            ? String(resultB.reason)
            : (resultB.value as any).errorMessage,
      };
    }
    const aResult = aOk ? (resultA as PromiseFulfilledResult<any>).value : null;
    const bResult = bOk ? (resultB as PromiseFulfilledResult<any>).value : null;
    const aCost = aResult?.costUsd ?? null;
    const bCost = bResult?.costUsd ?? null;
    const aTurns = aResult?.numTurns ?? 0;
    const bTurns = bResult?.numTurns ?? 0;
    // A failed provider must not win a category: compare with Infinity
    // sentinels so a missing cost — or a failed run's default 0 turns —
    // never "wins" against a real run.
    const winnerCost =
      (aCost ?? Infinity) < (bCost ?? Infinity) ? "claude-code" : "codex";
    const winnerTurns =
      (aOk ? aTurns : Infinity) < (bOk ? bTurns : Infinity)
        ? "claude-code"
        : "codex";
    const recommendation =
      `claude-code: $${aCost?.toFixed(4) ?? "N/A"}, ${aTurns} turns. ` +
      `codex: $${bCost?.toFixed(4) ?? "N/A"}, ${bTurns} turns. ` +
      `Winner by cost: ${winnerCost}. Winner by turns: ${winnerTurns}.`;
    ctx.note(`Comparison complete. ${recommendation}`, {
      tags: ["benchmark", "harness"],
    });
    return {
      status: "completed",
      taskDescription,
      providerA: "claude-code",
      providerB: "codex",
      providerACostUsd: aCost,
      providerBCostUsd: bCost,
      providerATurns: aTurns,
      providerBTurns: bTurns,
      providerATestsPass: aResult?.parsed?.testsPass ?? false,
      providerBTestsPass: bResult?.parsed?.testsPass ?? false,
      providerALines: aResult?.parsed?.linesOfCode ?? 0,
      providerBLines: bResult?.parsed?.linesOfCode ?? 0,
      winnerByCost: winnerCost,
      winnerByTurns: winnerTurns,
      recommendation,
    };
  }
);
app.start();
Parallel Harness Calls
asyncio.gather (Python) and Promise.allSettled (TypeScript) let you run multiple harness calls concurrently. Each call is independent — they don't share state. Use return_exceptions=True in Python so one provider failure doesn't cancel the other.
Example 4: Composing with Human-in-the-Loop
An agent that uses the harness to implement a feature, then pauses for human review before the changes are deployed. The agent does the work; a human approves before anything goes to production.
from agentfield import Agent
from pydantic import BaseModel
from typing import List
app = Agent(
"feature-implementer",
harness_config={
"provider": "claude-code",
"model": "sonnet",
"max_turns": 40,
"max_budget_usd": 8.0,
"tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
"permission_mode": "plan",
},
)
class ImplementationResult(BaseModel):
files_created: List[str]
files_modified: List[str]
tests_added: int
all_tests_pass: bool
pr_description: str
breaking_changes: bool
@app.reasoner()
async def implement_and_review(
feature_spec: str,
repo_root: str,
review_url_template: str,
execution_id: str,
):
"""
Implements a feature using the harness, then pauses for human review.
If approved, returns the implementation details for the deploy step.
If rejected, returns the reviewer's feedback for iteration.
review_url_template: URL where the reviewer can inspect the diff,
e.g. "https://review.example.com/features/{execution_id}"
"""
# Step 1: Implement the feature
result = await app.harness(
f"Implement the following feature in the repository at {repo_root}: "
f"{feature_spec}. "
f"Follow existing code conventions. "
f"Add tests for all new behavior. "
f"Run the full test suite and confirm it passes. "
f"Do not commit or push — leave changes staged. "
f"Output a JSON object with: files_created (list), files_modified (list), "
f"tests_added (int), all_tests_pass (bool), "
f"pr_description (markdown string suitable for a pull request body), "
f"breaking_changes (bool).",
schema=ImplementationResult,
cwd=repo_root,
)
if result.is_error:
app.note(
f"Implementation failed: {result.error_message}",
tags=["error", "harness"],
)
return {
"status": "implementation_failed",
"error": result.error_message,
"cost_usd": result.cost_usd,
}
impl = result.parsed
if not impl.all_tests_pass:
app.note(
"Implementation complete but tests are failing. Returning for retry.",
tags=["warning", "harness"],
)
return {
"status": "tests_failing",
"files_created": impl.files_created,
"files_modified": impl.files_modified,
"cost_usd": result.cost_usd,
}
app.note(
f"Implementation complete: {len(impl.files_created)} files created, "
f"{len(impl.files_modified)} modified. Tests pass. "
f"Breaking changes: {impl.breaking_changes}. "
f"Pausing for human review.",
tags=["harness", "hitl"],
)
# Step 2: Pause for human review
review_url = review_url_template.format(execution_id=execution_id)
approval = await app.pause(
approval_request_id=f"feature-review-{execution_id}",
approval_request_url=review_url,
expires_in_hours=72,
)
if not approval.approved:
app.note(
f"Review rejected. Feedback: {approval.feedback}",
tags=["hitl", "rejected"],
)
return {
"status": "rejected",
"feedback": approval.feedback,
"files_created": impl.files_created,
"files_modified": impl.files_modified,
"pr_description": impl.pr_description,
}
app.note(
f"Review approved by {approval.reviewer_id}. Ready to deploy.",
tags=["hitl", "approved"],
)
return {
"status": "approved",
"files_created": impl.files_created,
"files_modified": impl.files_modified,
"tests_added": impl.tests_added,
"all_tests_pass": impl.all_tests_pass,
"pr_description": impl.pr_description,
"breaking_changes": impl.breaking_changes,
"approved_by": approval.reviewer_id,
"implementation_cost_usd": result.cost_usd,
"implementation_turns": result.num_turns,
}
import { Agent } from "@agentfield/sdk";
import { z } from "zod";
const ImplementationResult = z.object({
filesCreated: z.array(z.string()),
filesModified: z.array(z.string()),
testsAdded: z.number(),
allTestsPass: z.boolean(),
prDescription: z.string(),
breakingChanges: z.boolean(),
});
const app = new Agent("feature-implementer", {
harnessConfig: {
provider: "claude-code",
model: "sonnet",
maxTurns: 40,
maxBudgetUsd: 8.0,
tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
permissionMode: "plan",
},
});
app.reasoner(
"implementAndReview",
async (
{
featureSpec,
repoRoot,
reviewUrlTemplate,
executionId,
}: {
featureSpec: string;
repoRoot: string;
reviewUrlTemplate: string;
executionId: string;
},
ctx
) => {
const result = await app.harness(
`Implement the following feature in the repository at ${repoRoot}: ` +
`${featureSpec}. ` +
`Follow existing code conventions. ` +
`Add tests for all new behavior. ` +
`Run the full test suite and confirm it passes. ` +
`Do not commit or push — leave changes staged. ` +
`Output a JSON object with: filesCreated (list), filesModified (list), ` +
`testsAdded (int), allTestsPass (bool), ` +
`prDescription (markdown string suitable for a pull request body), ` +
`breakingChanges (bool).`,
{
schema: ImplementationResult,
cwd: repoRoot,
}
);
if (result.isError) {
ctx.note(`Implementation failed: ${result.errorMessage}`, {
tags: ["error", "harness"],
});
return {
status: "implementation_failed",
error: result.errorMessage,
costUsd: result.costUsd,
};
}
const impl = result.parsed!;
if (!impl.allTestsPass) {
ctx.note("Implementation complete but tests are failing.", {
tags: ["warning", "harness"],
});
return {
status: "tests_failing",
filesCreated: impl.filesCreated,
filesModified: impl.filesModified,
costUsd: result.costUsd,
};
}
ctx.note(
`Implementation complete. ${impl.filesCreated.length} files created, ` +
`${impl.filesModified.length} modified. Tests pass. Pausing for review.`,
{ tags: ["harness", "hitl"] }
);
const reviewUrl = reviewUrlTemplate.replace("{executionId}", executionId);
const approval = await app.pause({
approvalRequestId: `feature-review-${executionId}`,
approvalRequestUrl: reviewUrl,
expiresInHours: 72,
});
if (!approval.approved) {
ctx.note(`Review rejected. Feedback: ${approval.feedback}`, {
tags: ["hitl", "rejected"],
});
return {
status: "rejected",
feedback: approval.feedback,
filesCreated: impl.filesCreated,
filesModified: impl.filesModified,
prDescription: impl.prDescription,
};
}
ctx.note(`Review approved by ${approval.reviewerId}. Ready to deploy.`, {
tags: ["hitl", "approved"],
});
return {
status: "approved",
filesCreated: impl.filesCreated,
filesModified: impl.filesModified,
testsAdded: impl.testsAdded,
allTestsPass: impl.allTestsPass,
prDescription: impl.prDescription,
breakingChanges: impl.breakingChanges,
approvedBy: approval.reviewerId,
implementationCostUsd: result.costUsd,
implementationTurns: result.numTurns,
};
}
);
app.start();
permission_mode: plan
Setting permission_mode: "plan" tells the agent to show its plan before writing any files. This gives you a chance to catch misunderstandings before the agent does work. Combine it with app.pause() for a two-stage review: plan approval, then diff approval.
Example 5: Cost-Controlled Batch Processing
An agent that processes a list of tasks with a shared budget. Each task runs sequentially, tracking cumulative spend. When the budget is exhausted, remaining tasks are skipped and reported.
from agentfield import Agent
from pydantic import BaseModel
from typing import List, Optional
app = Agent(
"batch-processor",
harness_config={
"provider": "claude-code",
"model": "sonnet",
"max_turns": 20,
"tools": ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
},
)
class TaskSpec(BaseModel):
task_id: str
description: str
target_file: str
priority: int # 1 = highest
class TaskOutcome(BaseModel):
task_id: str
status: str # "completed", "failed", "skipped"
cost_usd: Optional[float]
turns: Optional[int]
error: Optional[str]
tests_pass: Optional[bool]
class BatchResult(BaseModel):
total_tasks: int
completed: int
failed: int
skipped: int
total_cost_usd: float
budget_usd: float
budget_remaining_usd: float
outcomes: List[TaskOutcome]
class SingleTaskOutput(BaseModel):
tests_pass: bool
summary: str
@app.reasoner()
async def process_batch(
tasks: List[dict],
repo_root: str,
budget_usd: float,
):
"""
Processes a list of tasks in priority order.
Tracks cumulative cost against the budget.
Skips remaining tasks when the budget is exhausted.
Returns a full report of what was completed, failed, or skipped.
"""
# Sort by priority (1 = highest)
task_specs = sorted(
[TaskSpec(**t) for t in tasks],
key=lambda t: t.priority,
)
outcomes: List[TaskOutcome] = []
total_cost = 0.0
completed = 0
failed = 0
skipped = 0
for spec in task_specs:
remaining_budget = budget_usd - total_cost
if remaining_budget <= 0.0:
app.note(
f"Budget exhausted (${total_cost:.4f} / ${budget_usd:.2f}). "
f"Skipping task {spec.task_id}: {spec.description[:60]}",
tags=["budget", "skipped"],
)
outcomes.append(TaskOutcome(
task_id=spec.task_id,
status="skipped",
cost_usd=None,
turns=None,
error="Budget exhausted",
tests_pass=None,
))
skipped += 1
continue
app.note(
f"Starting task {spec.task_id} (priority {spec.priority}). "
f"Budget remaining: ${remaining_budget:.4f}.",
tags=["batch", "harness"],
)
result = await app.harness(
f"In the repository at {repo_root}, perform the following task: "
f"{spec.description}. "
f"The primary file to work on is {spec.target_file}. "
f"Run the test suite after making changes. "
f"Output a JSON object with: tests_pass (bool), summary (one sentence).",
schema=SingleTaskOutput,
cwd=repo_root,
max_budget_usd=remaining_budget, # Cap this task at remaining budget
)
task_cost = result.cost_usd or 0.0
total_cost += task_cost
if result.is_error:
app.note(
f"Task {spec.task_id} failed: {result.error_message}. "
f"Cost so far: ${total_cost:.4f}.",
tags=["error", "batch"],
)
outcomes.append(TaskOutcome(
task_id=spec.task_id,
status="failed",
cost_usd=task_cost,
turns=result.num_turns,
error=result.error_message,
tests_pass=None,
))
failed += 1
else:
output = result.parsed
app.note(
f"Task {spec.task_id} completed in {result.num_turns} turns "
f"(${task_cost:.4f}). Tests: {'pass' if output.tests_pass else 'FAIL'}.",
tags=["batch", "harness"],
)
outcomes.append(TaskOutcome(
task_id=spec.task_id,
status="completed",
cost_usd=task_cost,
turns=result.num_turns,
error=None,
tests_pass=output.tests_pass,
))
completed += 1
batch_result = BatchResult(
total_tasks=len(task_specs),
completed=completed,
failed=failed,
skipped=skipped,
total_cost_usd=total_cost,
budget_usd=budget_usd,
budget_remaining_usd=max(0.0, budget_usd - total_cost),
outcomes=outcomes,
)
app.note(
f"Batch complete: {completed} done, {failed} failed, {skipped} skipped. "
f"Total cost: ${total_cost:.4f} / ${budget_usd:.2f}.",
tags=["batch", "summary"],
)
return batch_result.dict()
import { Agent } from "@agentfield/sdk";
import { z } from "zod";
const TaskSpec = z.object({
taskId: z.string(),
description: z.string(),
targetFile: z.string(),
priority: z.number(),
});
const SingleTaskOutput = z.object({
testsPass: z.boolean(),
summary: z.string(),
});
type TaskOutcome = {
taskId: string;
status: "completed" | "failed" | "skipped";
costUsd: number | null;
turns: number | null;
error: string | null;
testsPass: boolean | null;
};
const app = new Agent("batch-processor", {
harnessConfig: {
provider: "claude-code",
model: "sonnet",
maxTurns: 20,
tools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
},
});
app.reasoner(
"processBatch",
async (
{
tasks,
repoRoot,
budgetUsd,
}: { tasks: unknown[]; repoRoot: string; budgetUsd: number },
ctx
) => {
const taskSpecs = tasks
.map((t) => TaskSpec.parse(t))
.sort((a, b) => a.priority - b.priority);
const outcomes: TaskOutcome[] = [];
let totalCost = 0;
let completed = 0;
let failed = 0;
let skipped = 0;
for (const spec of taskSpecs) {
const remainingBudget = budgetUsd - totalCost;
if (remainingBudget <= 0) {
ctx.note(
`Budget exhausted ($${totalCost.toFixed(4)} / $${budgetUsd.toFixed(2)}). ` +
`Skipping task ${spec.taskId}.`,
{ tags: ["budget", "skipped"] }
);
outcomes.push({
taskId: spec.taskId,
status: "skipped",
costUsd: null,
turns: null,
error: "Budget exhausted",
testsPass: null,
});
skipped++;
continue;
}
ctx.note(
`Starting task ${spec.taskId} (priority ${spec.priority}). ` +
`Budget remaining: $${remainingBudget.toFixed(4)}.`,
{ tags: ["batch", "harness"] }
);
const result = await app.harness(
`In the repository at ${repoRoot}, perform the following task: ` +
`${spec.description}. ` +
`The primary file to work on is ${spec.targetFile}. ` +
`Run the test suite after making changes. ` +
`Output a JSON object with: testsPass (bool), summary (one sentence).`,
{
schema: SingleTaskOutput,
cwd: repoRoot,
maxBudgetUsd: remainingBudget,
}
);
const taskCost = result.costUsd ?? 0;
totalCost += taskCost;
if (result.isError) {
ctx.note(
`Task ${spec.taskId} failed: ${result.errorMessage}. ` +
`Cost so far: $${totalCost.toFixed(4)}.`,
{ tags: ["error", "batch"] }
);
outcomes.push({
taskId: spec.taskId,
status: "failed",
costUsd: taskCost,
turns: result.numTurns,
error: result.errorMessage ?? null,
testsPass: null,
});
failed++;
} else {
const output = result.parsed!;
ctx.note(
`Task ${spec.taskId} completed in ${result.numTurns} turns ` +
`($${taskCost.toFixed(4)}). Tests: ${output.testsPass ? "pass" : "FAIL"}.`,
{ tags: ["batch", "harness"] }
);
outcomes.push({
taskId: spec.taskId,
status: "completed",
costUsd: taskCost,
turns: result.numTurns,
error: null,
testsPass: output.testsPass,
});
completed++;
}
}
ctx.note(
`Batch complete: ${completed} done, ${failed} failed, ${skipped} skipped. ` +
`Total cost: $${totalCost.toFixed(4)} / $${budgetUsd.toFixed(2)}.`,
{ tags: ["batch", "summary"] }
);
return {
totalTasks: taskSpecs.length,
completed,
failed,
skipped,
totalCostUsd: totalCost,
budgetUsd,
budgetRemainingUsd: Math.max(0, budgetUsd - totalCost),
outcomes,
};
}
);
app.start();
Per-Call Budget Caps
Passing max_budget_usd (Python) or maxBudgetUsd (TypeScript) on individual harness calls caps the spend for that specific task. The batch example uses the remaining budget as the per-task cap, so a single expensive task can't consume the entire batch budget.
Key Patterns
Next Steps
Harness Concept Guide
Full API reference: HarnessConfig, HarnessResult, schema validation, provider setup
Reasoners and Skills
How to structure agent logic and compose capabilities
Async Execution
Run long harness tasks asynchronously with webhooks
Human-in-the-Loop
More patterns for pausing agent workflows for human review