Building a Minimal Copilot: End-to-End Implementation
All the previous articles have covered pieces: indexing, retrieval, file editing, execution, testing, planning, sandboxing. This article assembles them into a working copilot. You'll implement a minimal but functional coding agent in ~300 lines of Python that can read a repo, understand a task, edit files, run tests, and iterate until the task is complete.
This is the moment where theory becomes practice. By the end, you'll have a working agent that you can extend, test, and deploy.
Architecture Overview
The minimal copilot has five components:
┌─────────────────────────────────────────────────────┐
│ 1. Repository Indexer │
│ - AST parse all .py files │
│ - Store: functions, signatures, imports │
└────────────────────┬────────────────────────────────┘
│
┌────────────────────v────────────────────────────────┐
│ 2. Context Retriever │
│ - Keyword search over file paths and previews (semantic matching is a natural extension) │
│ - Return top-5 relevant files │
└────────────────────┬────────────────────────────────┘
│
┌────────────────────v────────────────────────────────┐
│ 3. Agent Loop │
│ - Receive task → plan → execute tools → iterate │
│ - Tools: read, edit, run_tests, run_command │
└────────────────────┬────────────────────────────────┘
│
┌────────────────────v────────────────────────────────┐
│ 4. Tool Suite │
│ - FileEditor (safe edits with validation) │
│ - CommandRunner (timeout + output capture; sandboxing is added as an extension) │
│ - TestRunner (parse test results) │
└────────────────────┬────────────────────────────────┘
│
┌────────────────────v────────────────────────────────┐
│ 5. Validator │
│ - Track success/failure │
│ - Stop when tests pass or max retries │
└─────────────────────────────────────────────────────┘
Implementation: Step 1 — Indexer
import os
import ast
import json
from typing import Dict, List
class RepositoryIndexer:
    """Index the Python files of a repository for keyword lookup.

    ``index["files"]`` maps each repo-relative path to its line count, the
    functions found in it and a 200-character preview of its content.
    ``index["functions"]`` maps a function name to the file and line where it
    is defined; when two files define the same name, the one indexed last
    wins.  Files that cannot be read or parsed are left out of the index and
    recorded in ``skipped`` (repo-relative path -> reason).
    """

    def __init__(self, repo_path: str):
        self.repo_path = repo_path
        self.index = {"files": {}, "functions": {}}
        # repo-relative path -> short description of why the file was skipped
        self.skipped: Dict[str, str] = {}

    def build(self):
        """Walk the repository and index every ``.py`` file.

        Directories whose name starts with a dot are not descended into.
        Unreadable or syntactically invalid files do not abort the walk; they
        are noted in ``self.skipped`` and indexing continues.
        """
        for root, dirs, files in os.walk(self.repo_path):
            # Slice-assign in place so os.walk itself prunes hidden directories.
            dirs[:] = [d for d in dirs if not d.startswith('.')]
            for file in files:
                if not file.endswith('.py'):
                    continue
                filepath = os.path.join(root, file)
                rel_path = os.path.relpath(filepath, self.repo_path)
                try:
                    # Python source defaults to UTF-8; do not depend on the locale.
                    with open(filepath, encoding="utf-8") as f:
                        content = f.read()
                    tree = ast.parse(content)
                except (OSError, SyntaxError, ValueError, RecursionError) as exc:
                    # ValueError also covers UnicodeDecodeError.
                    self.skipped[rel_path] = f"{type(exc).__name__}: {exc}"
                    continue

                functions = [
                    {
                        "name": node.name,
                        "lineno": node.lineno,
                        "args": [arg.arg for arg in node.args.args],
                    }
                    for node in ast.walk(tree)
                    # Include coroutines so `async def` functions are searchable too.
                    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
                ]
                self.index["files"][rel_path] = {
                    "lines": len(content.split('\n')),
                    "functions": functions,
                    "preview": content[:200],
                }
                for func in functions:
                    self.index["functions"][func["name"]] = {
                        "file": rel_path,
                        "lineno": func["lineno"],
                    }

    def search(self, query: str, top_k: int = 5) -> List[str]:
        """Return up to ``top_k`` indexed paths ranked by keyword hits.

        The query is lower-cased and split on whitespace; a file's score is
        the total number of times the distinct query words occur in its
        path plus preview.  Files scoring zero are omitted.
        """
        query_words = set(query.lower().split())
        scores = {}
        for filepath, metadata in self.index["files"].items():
            file_text = (filepath + " " + metadata["preview"]).lower()
            score = sum(file_text.count(word) for word in query_words)
            if score > 0:
                scores[filepath] = score
        # sorted() is stable, so ties keep their indexing order.
        ranked = sorted(scores, key=scores.get, reverse=True)
        return ranked[:top_k]
Implementation: Step 2 — Tools
import subprocess
import tempfile
class FileEditor:
    """Safe file editing with validation."""

    @staticmethod
    def edit(filepath: str, old_text: str, new_text: str) -> dict:
        """Replace the single occurrence of ``old_text`` in a file.

        The edit is applied only when ``old_text`` occurs exactly once, so an
        ambiguous or stale snippet can never rewrite the wrong location.

        Args:
            filepath: Path of the file to modify (read and written as UTF-8).
            old_text: Exact text expected to appear once in the file.
            new_text: Text that replaces ``old_text``.

        Returns:
            ``{"success": True}`` when the file was rewritten, otherwise
            ``{"success": False, "error": <reason>}``.  A failed write also
            carries a ``"detail"`` key with the OS error text.
        """
        try:
            with open(filepath, encoding="utf-8") as f:
                current = f.read()
        except FileNotFoundError:
            return {"success": False, "error": "File not found"}
        except (OSError, UnicodeDecodeError) as exc:
            # Permission problems, directories, undecodable bytes, ... are not
            # "file not found"; report what actually went wrong.
            return {"success": False, "error": f"Could not read file: {exc}"}

        occurrences = current.count(old_text)
        if occurrences == 0:
            return {"success": False, "error": "old_text not found"}
        if occurrences > 1:
            return {"success": False, "error": "old_text appears multiple times"}

        new_content = current.replace(old_text, new_text, 1)
        try:
            with open(filepath, 'w', encoding="utf-8") as f:
                f.write(new_content)
        except OSError as exc:
            return {"success": False, "error": "Write failed", "detail": str(exc)}
        return {"success": True}
class CommandRunner:
    """Run shell commands with a timeout and captured output."""

    @staticmethod
    def _as_text(data) -> str:
        """Normalize captured output (``None``, bytes or str) to ``str``."""
        if data is None:
            return ""
        if isinstance(data, bytes):
            return data.decode(errors="replace")
        return data

    @staticmethod
    def run(command: str, timeout: int = 10) -> dict:
        """Run ``command`` through the shell and capture its output.

        Args:
            command: Shell command line to execute.
            timeout: Seconds to wait before the command is killed.

        Returns:
            A dict that always contains ``success``, ``exit_code``,
            ``stdout`` and ``stderr``.  When the timeout expires,
            ``success`` is ``False``, ``exit_code`` is 124 and an ``error``
            message is added; ``stdout``/``stderr`` then hold whatever
            output was captured before the kill (possibly empty).
        """
        # SECURITY: shell=True executes the string verbatim.  That is the
        # purpose of this tool, but it must only run inside a sandbox when the
        # command text comes from a model or any other untrusted source.
        try:
            result = subprocess.run(
                command,
                shell=True,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            return {
                "success": False,
                "error": f"Timeout after {timeout}s",
                # 124 is the exit status GNU `timeout` uses for an expired command.
                "exit_code": 124,
                # Keep the same keys as the normal path so callers can index
                # them unconditionally; partial output may arrive as bytes.
                "stdout": CommandRunner._as_text(exc.stdout),
                "stderr": CommandRunner._as_text(exc.stderr),
            }
        return {
            "success": result.returncode == 0,
            "exit_code": result.returncode,
            "stdout": result.stdout,
            "stderr": result.stderr,
        }
class TestRunner:
    """Run tests and parse results."""

    @staticmethod
    def run(test_dir: str = "tests/") -> dict:
        """Run pytest on ``test_dir`` and return a structured result.

        Args:
            test_dir: Directory (or file) handed to pytest as a single
                argument.

        Returns:
            A dict with ``success`` (the command's success flag),
            ``passed`` / ``failed`` (how often the literal markers
            `` PASSED`` / `` FAILED`` occur in the verbose stdout) and
            ``output`` (stdout followed by stderr).  If the command runner
            reported an ``error`` (for example a timeout), it is passed
            through under the same key so the failure is not mistaken for an
            empty test run.
        """
        import shlex

        # Quote the path so spaces or shell metacharacters in it cannot split
        # or alter the command line; plain paths such as "tests/" are unchanged.
        command = f"pytest {shlex.quote(test_dir)} -v --tb=short"
        result = CommandRunner.run(command, timeout=20)

        stdout = result.get("stdout", "")
        stderr = result.get("stderr", "")
        summary = {
            "success": result["success"],
            "passed": stdout.count(" PASSED"),
            "failed": stdout.count(" FAILED"),
            "output": stdout + stderr,
        }
        if "error" in result:
            summary["error"] = result["error"]
        return summary
Implementation: Step 3 — Agent Loop
from anthropic import Anthropic
class CodingAgent:
    """Minimal coding agent: retrieve context, then loop over model tool calls.

    Each loop iteration sends the conversation to the model, executes every
    tool the model requested and feeds the results back as ``tool_result``
    blocks, until the model ends its turn or ``max_iterations`` is reached.
    """

    DEFAULT_MODEL = "claude-3-5-sonnet-20241022"

    # Upper bound on the JSON text of one tool result sent back to the model.
    MAX_TOOL_RESULT_CHARS = 500

    # Tool schemas advertised to the model; built once instead of per request.
    TOOLS = [
        {
            "name": "read_file",
            "description": "Read a file",
            "input_schema": {
                "type": "object",
                "properties": {
                    "path": {"type": "string"}
                },
                "required": ["path"]
            }
        },
        {
            "name": "edit_file",
            "description": "Edit file (old_text → new_text)",
            "input_schema": {
                "type": "object",
                "properties": {
                    "path": {"type": "string"},
                    "old_text": {"type": "string"},
                    "new_text": {"type": "string"}
                },
                "required": ["path", "old_text", "new_text"]
            }
        },
        {
            "name": "run_tests",
            "description": "Run pytest",
            "input_schema": {"type": "object", "properties": {}}
        },
        {
            "name": "run_command",
            "description": "Run shell command",
            "input_schema": {
                "type": "object",
                "properties": {
                    "command": {"type": "string"}
                },
                "required": ["command"]
            }
        },
    ]

    def __init__(self, repo_path: str, model: str = DEFAULT_MODEL,
                 max_iterations: int = 5):
        """Index ``repo_path`` and set up the API client and tools.

        Args:
            repo_path: Root of the repository the agent works on.
            model: Model identifier passed to the Messages API.
            max_iterations: Maximum number of model calls per task.
        """
        self.repo_path = repo_path
        self.model = model
        self.client = Anthropic()
        self.indexer = RepositoryIndexer(repo_path)
        self.indexer.build()
        self.file_editor = FileEditor()
        self.command_runner = CommandRunner()
        self.test_runner = TestRunner()
        self.max_iterations = max_iterations

    def run(self, task: str) -> dict:
        """Execute a task using the agent loop.

        Returns:
            A dict with ``success``, ``message`` and ``iterations`` (the
            number of model calls made).  ``success`` is ``True`` when the
            model ended its turn on its own; that reflects the model's
            judgement and is not an independent check that the tests pass.
        """
        relevant_files = self.indexer.search(task, top_k=5)
        context_text = self._load_context(relevant_files)
        system_prompt = self._build_system_prompt(task, context_text)

        messages = [{"role": "user", "content": task}]
        for iteration in range(self.max_iterations):
            response = self.client.messages.create(
                model=self.model,
                max_tokens=2000,
                system=system_prompt,
                tools=self.TOOLS,
                messages=messages,
            )
            calls_made = iteration + 1
            tool_calls = [block for block in response.content
                          if block.type == "tool_use"]

            if response.stop_reason == "end_turn":
                return {
                    "success": True,
                    # The reply may hold several blocks; collect all text
                    # instead of assuming the first block is text.
                    "message": "".join(block.text for block in response.content
                                       if block.type == "text"),
                    "iterations": calls_made,
                }
            if not tool_calls:
                # E.g. the reply was cut off by max_tokens.  Nothing would be
                # added to the conversation, so looping would only resend
                # the identical request.
                return {
                    "success": False,
                    "message": ("Model stopped without requesting a tool "
                                f"(stop_reason={response.stop_reason})"),
                    "iterations": calls_made,
                }

            # Record the assistant turn exactly once, then answer every
            # tool_use block in a single user turn: the Messages API expects
            # a tool_result (matched by tool_use_id) for each requested call.
            messages.append({"role": "assistant", "content": response.content})
            tool_results = []
            for call in tool_calls:
                result = self._execute_tool(call.name, call.input)
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": call.id,
                    "content": json.dumps(result)[:self.MAX_TOOL_RESULT_CHARS],
                })
            messages.append({"role": "user", "content": tool_results})

        return {
            "success": False,
            "message": "Max iterations reached",
            "iterations": self.max_iterations
        }

    @staticmethod
    def _build_system_prompt(task: str, context_text: str) -> str:
        """Assemble the system prompt from the task and retrieved context."""
        return (
            "You are a code-editing agent. Your goal is to complete this task:\n"
            f"{task}\n"
            "Here is relevant code from the repository:\n"
            f"{context_text}\n"
            "You have access to these tools:\n"
            "- read_file(path): Read a file\n"
            "- edit_file(path, old_text, new_text): Edit a file safely\n"
            "- run_tests(): Run pytest and see results\n"
            "- run_command(command): Run any shell command\n"
            "After each edit, run tests. If tests pass, you're done.\n"
            "If tests fail, analyze the failure and fix the code.\n"
            "Make targeted edits (provide exact old_text and new_text)."
        )

    def _resolve(self, path: str) -> str:
        """Interpret a tool-supplied path relative to the repository root.

        The context shown to the model uses repo-relative paths, so the
        paths it sends back must be resolved the same way.  Absolute paths
        pass through unchanged (``os.path.join`` semantics).
        """
        return os.path.join(self.repo_path, path)

    def _execute_tool(self, name: str, tool_input: dict) -> dict:
        """Run one requested tool and always return a JSON-serializable dict.

        Missing arguments, I/O errors and unknown tool names are reported
        as ``{"success": False, "error": ...}`` so the model can react to
        them instead of the loop crashing.
        """
        try:
            if name == "read_file":
                with open(self._resolve(tool_input["path"]), encoding="utf-8") as f:
                    return {"content": f.read()}
            if name == "edit_file":
                return self.file_editor.edit(
                    self._resolve(tool_input["path"]),
                    tool_input["old_text"],
                    tool_input["new_text"],
                )
            if name == "run_tests":
                return self.test_runner.run(os.path.join(self.repo_path, "tests/"))
            if name == "run_command":
                # NOTE(review): the command runs in the process's current
                # working directory, not in repo_path; confirm that is intended.
                return self.command_runner.run(tool_input["command"])
        except (OSError, UnicodeDecodeError, KeyError) as exc:
            return {"success": False, "error": f"{type(exc).__name__}: {exc}"}
        return {"success": False, "error": f"Unknown tool: {name}"}

    def _load_context(self, files: List[str]) -> str:
        """Load and format context from files.

        Each readable file contributes a ``--- <path> ---`` header followed
        by its first 1000 characters; files that cannot be read or decoded
        are skipped.
        """
        sections = []
        for filepath in files:
            try:
                with open(os.path.join(self.repo_path, filepath), encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError):
                # Best effort: a missing or binary file only costs its context.
                continue
            sections.append(f"\n\n--- {filepath} ---\n{content[:1000]}")
        return "".join(sections)
Usage Example
# Point the agent at the repository it should work on
agent = CodingAgent("./my_repo")

# Describe the change in plain language and hand it to the agent loop
task = """
Add a function called 'add_numbers' that takes two parameters
and returns their sum. Make sure existing tests pass.
"""
result = agent.run(task)

# Report the outcome, one field per line
for label, key in (
    ("Success", "success"),
    ("Message", "message"),
    ("Iterations", "iterations"),
):
    print(f"{label}: {result[key]}")
Extending the Copilot
To make it production-ready, add:
- Error handling: Catch specific exceptions around each tool call and return clear error messages to the agent instead of silently swallowing failures.
- Logging: Log every agent action for debugging.
- Sandboxing: Run agent in a Docker container.
- Persistence: Save agent state between sessions.
- Multi-language: Support Python, TypeScript, Go, Rust.
- Version control: Commit successful edits to git.
Example extension (sandboxing):
class SandboxedAgent(CodingAgent):
    """Run agent in a Docker container."""

    def run(self, task: str) -> dict:
        """Run ``agent.py`` for ``task`` inside a container and return its result.

        The repository is bind-mounted at ``/app`` and the container's
        standard output is parsed as the JSON result dict.

        Returns:
            The decoded JSON result, or a ``{"success": False, "message":
            ..., "iterations": 0}`` dict when docker cannot be started, the
            container exits with a non-zero status, or its output is not
            valid JSON.
        """
        # A bind mount needs an absolute host path; a bare relative name
        # would be interpreted by docker as a named volume.
        host_repo = os.path.abspath(self.repo_path)

        # Argument list instead of a shell string: the task text reaches
        # agent.py as one argument, so quotes or shell metacharacters in it
        # can neither break the command nor inject additional commands.
        # NOTE(review): agent.py presumably has to reach the LLM API from
        # inside the container, which --network=none would prevent; confirm
        # how the sandboxed agent is expected to talk to the model.
        container_cmd = [
            "docker", "run", "--rm",
            "-v", f"{host_repo}:/app",
            "-w", "/app",
            "--network=none",
            "python:3.11",
            "python", "agent.py", task,
        ]

        try:
            result = subprocess.run(container_cmd, capture_output=True, text=True)
        except OSError as exc:
            # For example, the docker executable is not installed.
            return {
                "success": False,
                "message": f"Could not start docker: {exc}",
                "iterations": 0,
            }

        if result.returncode != 0:
            return {
                "success": False,
                "message": (f"Container exited with status {result.returncode}: "
                            f"{result.stderr.strip()}"),
                "iterations": 0,
            }
        try:
            return json.loads(result.stdout)
        except json.JSONDecodeError as exc:
            return {
                "success": False,
                "message": f"Container output was not valid JSON: {exc}",
                "iterations": 0,
            }
Key Takeaways
- A minimal but functional copilot can be built in ~300 lines.
- Index once, retrieve relevant context, then loop.
- Use exact-match validation for safe edits.
- Parse test output to give the agent clear feedback.
- Add sandboxing, logging, and persistence for production use.
Frequently Asked Questions
Is this production-ready?
No—it's a foundation. Add error handling, retry logic, better sandboxing, and logging before deploying. The code is meant to be readable and extensible, not bulletproof.
How do I test the agent?
Write small test repos with a few functions and tests. Run the agent on "add a function", "refactor this", "fix the bug". Measure success rate and iteration count.
Can I use this with non-Python code?
Yes—swap out the indexer (use language-specific AST parsers) and test runner (use different test frameworks). The agent loop is language-agnostic.
How long does it take to run?
For a small repo (100 files), indexing takes ~1s. Each agent iteration (LLM call + tool execution) takes 5–10s. A simple task completes in 10–30s.