From 5010f82c3e2c823fd0def95843725eea6242c302 Mon Sep 17 00:00:00 2001 From: Affaan Mustafa Date: Thu, 22 Jan 2026 04:16:39 -0800 Subject: [PATCH] feat: package as Claude Code plugin with marketplace distribution - Add .claude-plugin/plugin.json manifest for direct installation - Add .claude-plugin/marketplace.json for marketplace distribution - Reorganize skills to proper skill-name/SKILL.md format - Update hooks.json with relative paths for portability - Add new skills: continuous-learning, strategic-compact, eval-harness, verification-loop - Add new commands: checkpoint, eval, orchestrate, verify - Update README with plugin installation instructions Install via: /plugin marketplace add affaan-m/everything-claude-code /plugin install everything-claude-code@everything-claude-code --- .claude-plugin/marketplace.json | 46 ++++ .claude-plugin/plugin.json | 33 +++ .gitignore | 3 + README.md | 73 ++++-- commands/checkpoint.md | 74 ++++++ commands/eval.md | 120 ++++++++++ commands/orchestrate.md | 172 ++++++++++++++ commands/verify.md | 59 +++++ hooks/hooks.json | 10 +- .../SKILL.md} | 0 .../SKILL.md} | 0 .../SKILL.md} | 0 skills/continuous-learning/SKILL.md | 80 +++++++ skills/eval-harness/SKILL.md | 221 ++++++++++++++++++ .../SKILL.md} | 0 .../SKILL.md} | 0 skills/strategic-compact/SKILL.md | 63 +++++ skills/verification-loop/SKILL.md | 120 ++++++++++ 18 files changed, 1054 insertions(+), 20 deletions(-) create mode 100644 .claude-plugin/marketplace.json create mode 100644 .claude-plugin/plugin.json create mode 100644 commands/checkpoint.md create mode 100644 commands/eval.md create mode 100644 commands/orchestrate.md create mode 100644 commands/verify.md rename skills/{backend-patterns.md => backend-patterns/SKILL.md} (100%) rename skills/{clickhouse-io.md => clickhouse-io/SKILL.md} (100%) rename skills/{coding-standards.md => coding-standards/SKILL.md} (100%) create mode 100644 skills/continuous-learning/SKILL.md create mode 100644 skills/eval-harness/SKILL.md 
rename skills/{frontend-patterns.md => frontend-patterns/SKILL.md} (100%) rename skills/{project-guidelines-example.md => project-guidelines-example/SKILL.md} (100%) create mode 100644 skills/strategic-compact/SKILL.md create mode 100644 skills/verification-loop/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 0000000..854edc0 --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,46 @@ +{ + "name": "everything-claude-code", + "owner": { + "name": "Affaan Mustafa", + "email": "affaan@example.com" + }, + "metadata": { + "description": "Battle-tested Claude Code configurations from an Anthropic hackathon winner", + "version": "1.0.0" + }, + "plugins": [ + { + "name": "everything-claude-code", + "source": ".", + "description": "Complete collection of agents, skills, hooks, commands, and rules evolved over 10+ months of intensive daily use", + "version": "1.0.0", + "author": { + "name": "Affaan Mustafa" + }, + "homepage": "https://github.com/affaan-m/everything-claude-code", + "repository": "https://github.com/affaan-m/everything-claude-code", + "license": "MIT", + "keywords": [ + "agents", + "skills", + "hooks", + "commands", + "tdd", + "code-review", + "security", + "best-practices" + ], + "category": "workflow", + "tags": [ + "agents", + "skills", + "hooks", + "commands", + "tdd", + "code-review", + "security", + "best-practices" + ] + } + ] +} diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..7480db8 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,33 @@ +{ + "name": "everything-claude-code", + "version": "1.0.0", + "description": "Complete collection of battle-tested Claude Code configs from an Anthropic hackathon winner - agents, skills, hooks, commands, and rules evolved over 10+ months of intensive daily use", + "author": { + "name": "Affaan Mustafa", + "url": "https://x.com/affaanmustafa" + }, + "homepage": 
"https://github.com/affaan-m/everything-claude-code", + "repository": { + "type": "git", + "url": "https://github.com/affaan-m/everything-claude-code.git" + }, + "license": "MIT", + "keywords": [ + "claude-code", + "agents", + "skills", + "hooks", + "commands", + "rules", + "tdd", + "code-review", + "security", + "workflow", + "automation", + "best-practices" + ], + "commands": "./commands", + "agents": "./agents", + "skills": "./skills", + "hooks": "./hooks/hooks.json" +} diff --git a/.gitignore b/.gitignore index 2536bb4..d947b2c 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,6 @@ node_modules/ # Personal configs (if any) personal/ private/ + +# Session templates (not committed) +examples/sessions/*.tmp diff --git a/README.md b/README.md index d2d5968..c598d21 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,14 @@ The advanced techniques - token optimization, memory persistence across sessions ## What's Inside +This repo is a **Claude Code plugin** - install it directly or copy components manually. 
+ ``` everything-claude-code/ +|-- .claude-plugin/ # Plugin and marketplace manifests +| |-- plugin.json # Plugin metadata and component paths +| |-- marketplace.json # Marketplace catalog for /plugin marketplace add +| |-- agents/ # Specialized subagents for delegation | |-- planner.md # Feature implementation planning | |-- architect.md # System design decisions @@ -56,13 +62,15 @@ everything-claude-code/ | |-- doc-updater.md # Documentation sync | |-- skills/ # Workflow definitions and domain knowledge -| |-- coding-standards.md # Language best practices -| |-- backend-patterns.md # API, database, caching patterns -| |-- frontend-patterns.md # React, Next.js patterns +| |-- coding-standards/ # Language best practices +| |-- backend-patterns/ # API, database, caching patterns +| |-- frontend-patterns/ # React, Next.js patterns | |-- continuous-learning/ # Auto-extract patterns from sessions (Longform Guide) | |-- strategic-compact/ # Manual compaction suggestions (Longform Guide) | |-- tdd-workflow/ # TDD methodology | |-- security-review/ # Security checklist +| |-- eval-harness/ # Verification loop evaluation (Longform Guide) +| |-- verification-loop/ # Continuous verification (Longform Guide) | |-- commands/ # Slash commands for quick execution | |-- tdd.md # /tdd - Test-driven development @@ -72,8 +80,10 @@ everything-claude-code/ | |-- build-fix.md # /build-fix - Fix build errors | |-- refactor-clean.md # /refactor-clean - Dead code removal | |-- learn.md # /learn - Extract patterns mid-session (Longform Guide) +| |-- checkpoint.md # /checkpoint - Save verification state (Longform Guide) +| |-- verify.md # /verify - Run verification loop (Longform Guide) | -|-- rules/ # Always-follow guidelines +|-- rules/ # Always-follow guidelines (copy to ~/.claude/rules/) | |-- security.md # Mandatory security checks | |-- coding-style.md # Immutability, file organization | |-- testing.md # TDD, 80% coverage requirement @@ -84,9 +94,6 @@ everything-claude-code/ |-- 
hooks/ # Trigger-based automations | |-- hooks.json # All hooks config (PreToolUse, PostToolUse, Stop, etc.) | |-- memory-persistence/ # Session lifecycle hooks (Longform Guide) -| | |-- pre-compact.sh # Save state before compaction -| | |-- session-start.sh # Load previous context -| | |-- session-end.sh # Persist learnings on end | |-- strategic-compact/ # Compaction suggestions (Longform Guide) | |-- contexts/ # Dynamic system prompt injection contexts (Longform Guide) @@ -97,20 +104,54 @@ everything-claude-code/ |-- examples/ # Example configurations and sessions | |-- CLAUDE.md # Example project-level config | |-- user-CLAUDE.md # Example user-level config -| |-- sessions/ # Example session log files (Longform Guide) | |-- mcp-configs/ # MCP server configurations | |-- mcp-servers.json # GitHub, Supabase, Vercel, Railway, etc. | -|-- plugins/ # Plugin ecosystem documentation - |-- README.md # Plugins, marketplaces, skills guide +|-- marketplace.json # Self-hosted marketplace config (for /plugin marketplace add) ``` --- -## Quick Start +## Installation -### 1. Copy what you need +### Option 1: Install as Plugin (Recommended) + +The easiest way to use this repo - install as a Claude Code plugin: + +```bash +# Add this repo as a marketplace +/plugin marketplace add affaan-m/everything-claude-code + +# Install the plugin +/plugin install everything-claude-code@everything-claude-code +``` + +Or add directly to your `~/.claude/settings.json`: + +```json +{ + "extraKnownMarketplaces": { + "everything-claude-code": { + "source": { + "source": "github", + "repo": "affaan-m/everything-claude-code" + } + } + }, + "enabledPlugins": { + "everything-claude-code@everything-claude-code": true + } +} +``` + +This gives you instant access to all commands, agents, skills, and hooks. 
+ +--- + +### Option 2: Manual Installation + +If you prefer manual control over what's installed: ```bash # Clone the repo @@ -129,17 +170,19 @@ cp everything-claude-code/commands/*.md ~/.claude/commands/ cp -r everything-claude-code/skills/* ~/.claude/skills/ ``` -### 2. Add hooks to settings.json +#### Add hooks to settings.json Copy the hooks from `hooks/hooks.json` to your `~/.claude/settings.json`. -### 3. Configure MCPs +#### Configure MCPs Copy desired MCP servers from `mcp-configs/mcp-servers.json` to your `~/.claude.json`. **Important:** Replace `YOUR_*_HERE` placeholders with your actual API keys. -### 4. Read the guides +--- + +### Read the Guides Seriously, read the guides. These configs make 10x more sense with context. diff --git a/commands/checkpoint.md b/commands/checkpoint.md new file mode 100644 index 0000000..06293c0 --- /dev/null +++ b/commands/checkpoint.md @@ -0,0 +1,74 @@ +# Checkpoint Command + +Create or verify a checkpoint in your workflow. + +## Usage + +`/checkpoint [create|verify|list] [name]` + +## Create Checkpoint + +When creating a checkpoint: + +1. Run `/verify quick` to ensure current state is clean +2. Create a git stash or commit with checkpoint name +3. Log checkpoint to `.claude/checkpoints.log`: + +```bash +echo "$(date +%Y-%m-%d-%H:%M) | $CHECKPOINT_NAME | $(git rev-parse --short HEAD)" >> .claude/checkpoints.log +``` + +4. Report checkpoint created + +## Verify Checkpoint + +When verifying against a checkpoint: + +1. Read checkpoint from log +2. Compare current state to checkpoint: + - Files added since checkpoint + - Files modified since checkpoint + - Test pass rate now vs then + - Coverage now vs then + +3. 
Report: +``` +CHECKPOINT COMPARISON: $NAME +============================ +Files changed: X +Tests: +Y passed / -Z failed +Coverage: +X% / -Y% +Build: [PASS/FAIL] +``` + +## List Checkpoints + +Show all checkpoints with: +- Name +- Timestamp +- Git SHA +- Status (current, behind, ahead) + +## Workflow + +Typical checkpoint flow: + +``` +[Start] --> /checkpoint create "feature-start" + | +[Implement] --> /checkpoint create "core-done" + | +[Test] --> /checkpoint verify "core-done" + | +[Refactor] --> /checkpoint create "refactor-done" + | +[PR] --> /checkpoint verify "feature-start" +``` + +## Arguments + +$ARGUMENTS: +- `create <name>` - Create named checkpoint +- `verify <name>` - Verify against named checkpoint +- `list` - Show all checkpoints +- `clear` - Remove old checkpoints (keeps last 5) diff --git a/commands/eval.md b/commands/eval.md new file mode 100644 index 0000000..7ded11d --- /dev/null +++ b/commands/eval.md @@ -0,0 +1,120 @@ +# Eval Command + +Manage eval-driven development workflow. + +## Usage + +`/eval [define|check|report|list] [feature-name]` + +## Define Evals + +`/eval define feature-name` + +Create a new eval definition: + +1. Create `.claude/evals/feature-name.md` with template: + +```markdown +## EVAL: feature-name +Created: $(date) + +### Capability Evals +- [ ] [Description of capability 1] +- [ ] [Description of capability 2] + +### Regression Evals +- [ ] [Existing behavior 1 still works] +- [ ] [Existing behavior 2 still works] + +### Success Criteria +- pass@3 > 90% for capability evals +- pass^3 = 100% for regression evals +``` + +2. Prompt user to fill in specific criteria + +## Check Evals + +`/eval check feature-name` + +Run evals for a feature: + +1. Read eval definition from `.claude/evals/feature-name.md` +2. For each capability eval: + - Attempt to verify criterion + - Record PASS/FAIL + - Log attempt in `.claude/evals/feature-name.log` +3. 
For each regression eval: + - Run relevant tests + - Compare against baseline + - Record PASS/FAIL +4. Report current status: + +``` +EVAL CHECK: feature-name +======================== +Capability: X/Y passing +Regression: X/Y passing +Status: IN PROGRESS / READY +``` + +## Report Evals + +`/eval report feature-name` + +Generate comprehensive eval report: + +``` +EVAL REPORT: feature-name +========================= +Generated: $(date) + +CAPABILITY EVALS +---------------- +[eval-1]: PASS (pass@1) +[eval-2]: PASS (pass@2) - required retry +[eval-3]: FAIL - see notes + +REGRESSION EVALS +---------------- +[test-1]: PASS +[test-2]: PASS +[test-3]: PASS + +METRICS +------- +Capability pass@1: 67% +Capability pass@3: 100% +Regression pass^3: 100% + +NOTES +----- +[Any issues, edge cases, or observations] + +RECOMMENDATION +-------------- +[SHIP / NEEDS WORK / BLOCKED] +``` + +## List Evals + +`/eval list` + +Show all eval definitions: + +``` +EVAL DEFINITIONS +================ +feature-auth [3/5 passing] IN PROGRESS +feature-search [5/5 passing] READY +feature-export [0/4 passing] NOT STARTED +``` + +## Arguments + +$ARGUMENTS: +- `define <name>` - Create new eval definition +- `check <name>` - Run and check evals +- `report <name>` - Generate full report +- `list` - Show all evals +- `clean` - Remove old eval logs (keeps last 10 runs) diff --git a/commands/orchestrate.md b/commands/orchestrate.md new file mode 100644 index 0000000..30ac2b8 --- /dev/null +++ b/commands/orchestrate.md @@ -0,0 +1,172 @@ +# Orchestrate Command + +Sequential agent workflow for complex tasks. 
+ +## Usage + +`/orchestrate [workflow-type] [task-description]` + +## Workflow Types + +### feature +Full feature implementation workflow: +``` +planner -> tdd-guide -> code-reviewer -> security-reviewer +``` + +### bugfix +Bug investigation and fix workflow: +``` +explorer -> tdd-guide -> code-reviewer +``` + +### refactor +Safe refactoring workflow: +``` +architect -> code-reviewer -> tdd-guide +``` + +### security +Security-focused review: +``` +security-reviewer -> code-reviewer -> architect +``` + +## Execution Pattern + +For each agent in the workflow: + +1. **Invoke agent** with context from previous agent +2. **Collect output** as structured handoff document +3. **Pass to next agent** in chain +4. **Aggregate results** into final report + +## Handoff Document Format + +Between agents, create handoff document: + +```markdown +## HANDOFF: [previous-agent] -> [next-agent] + +### Context +[Summary of what was done] + +### Findings +[Key discoveries or decisions] + +### Files Modified +[List of files touched] + +### Open Questions +[Unresolved items for next agent] + +### Recommendations +[Suggested next steps] +``` + +## Example: Feature Workflow + +``` +/orchestrate feature "Add user authentication" +``` + +Executes: + +1. **Planner Agent** + - Analyzes requirements + - Creates implementation plan + - Identifies dependencies + - Output: `HANDOFF: planner -> tdd-guide` + +2. **TDD Guide Agent** + - Reads planner handoff + - Writes tests first + - Implements to pass tests + - Output: `HANDOFF: tdd-guide -> code-reviewer` + +3. **Code Reviewer Agent** + - Reviews implementation + - Checks for issues + - Suggests improvements + - Output: `HANDOFF: code-reviewer -> security-reviewer` + +4. 
**Security Reviewer Agent** + - Security audit + - Vulnerability check + - Final approval + - Output: Final Report + +## Final Report Format + +``` +ORCHESTRATION REPORT +==================== +Workflow: feature +Task: Add user authentication +Agents: planner -> tdd-guide -> code-reviewer -> security-reviewer + +SUMMARY +------- +[One paragraph summary] + +AGENT OUTPUTS +------------- +Planner: [summary] +TDD Guide: [summary] +Code Reviewer: [summary] +Security Reviewer: [summary] + +FILES CHANGED +------------- +[List all files modified] + +TEST RESULTS +------------ +[Test pass/fail summary] + +SECURITY STATUS +--------------- +[Security findings] + +RECOMMENDATION +-------------- +[SHIP / NEEDS WORK / BLOCKED] +``` + +## Parallel Execution + +For independent checks, run agents in parallel: + +```markdown +### Parallel Phase +Run simultaneously: +- code-reviewer (quality) +- security-reviewer (security) +- architect (design) + +### Merge Results +Combine outputs into single report +``` + +## Arguments + +$ARGUMENTS: +- `feature <description>` - Full feature workflow +- `bugfix <description>` - Bug fix workflow +- `refactor <description>` - Refactoring workflow +- `security <description>` - Security review workflow +- `custom <agents> <description>` - Custom agent sequence + +## Custom Workflow Example + +``` +/orchestrate custom "architect,tdd-guide,code-reviewer" "Redesign caching layer" +``` + +## Tips + +1. **Start with planner** for complex features +2. **Always include code-reviewer** before merge +3. **Use security-reviewer** for auth/payment/PII +4. **Keep handoffs concise** - focus on what next agent needs +5. **Run verification** between agents if needed diff --git a/commands/verify.md b/commands/verify.md new file mode 100644 index 0000000..5f628b1 --- /dev/null +++ b/commands/verify.md @@ -0,0 +1,59 @@ +# Verification Command + +Run comprehensive verification on current codebase state. + +## Instructions + +Execute verification in this exact order: + +1. 
**Build Check** + - Run the build command for this project + - If it fails, report errors and STOP + +2. **Type Check** + - Run TypeScript/type checker + - Report all errors with file:line + +3. **Lint Check** + - Run linter + - Report warnings and errors + +4. **Test Suite** + - Run all tests + - Report pass/fail count + - Report coverage percentage + +5. **Console.log Audit** + - Search for console.log in source files + - Report locations + +6. **Git Status** + - Show uncommitted changes + - Show files modified since last commit + +## Output + +Produce a concise verification report: + +``` +VERIFICATION: [PASS/FAIL] + +Build: [OK/FAIL] +Types: [OK/X errors] +Lint: [OK/X issues] +Tests: [X/Y passed, Z% coverage] +Secrets: [OK/X found] +Logs: [OK/X console.logs] + +Ready for PR: [YES/NO] +``` + +If any critical issues, list them with fix suggestions. + +## Arguments + +$ARGUMENTS can be: +- `quick` - Only build + types +- `full` - All checks (default) +- `pre-commit` - Checks relevant for commits +- `pre-pr` - Full checks plus security scan diff --git a/hooks/hooks.json b/hooks/hooks.json index 00e047a..4b3f415 100644 --- a/hooks/hooks.json +++ b/hooks/hooks.json @@ -47,7 +47,7 @@ "hooks": [ { "type": "command", - "command": "~/.claude/hooks/strategic-compact/suggest-compact.sh" + "command": "./hooks/strategic-compact/suggest-compact.sh" } ], "description": "Suggest manual compaction at logical intervals" @@ -59,7 +59,7 @@ "hooks": [ { "type": "command", - "command": "~/.claude/hooks/memory-persistence/pre-compact.sh" + "command": "./hooks/memory-persistence/pre-compact.sh" } ], "description": "Save state before context compaction" @@ -71,7 +71,7 @@ "hooks": [ { "type": "command", - "command": "~/.claude/hooks/memory-persistence/session-start.sh" + "command": "./hooks/memory-persistence/session-start.sh" } ], "description": "Load previous context on new session" @@ -135,7 +135,7 @@ "hooks": [ { "type": "command", - "command": 
"~/.claude/hooks/memory-persistence/session-end.sh" + "command": "./hooks/memory-persistence/session-end.sh" } ], "description": "Persist session state on end" @@ -145,7 +145,7 @@ "hooks": [ { "type": "command", - "command": "~/.claude/skills/continuous-learning/evaluate-session.sh" + "command": "./skills/continuous-learning/evaluate-session.sh" } ], "description": "Evaluate session for extractable patterns" diff --git a/skills/backend-patterns.md b/skills/backend-patterns/SKILL.md similarity index 100% rename from skills/backend-patterns.md rename to skills/backend-patterns/SKILL.md diff --git a/skills/clickhouse-io.md b/skills/clickhouse-io/SKILL.md similarity index 100% rename from skills/clickhouse-io.md rename to skills/clickhouse-io/SKILL.md diff --git a/skills/coding-standards.md b/skills/coding-standards/SKILL.md similarity index 100% rename from skills/coding-standards.md rename to skills/coding-standards/SKILL.md diff --git a/skills/continuous-learning/SKILL.md b/skills/continuous-learning/SKILL.md new file mode 100644 index 0000000..84a88dd --- /dev/null +++ b/skills/continuous-learning/SKILL.md @@ -0,0 +1,80 @@ +--- +name: continuous-learning +description: Automatically extract reusable patterns from Claude Code sessions and save them as learned skills for future use. +--- + +# Continuous Learning Skill + +Automatically evaluates Claude Code sessions on end to extract reusable patterns that can be saved as learned skills. + +## How It Works + +This skill runs as a **Stop hook** at the end of each session: + +1. **Session Evaluation**: Checks if session has enough messages (default: 10+) +2. **Pattern Detection**: Identifies extractable patterns from the session +3. 
**Skill Extraction**: Saves useful patterns to `~/.claude/skills/learned/` + +## Configuration + +Edit `config.json` to customize: + +```json +{ + "min_session_length": 10, + "extraction_threshold": "medium", + "auto_approve": false, + "learned_skills_path": "~/.claude/skills/learned/", + "patterns_to_detect": [ + "error_resolution", + "user_corrections", + "workarounds", + "debugging_techniques", + "project_specific" + ], + "ignore_patterns": [ + "simple_typos", + "one_time_fixes", + "external_api_issues" + ] +} +``` + +## Pattern Types + +| Pattern | Description | +|---------|-------------| +| `error_resolution` | How specific errors were resolved | +| `user_corrections` | Patterns from user corrections | +| `workarounds` | Solutions to framework/library quirks | +| `debugging_techniques` | Effective debugging approaches | +| `project_specific` | Project-specific conventions | + +## Hook Setup + +Add to your `~/.claude/settings.json`: + +```json +{ + "hooks": { + "Stop": [{ + "matcher": "*", + "hooks": [{ + "type": "command", + "command": "~/.claude/skills/continuous-learning/evaluate-session.sh" + }] + }] + } +} +``` + +## Why Stop Hook? + +- **Lightweight**: Runs once at session end +- **Non-blocking**: Doesn't add latency to every message +- **Complete context**: Has access to full session transcript + +## Related + +- [The Longform Guide](https://x.com/affaanmustafa/status/2014040193557471352) - Section on continuous learning +- `/learn` command - Manual pattern extraction mid-session diff --git a/skills/eval-harness/SKILL.md b/skills/eval-harness/SKILL.md new file mode 100644 index 0000000..522937d --- /dev/null +++ b/skills/eval-harness/SKILL.md @@ -0,0 +1,221 @@ +# Eval Harness Skill + +A formal evaluation framework for Claude Code sessions, implementing eval-driven development (EDD) principles. 
+ +## Philosophy + +Eval-Driven Development treats evals as the "unit tests of AI development": +- Define expected behavior BEFORE implementation +- Run evals continuously during development +- Track regressions with each change +- Use pass@k metrics for reliability measurement + +## Eval Types + +### Capability Evals +Test if Claude can do something it couldn't before: +```markdown +[CAPABILITY EVAL: feature-name] +Task: Description of what Claude should accomplish +Success Criteria: + - [ ] Criterion 1 + - [ ] Criterion 2 + - [ ] Criterion 3 +Expected Output: Description of expected result +``` + +### Regression Evals +Ensure changes don't break existing functionality: +```markdown +[REGRESSION EVAL: feature-name] +Baseline: SHA or checkpoint name +Tests: + - existing-test-1: PASS/FAIL + - existing-test-2: PASS/FAIL + - existing-test-3: PASS/FAIL +Result: X/Y passed (previously Y/Y) +``` + +## Grader Types + +### 1. Code-Based Grader +Deterministic checks using code: +```bash +# Check if file contains expected pattern +grep -q "export function handleAuth" src/auth.ts && echo "PASS" || echo "FAIL" + +# Check if tests pass +npm test -- --testPathPattern="auth" && echo "PASS" || echo "FAIL" + +# Check if build succeeds +npm run build && echo "PASS" || echo "FAIL" +``` + +### 2. Model-Based Grader +Use Claude to evaluate open-ended outputs: +```markdown +[MODEL GRADER PROMPT] +Evaluate the following code change: +1. Does it solve the stated problem? +2. Is it well-structured? +3. Are edge cases handled? +4. Is error handling appropriate? + +Score: 1-5 (1=poor, 5=excellent) +Reasoning: [explanation] +``` + +### 3. 
Human Grader +Flag for manual review: +```markdown +[HUMAN REVIEW REQUIRED] +Change: Description of what changed +Reason: Why human review is needed +Risk Level: LOW/MEDIUM/HIGH +``` + +## Metrics + +### pass@k +"At least one success in k attempts" +- pass@1: First attempt success rate +- pass@3: Success within 3 attempts +- Typical target: pass@3 > 90% + +### pass^k +"All k trials succeed" +- Higher bar for reliability +- pass^3: 3 consecutive successes +- Use for critical paths + +## Eval Workflow + +### 1. Define (Before Coding) +```markdown +## EVAL DEFINITION: feature-xyz + +### Capability Evals +1. Can create new user account +2. Can validate email format +3. Can hash password securely + +### Regression Evals +1. Existing login still works +2. Session management unchanged +3. Logout flow intact + +### Success Metrics +- pass@3 > 90% for capability evals +- pass^3 = 100% for regression evals +``` + +### 2. Implement +Write code to pass the defined evals. + +### 3. Evaluate +```bash +# Run capability evals +[Run each capability eval, record PASS/FAIL] + +# Run regression evals +npm test -- --testPathPattern="existing" + +# Generate report +``` + +### 4. 
Report +```markdown +EVAL REPORT: feature-xyz +======================== + +Capability Evals: + create-user: PASS (pass@1) + validate-email: PASS (pass@2) + hash-password: PASS (pass@1) + Overall: 3/3 passed + +Regression Evals: + login-flow: PASS + session-mgmt: PASS + logout-flow: PASS + Overall: 3/3 passed + +Metrics: + pass@1: 67% (2/3) + pass@3: 100% (3/3) + +Status: READY FOR REVIEW +``` + +## Integration Patterns + +### Pre-Implementation +``` +/eval define feature-name +``` +Creates eval definition file at `.claude/evals/feature-name.md` + +### During Implementation +``` +/eval check feature-name +``` +Runs current evals and reports status + +### Post-Implementation +``` +/eval report feature-name +``` +Generates full eval report + +## Eval Storage + +Store evals in project: +``` +.claude/ + evals/ + feature-xyz.md # Eval definition + feature-xyz.log # Eval run history + baseline.json # Regression baselines +``` + +## Best Practices + +1. **Define evals BEFORE coding** - Forces clear thinking about success criteria +2. **Run evals frequently** - Catch regressions early +3. **Track pass@k over time** - Monitor reliability trends +4. **Use code graders when possible** - Deterministic > probabilistic +5. **Human review for security** - Never fully automate security checks +6. **Keep evals fast** - Slow evals don't get run +7. 
**Version evals with code** - Evals are first-class artifacts + +## Example: Adding Authentication + +```markdown +## EVAL: add-authentication + +### Phase 1: Define (10 min) +Capability Evals: +- [ ] User can register with email/password +- [ ] User can login with valid credentials +- [ ] Invalid credentials rejected with proper error +- [ ] Sessions persist across page reloads +- [ ] Logout clears session + +Regression Evals: +- [ ] Public routes still accessible +- [ ] API responses unchanged +- [ ] Database schema compatible + +### Phase 2: Implement (varies) +[Write code] + +### Phase 3: Evaluate +Run: /eval check add-authentication + +### Phase 4: Report +EVAL REPORT: add-authentication +============================== +Capability: 5/5 passed (pass@3: 100%) +Regression: 3/3 passed (pass^3: 100%) +Status: SHIP IT +``` diff --git a/skills/frontend-patterns.md b/skills/frontend-patterns/SKILL.md similarity index 100% rename from skills/frontend-patterns.md rename to skills/frontend-patterns/SKILL.md diff --git a/skills/project-guidelines-example.md b/skills/project-guidelines-example/SKILL.md similarity index 100% rename from skills/project-guidelines-example.md rename to skills/project-guidelines-example/SKILL.md diff --git a/skills/strategic-compact/SKILL.md b/skills/strategic-compact/SKILL.md new file mode 100644 index 0000000..394a86b --- /dev/null +++ b/skills/strategic-compact/SKILL.md @@ -0,0 +1,63 @@ +--- +name: strategic-compact +description: Suggests manual context compaction at logical intervals to preserve context through task phases rather than arbitrary auto-compaction. +--- + +# Strategic Compact Skill + +Suggests manual `/compact` at strategic points in your workflow rather than relying on arbitrary auto-compaction. + +## Why Strategic Compaction? 
+ +Auto-compaction triggers at arbitrary points: +- Often mid-task, losing important context +- No awareness of logical task boundaries +- Can interrupt complex multi-step operations + +Strategic compaction at logical boundaries: +- **After exploration, before execution** - Compact research context, keep implementation plan +- **After completing a milestone** - Fresh start for next phase +- **Before major context shifts** - Clear exploration context before different task + +## How It Works + +The `suggest-compact.sh` script runs on PreToolUse (Edit/Write) and: + +1. **Tracks tool calls** - Counts tool invocations in session +2. **Threshold detection** - Suggests at configurable threshold (default: 50 calls) +3. **Periodic reminders** - Reminds every 25 calls after threshold + +## Hook Setup + +Add to your `~/.claude/settings.json`: + +```json +{ + "hooks": { + "PreToolUse": [{ + "matcher": "tool == \"Edit\" || tool == \"Write\"", + "hooks": [{ + "type": "command", + "command": "~/.claude/skills/strategic-compact/suggest-compact.sh" + }] + }] + } +} +``` + +## Configuration + +Environment variables: +- `COMPACT_THRESHOLD` - Tool calls before first suggestion (default: 50) + +## Best Practices + +1. **Compact after planning** - Once plan is finalized, compact to start fresh +2. **Compact after debugging** - Clear error-resolution context before continuing +3. **Don't compact mid-implementation** - Preserve context for related changes +4. 
**Read the suggestion** - The hook tells you *when*, you decide *if* + +## Related + +- [The Longform Guide](https://x.com/affaanmustafa/status/2014040193557471352) - Token optimization section +- Memory persistence hooks - For state that survives compaction diff --git a/skills/verification-loop/SKILL.md b/skills/verification-loop/SKILL.md new file mode 100644 index 0000000..b56bb7e --- /dev/null +++ b/skills/verification-loop/SKILL.md @@ -0,0 +1,120 @@ +# Verification Loop Skill + +A comprehensive verification system for Claude Code sessions. + +## When to Use + +Invoke this skill: +- After completing a feature or significant code change +- Before creating a PR +- When you want to ensure quality gates pass +- After refactoring + +## Verification Phases + +### Phase 1: Build Verification +```bash +# Check if project builds +npm run build 2>&1 | tail -20 +# OR +pnpm build 2>&1 | tail -20 +``` + +If build fails, STOP and fix before continuing. + +### Phase 2: Type Check +```bash +# TypeScript projects +npx tsc --noEmit 2>&1 | head -30 + +# Python projects +pyright . 2>&1 | head -30 +``` + +Report all type errors. Fix critical ones before continuing. + +### Phase 3: Lint Check +```bash +# JavaScript/TypeScript +npm run lint 2>&1 | head -30 + +# Python +ruff check . 2>&1 | head -30 +``` + +### Phase 4: Test Suite +```bash +# Run tests with coverage +npm run test -- --coverage 2>&1 | tail -50 + +# Check coverage threshold +# Target: 80% minimum +``` + +Report: +- Total tests: X +- Passed: X +- Failed: X +- Coverage: X% + +### Phase 5: Security Scan +```bash +# Check for secrets +grep -rn "sk-" --include="*.ts" --include="*.js" . 2>/dev/null | head -10 +grep -rn "api_key" --include="*.ts" --include="*.js" . 
2>/dev/null | head -10 + +# Check for console.log +grep -rn "console.log" --include="*.ts" --include="*.tsx" src/ 2>/dev/null | head -10 +``` + +### Phase 6: Diff Review +```bash +# Show what changed +git diff --stat +git diff HEAD~1 --name-only +``` + +Review each changed file for: +- Unintended changes +- Missing error handling +- Potential edge cases + +## Output Format + +After running all phases, produce a verification report: + +``` +VERIFICATION REPORT +================== + +Build: [PASS/FAIL] +Types: [PASS/FAIL] (X errors) +Lint: [PASS/FAIL] (X warnings) +Tests: [PASS/FAIL] (X/Y passed, Z% coverage) +Security: [PASS/FAIL] (X issues) +Diff: [X files changed] + +Overall: [READY/NOT READY] for PR + +Issues to Fix: +1. ... +2. ... +``` + +## Continuous Mode + +For long sessions, run verification every 15 minutes or after major changes: + +```markdown +Set a mental checkpoint: +- After completing each function +- After finishing a component +- Before moving to next task + +Run: /verify +``` + +## Integration with Hooks + +This skill complements PostToolUse hooks but provides deeper verification. +Hooks catch issues immediately; this skill provides comprehensive review.