{
  "metadata": {
    "name": "apiroute.dev recommend-route contract",
    "version": "0.2",
    "generated_at": "2026-06-14T11:10:00.000Z",
    "last_verified": "2026-06-14",
    "status": "runtime_live",
    "canonical_url": "https://apiroute.dev/api/recommend-route",
    "json_url": "https://apiroute.dev/api/recommend-route.json",
    "runtime_method_planned": "POST",
    "current_get_behavior": "GET returns this machine-readable contract. POST calculates a live rule-based route recommendation."
  },
  "purpose": "Document the live server-side route recommendation API and its request, response, scoring, source, and caveat contract.",
  "status_policy": {
    "runtime_available": true,
    "safe_for_agent_planning": true,
    "safe_for_live_routing": true,
    "required_caveat": "This endpoint returns rule-based estimates from the current apiroute.dev pricing snapshot. Verify provider pages before production routing or purchasing decisions."
  },
  "planned_request": {
    "method": "POST",
    "content_type": "application/json",
    "body_schema": {
      "task": {
        "type": "string",
        "required": true,
        "description": "Short natural-language description of the workload."
      },
      "use_case": {
        "type": "enum",
        "required": true,
        "values": [
          "general",
          "coding",
          "rag",
          "translation",
          "vision",
          "agents"
        ]
      },
      "priority": {
        "type": "enum",
        "required": true,
        "values": [
          "cheapest",
          "balanced",
          "premium"
        ]
      },
      "privacy_class": {
        "type": "enum",
        "required": false,
        "values": [
          "public",
          "internal",
          "sensitive",
          "private"
        ],
        "default": "public",
        "description": "Conservative routing gate. The sensitive and private classes force local_first and exclude providers with known retention concerns from the cloud shortlist."
      },
      "prompt_tokens": {
        "type": "integer",
        "required": true,
        "minimum": 1
      },
      "expected_output_tokens": {
        "type": "integer",
        "required": true,
        "minimum": 1
      },
      "cache_share": {
        "type": "number",
        "required": false,
        "minimum": 0,
        "maximum": 1,
        "default": 0
      },
      "requirements": {
        "type": "object",
        "required": false,
        "properties": {
          "vision": "boolean",
          "function_calling": "boolean",
          "prompt_caching": "boolean",
          "min_context_window": "integer",
          "min_output_tokens": "integer",
          "allowed_providers": "array<string>",
          "excluded_providers": "array<string>",
          "allowed_model_groups": "array<string>",
          "excluded_model_groups": "array<string>"
        }
      },
      "local_first": {
        "type": "boolean",
        "required": false,
        "default": false,
        "description": "When true, check localai.apiroute.dev suitability before recommending cloud/API routes."
      }
    }
  },
  "planned_response": {
    "content_type": "application/json",
    "top_level_fields": [
      "metadata",
      "input",
      "recommendation",
      "alternatives",
      "filtered_out",
      "local_fallback",
      "caveats",
      "sources"
    ],
    "recommendation_fields": [
      "route",
      "model",
      "provider",
      "estimated_total_cost_usd",
      "estimated_input_cost_usd",
      "estimated_output_cost_usd",
      "estimated_cache_read_cost_usd",
      "context_window",
      "max_output_tokens",
      "score",
      "why"
    ]
  },
  "privacy_policy": {
    "classes": [
      {
        "id": "public",
        "meaning": "Public website copy, public code, public pricing, or non-sensitive examples.",
        "routing": "Normal cloud/API recommendation allowed."
      },
      {
        "id": "internal",
        "meaning": "Internal project context without secrets, private health, finance, customer raw data, or regulated material.",
        "routing": "Cloud/API recommendation allowed with normal caveats."
      },
      {
        "id": "sensitive",
        "meaning": "Customer raw data, credentials-adjacent logs, unpublished business data, or sensitive operational material.",
        "routing": "local_first is forced; providers with retention concerns are excluded from the cloud shortlist."
      },
      {
        "id": "private",
        "meaning": "Personal health, finance, family, identity, or highly confidential files.",
        "routing": "local_first is forced; providers with retention concerns are excluded from the cloud shortlist; use local/private approved environments before cloud models."
      }
    ],
    "current_conservative_exclusions": {
      "sensitive": [
        "anthropic"
      ],
      "private": [
        "anthropic"
      ]
    },
    "caveat": "These privacy filters are operational guardrails for model routing. They are not legal or compliance advice."
  },
  "scoring_contract": {
    "data_sources": [
      "https://apiroute.dev/api/live-prices",
      "https://apiroute.dev/api/route-recommendation-guide",
      "https://localai.apiroute.dev/data/agent-model-guide.json"
    ],
    "hard_filters": [
      "pricing_status must be active",
      "context_window must fit prompt_tokens + expected_output_tokens",
      "max_output_tokens must fit expected_output_tokens",
      "vision must match when requirements.vision is true",
      "function_calling must match when requirements.function_calling is true",
      "prompt_caching must match when requirements.prompt_caching is true",
      "provider allow/exclude filters must be respected",
      "model group allow/exclude filters must be respected",
      "privacy_class sensitive/private forces local_first and applies conservative provider exclusions"
    ],
    "cost_formula": {
      "cached_prompt_tokens": "prompt_tokens * cache_share",
      "uncached_prompt_tokens": "prompt_tokens - cached_prompt_tokens",
      "estimated_input_cost_usd": "(uncached_prompt_tokens / 1000000) * input_cost_per_1m",
      "estimated_cache_read_cost_usd": "(cached_prompt_tokens / 1000000) * cache_read_cost_per_1m",
      "estimated_output_cost_usd": "(expected_output_tokens / 1000000) * output_cost_per_1m",
      "estimated_total_cost_usd": "estimated_input_cost_usd + estimated_cache_read_cost_usd + estimated_output_cost_usd"
    },
    "route_behavior": {
      "cheapest": "Sort matching models by estimated_total_cost_usd ascending.",
      "balanced": "Blend estimated cost, use-case tag fit, context reserve, prompt caching fit, and capability fit.",
      "premium": "Prefer highest capability/frontier fit after hard filters, then use cost as a tiebreaker."
    },
    "non_ranking_inputs": [
      "affiliate status",
      "sponsorship status",
      "referral status",
      "owned waitlist status",
      "premium product interest"
    ]
  },
  "workload_presets": [
    {
      "id": "coding-agent",
      "label": "Coding Agent",
      "description": "Multi-file coding agent loop with tool calls, patch review, and repeated project context.",
      "body": {
        "task": "Multi-file coding agent loop with tool calls, patch review, and repeated project context.",
        "use_case": "coding",
        "priority": "balanced",
        "prompt_tokens": 32000,
        "expected_output_tokens": 4000,
        "cache_share": 0.55,
        "privacy_class": "internal",
        "requirements": {
          "function_calling": true,
          "prompt_caching": true,
          "min_context_window": 36000
        },
        "local_first": true
      }
    },
    {
      "id": "rag-docs",
      "label": "RAG Docs",
      "description": "RAG over long documents with repeated document prefixes and decision-ready summaries.",
      "body": {
        "task": "RAG over long documents with repeated document prefixes and decision-ready summaries.",
        "use_case": "rag",
        "priority": "balanced",
        "prompt_tokens": 85000,
        "expected_output_tokens": 3500,
        "cache_share": 0.7,
        "privacy_class": "internal",
        "requirements": {
          "prompt_caching": true,
          "min_context_window": 88500
        },
        "local_first": false
      }
    },
    {
      "id": "vision-task",
      "label": "Vision Task",
      "description": "Image and document screenshot analysis with structured reasoning output.",
      "body": {
        "task": "Image and document screenshot analysis with structured reasoning output.",
        "use_case": "vision",
        "priority": "premium",
        "prompt_tokens": 6000,
        "expected_output_tokens": 2500,
        "cache_share": 0.1,
        "privacy_class": "public",
        "requirements": {
          "vision": true,
          "min_context_window": 8500
        },
        "local_first": false
      }
    },
    {
      "id": "cheap-batch",
      "label": "Cheap Batch",
      "description": "High-volume text classification, translation, cleanup, or extraction where cost matters most.",
      "body": {
        "task": "High-volume text classification, translation, cleanup, or extraction where cost matters most.",
        "use_case": "translation",
        "priority": "cheapest",
        "prompt_tokens": 12000,
        "expected_output_tokens": 1800,
        "cache_share": 0,
        "privacy_class": "public",
        "requirements": {
          "min_context_window": 13800
        },
        "local_first": false
      }
    },
    {
      "id": "local-agent",
      "label": "Local-first Agent",
      "description": "Local-first personal agent fallback check before selecting a cloud route.",
      "body": {
        "task": "Local-first personal agent fallback check before selecting a cloud route.",
        "use_case": "agents",
        "priority": "cheapest",
        "prompt_tokens": 16000,
        "expected_output_tokens": 2500,
        "cache_share": 0.25,
        "privacy_class": "sensitive",
        "requirements": {
          "function_calling": true,
          "min_context_window": 18500
        },
        "local_first": true
      }
    }
  ],
  "agent_workload_estimator": {
    "status": "homepage_tool_live",
    "description": "The homepage includes an agent workload cost estimator for multi-step agent runs. It is a planning calculator, not a separate API endpoint.",
    "inputs": {
      "preset": [
        "chat-answer",
        "rag-briefing",
        "coding-agent",
        "browser-agent",
        "background-monitor"
      ],
      "iterations": "Number of model turns in the agent run.",
      "tool_calls_per_iteration": "Expected tool calls per model turn.",
      "system_memory_tokens_per_iteration": "Repeated system prompt, memory, rules, and stable agent context.",
      "context_tokens_per_iteration": "Task, document, code, browser, or retrieved context per turn.",
      "tool_overhead_tokens_per_iteration": "Tool call schemas, tool results, browser state, logs, or API payload summaries per turn.",
      "output_tokens_per_iteration": "Expected model output per turn.",
      "cache_share": "Estimated share of input tokens billed as cached reads, as a fraction from 0 to 1."
    },
    "formula": {
      "input_per_iteration": "system_memory_tokens_per_iteration + context_tokens_per_iteration + tool_overhead_tokens_per_iteration",
      "total_input_tokens": "input_per_iteration * iterations",
      "cached_input_tokens": "total_input_tokens * cache_share",
      "uncached_input_tokens": "total_input_tokens - cached_input_tokens",
      "total_output_tokens": "output_tokens_per_iteration * iterations",
      "selected_model_run_cost": "(uncached_input_tokens / 1000000) * input_cost_per_1m + (cached_input_tokens / 1000000) * cache_read_cost_per_1m + (total_output_tokens / 1000000) * output_cost_per_1m"
    },
    "recommend_route_mapping": {
      "prompt_tokens": "total_input_tokens",
      "expected_output_tokens": "total_output_tokens",
      "cache_share": "cache_share",
      "requirements.min_context_window": "input_per_iteration + output_tokens_per_iteration",
      "requirements.function_calling": "true for tool-using agent presets",
      "requirements.vision": "true for browser or screenshot-heavy workflows"
    },
    "caveat": "Actual agent cost depends on retry behavior, hidden provider overhead, tool result sizes, cache eligibility, and final prompt construction."
  },
  "example_requests": [
    {
      "name": "coding_agent_balanced",
      "body": {
        "task": "Multi-file coding agent loop with tool calls and patch review.",
        "use_case": "coding",
        "priority": "balanced",
        "prompt_tokens": 32000,
        "expected_output_tokens": 4000,
        "cache_share": 0.55,
        "requirements": {
          "function_calling": true,
          "prompt_caching": true,
          "min_context_window": 64000
        },
        "local_first": true
      }
    },
    {
      "name": "rag_cost_check_cheapest",
      "body": {
        "task": "Answer questions over a long internal document set.",
        "use_case": "rag",
        "priority": "cheapest",
        "prompt_tokens": 90000,
        "expected_output_tokens": 1500,
        "cache_share": 0.8,
        "requirements": {
          "prompt_caching": true,
          "min_context_window": 128000
        },
        "local_first": false
      }
    },
    {
      "name": "vision_premium",
      "body": {
        "task": "Analyze screenshots and explain UI quality issues.",
        "use_case": "vision",
        "priority": "premium",
        "prompt_tokens": 6000,
        "expected_output_tokens": 2500,
        "cache_share": 0,
        "requirements": {
          "vision": true,
          "min_output_tokens": 2500
        },
        "local_first": false
      }
    }
  ],
  "example_response_shape": {
    "metadata": {
      "status": "example_only",
      "runtime_available": false,
      "generated_from": [
        "https://apiroute.dev/api/live-prices",
        "https://apiroute.dev/api/route-recommendation-guide"
      ]
    },
    "input": {
      "use_case": "coding",
      "priority": "balanced",
      "prompt_tokens": 32000,
      "expected_output_tokens": 4000,
      "cache_share": 0.55
    },
    "recommendation": {
      "route": "balanced",
      "model": "example-model-name",
      "provider": "example-provider",
      "estimated_total_cost_usd": 0.1234,
      "context_window": 128000,
      "max_output_tokens": 8192,
      "score": 0.87,
      "why": [
        "Fits context and output requirements.",
        "Supports required capabilities.",
        "Balances cost against coding fit and context reserve."
      ]
    },
    "alternatives": [
      {
        "route": "cheaper_alternative",
        "model": "example-cheaper-model",
        "provider": "example-provider",
        "estimated_total_cost_usd": 0.0612,
        "why": [
          "Lower estimated cost.",
          "Lower capability score than the balanced recommendation."
        ]
      }
    ],
    "filtered_out": [
      {
        "model": "example-filtered-model",
        "reason": "context_window_too_small"
      }
    ],
    "local_fallback": {
      "checked": true,
      "source": "https://localai.apiroute.dev/data/agent-model-guide.json",
      "summary": "Use local fit guidance before forcing an API route."
    },
    "caveats": [
      "This example response is illustrative.",
      "Provider pages should be verified before production use or purchasing decisions.",
      "Commercial relationships must not influence ranking."
    ],
    "sources": [
      "https://apiroute.dev/api/live-prices",
      "https://apiroute.dev/api/route-recommendation-guide",
      "https://apiroute.dev/api/agent-instructions"
    ]
  },
  "implementation_plan": [
    {
      "phase": "static_contract",
      "status": "complete_when_deployed",
      "description": "Publish this contract and examples as a stable GET endpoint."
    },
    {
      "phase": "runtime_function",
      "status": "complete",
      "description": "A small server-side POST handler reads static pricing JSON and returns a computed recommendation."
    },
    {
      "phase": "validation_and_telemetry",
      "status": "complete",
      "description": "Deterministic checks, live curl checks, and Vercel Web Analytics custom events for route requests, preset selection, copy actions, and agent workload usage are in place."
    }
  ]
}