{
  "openapi": "3.1.0",
  "info": {
    "title": "Extractly API",
    "description": "Extractly is a single-endpoint API that scrapes any URL and returns structured JSON matching a user-defined schema. Send a URL and a JSON schema — get back clean, validated JSON. Built for AI agents and developers who need reliable, schema-consistent web data extraction without building or maintaining their own scraping and LLM pipeline.\n\n**How it works:**\n1. Send a URL and a JSON schema to the `/extract` endpoint\n2. Extractly scrapes the page, runs it through Claude Sonnet, and validates the output\n3. Receive clean, structured JSON matching your schema\n\n**Free tier:** 50 credits on signup, no credit card required.\n\n**Docs:** https://getextractly.com/docs  \n**MCP Package:** `npx extractly-mcp` (npm)\n",
    "version": "1.0.0",
    "contact": {
      "url": "https://getextractly.com"
    },
    "license": {
      "name": "Commercial",
      "url": "https://getextractly.com/terms"
    }
  },
  "servers": [
    {
      "url": "https://getextractly.com/api/v1",
      "description": "Production"
    }
  ],
  "security": [
    {
      "ApiKeyAuth": []
    }
  ],
  "paths": {
    "/extract": {
      "post": {
        "operationId": "extractStructuredData",
        "summary": "Extract structured data from any URL",
        "description": "Scrapes the given URL and extracts structured JSON data matching the provided schema. The pipeline handles scraping, LLM extraction via Claude Sonnet, schema validation, and caching automatically.\n\n**Credit costs:**\n- Cache hit (same URL + schema within 24 hours): 1 credit\n- Cache miss, standard page: 5 credits\n- Cache miss, JavaScript-heavy page: 10 credits\n\n**Caching:** Results are cached for 24 hours keyed on URL + schema. Repeated calls with the same URL and schema cost only 1 credit.\n",
        "tags": ["Extraction"],
        "security": [
          {
            "ApiKeyAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/ExtractRequest"
              },
              "examples": {
                "product_price": {
                  "summary": "Extract product name and price",
                  "value": {
                    "url": "https://example.com/product/widget",
                    "schema": {
                      "product_name": "string",
                      "price": "number",
                      "in_stock": "boolean",
                      "description": "string"
                    }
                  }
                },
                "news_article": {
                  "summary": "Extract news article metadata",
                  "value": {
                    "url": "https://example.com/news/some-article",
                    "schema": {
                      "title": "string",
                      "author": "string",
                      "published_date": "string",
                      "summary": "string",
                      "tags": ["string"]
                    }
                  }
                },
                "company_info": {
                  "summary": "Extract company information",
                  "value": {
                    "url": "https://example.com/about",
                    "schema": {
                      "company_name": "string",
                      "founded": "string",
                      "headquarters": "string",
                      "employee_count": "string",
                      "description": "string"
                    }
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Extraction successful. Returns structured JSON matching the schema provided in the request.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ExtractResponse"
                },
                "examples": {
                  "product_result": {
                    "summary": "Successful product extraction",
                    "value": {
                      "data": {
                        "product_name": "Blue Widget Pro",
                        "price": 49.99,
                        "in_stock": true,
                        "description": "The professional grade blue widget for serious widget users."
                      },
                      "cached": false,
                      "credits_used": 5
                    }
                  },
                  "cached_result": {
                    "summary": "Cached result (costs 1 credit)",
                    "value": {
                      "data": {
                        "product_name": "Blue Widget Pro",
                        "price": 49.99,
                        "in_stock": true,
                        "description": "The professional grade blue widget for serious widget users."
                      },
                      "cached": true,
                      "credits_used": 1
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Insufficient credits. The API key does not have enough credits to complete the extraction.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                },
                "example": {
                  "error": "Insufficient credits",
                  "message": "Your account has 0 credits remaining. Purchase more credits at https://getextractly.com/pricing",
                  "credits_remaining": 0
                }
              }
            }
          },
          "401": {
            "description": "Invalid or missing API key.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                },
                "example": {
                  "error": "Unauthorized",
                  "message": "Invalid or missing API key. Include your key in the X-API-Key header."
                }
              }
            }
          },
          "422": {
            "description": "Extraction validation failed. The scraped content could not be reliably mapped to the provided schema.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ValidationErrorResponse"
                },
                "example": {
                  "error": "Validation failed",
                  "message": "The extracted data did not match the provided schema.",
                  "details": {
                    "field": "price",
                    "issue": "Expected number, received null"
                  }
                }
              }
            }
          },
          "500": {
            "description": "Internal server error. Scraping or extraction pipeline failure.",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                },
                "example": {
                  "error": "Internal server error",
                  "message": "The scraping pipeline encountered an error. Please try again."
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "ApiKeyAuth": {
        "type": "apiKey",
        "in": "header",
        "name": "X-API-Key",
        "description": "Your Extractly API key. Format: `sk_live_` followed by 24 alphanumeric characters. Obtain your key by signing up at https://getextractly.com."
      }
    },
    "schemas": {
      "ExtractRequest": {
        "type": "object",
        "required": ["url", "schema"],
        "properties": {
          "url": {
            "type": "string",
            "format": "uri",
            "description": "The fully qualified URL of the page to scrape and extract data from.",
            "example": "https://example.com/product/widget"
          },
          "schema": {
            "type": "object",
            "description": "A JSON object defining the shape of the data you want extracted. Keys are field names, values describe the expected data type or structure. Supports nested objects and arrays.",
            "example": {
              "product_name": "string",
              "price": "number",
              "in_stock": "boolean",
              "tags": ["string"]
            }
          }
        }
      },
      "ExtractResponse": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "description": "The extracted data, structured as a JSON object matching the schema you provided in the request.",
            "additionalProperties": true
          },
          "cached": {
            "type": "boolean",
            "description": "Whether this result was served from the cache. Cached results cost 1 credit. Cache TTL is 24 hours, keyed on URL + schema."
          },
          "credits_used": {
            "type": "integer",
            "description": "Number of credits deducted from your account for this request.",
            "example": 5
          }
        }
      },
      "ErrorResponse": {
        "type": "object",
        "properties": {
          "error": {
            "type": "string",
            "description": "Short error identifier."
          },
          "message": {
            "type": "string",
            "description": "Human-readable description of the error."
          },
          "credits_remaining": {
            "type": "integer",
            "description": "Remaining credit balance (included on 402 responses)."
          }
        }
      },
      "ValidationErrorResponse": {
        "type": "object",
        "properties": {
          "error": {
            "type": "string",
            "description": "Short error identifier."
          },
          "message": {
            "type": "string",
            "description": "Human-readable description of the validation failure."
          },
          "details": {
            "type": "object",
            "description": "Structured details about which field failed validation and why.",
            "additionalProperties": true
          }
        }
      }
    }
  },
  "tags": [
    {
      "name": "Extraction",
      "description": "The core extraction endpoint. Send a URL and schema, receive structured JSON."
    }
  ],
  "externalDocs": {
    "description": "Full documentation",
    "url": "https://getextractly.com/docs"
  }
}
