package scrape

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"strings"
	"time"
)

// bearerRe matches "Bearer <token>" patterns (case-insensitive) for redaction.
// A match is the word "Bearer", one or more whitespace characters, and then
// every following non-whitespace character up to the next whitespace or the
// end of the input. It is compiled once at package scope and used by
// redactSensitive.
var bearerRe = regexp.MustCompile(`(?i)Bearer\s+\S+`)

// defaultFirecrawlBaseURL is the base URL that NewFirecrawlClient assigns to
// FirecrawlClient.BaseURL. Scrape appends "/v2/scrape" to the base URL.
const defaultFirecrawlBaseURL = "https://api.firecrawl.dev"

// FirecrawlError is the typed error returned by FirecrawlClient.Scrape.
type FirecrawlError struct {
	Code      string // machine-readable category, e.g. "firecrawl_timeout"
	Message   string // human-readable detail for that category
	Retryable bool   // set by Scrape for timeout, network, HTTP 429 and HTTP 5xx failures
}

// Error implements the error interface and renders the error as
// "<Code>: <Message>".
func (e *FirecrawlError) Error() string {
	// All operands are strings, so fmt.Sprint inserts no separators of its own.
	return fmt.Sprint(e.Code, ": ", e.Message)
}

// FirecrawlResult mirrors the shape of scrape.FetchResult enough for the
// executor to surface it uniformly to the agent.
//
// As populated by FirecrawlClient.Scrape: URL is the URL that was requested;
// Status is the metadata entry "statusCode" when that is a JSON number and
// 200 otherwise; Title is the metadata entry "title" when that is a string
// and empty otherwise; Markdown and Metadata are copied from the response's
// "data" object; PagesUsed is always 1.
type FirecrawlResult struct {
	URL       string         `json:"url"`
	Status    int            `json:"status"`
	Title     string         `json:"title"`
	Markdown  string         `json:"markdown"`
	Metadata  map[string]any `json:"metadata"`
	PagesUsed int            `json:"pages_used"` // always 1 for /scrape
}

// FirecrawlClient is a thin wrapper around Firecrawl's /v2/scrape endpoint.
// It does not depend on the official Go SDK because:
//  1. Firecrawl's docs are ambiguous about SDK support level,
//  2. Our v1 API surface is a single POST.
//
// Construct it with NewFirecrawlClient, which fills in the HTTP client and
// the default BaseURL.
type FirecrawlClient struct {
	apiKey     string       // sent as "Authorization: Bearer <apiKey>"; Scrape returns an error when empty
	httpClient *http.Client // performs the POST; NewFirecrawlClient sets its Timeout
	// BaseURL is exported only so tests can swap in an httptest.Server URL.
	// Scrape appends "/v2/scrape" to it.
	BaseURL string
}

// NewFirecrawlClient returns a FirecrawlClient that authenticates with apiKey
// and targets the default Firecrawl base URL. The timeout becomes the
// http.Client timeout for each request; a zero or negative value is replaced
// by 30 seconds.
func NewFirecrawlClient(apiKey string, timeout time.Duration) *FirecrawlClient {
	effective := timeout
	if effective <= 0 {
		effective = 30 * time.Second
	}

	client := new(FirecrawlClient)
	client.apiKey = apiKey
	client.BaseURL = defaultFirecrawlBaseURL
	client.httpClient = &http.Client{Timeout: effective}
	return client
}

// scrapeResponse is the wire shape returned by /v2/scrape. Only the fields
// that Scrape reads are declared here: Success gates the result (false is
// reported as a firecrawl_blocked error carrying Error as its message),
// Data.Markdown is the page content, and Data.Metadata is the free-form
// metadata object from which Scrape reads "title" and "statusCode".
type scrapeResponse struct {
	Success bool `json:"success"`
	Data    struct {
		Markdown string         `json:"markdown"`
		Metadata map[string]any `json:"metadata"`
	} `json:"data"`
	Error string `json:"error,omitempty"`
}

// Scrape calls POST {BaseURL}/v2/scrape for url, requesting the "markdown"
// format, and returns the page markdown plus Firecrawl's metadata.
//
// Every failure is returned as a *FirecrawlError whose Code names the cause:
//
//   - firecrawl_missing_key: the client has no API key; no request is sent.
//   - firecrawl_internal: the request could not be encoded or constructed.
//   - firecrawl_timeout: the context was canceled or a deadline expired
//     (retryable).
//   - firecrawl_network: any other transport error (retryable).
//   - firecrawl_invalid_key: HTTP 401 or 403.
//   - firecrawl_rate_limited: HTTP 429 (retryable).
//   - firecrawl_server_error: HTTP 5xx (retryable).
//   - firecrawl_bad_request: any other HTTP 4xx; the message carries a
//     redacted, truncated excerpt of the response body.
//   - firecrawl_bad_response: the body could not be decoded as JSON.
//   - firecrawl_blocked: Firecrawl answered with success=false.
//
// On success the result's Status is the metadata "statusCode" when present
// (200 otherwise), Title is the metadata "title" when present, and PagesUsed
// is 1.
func (c *FirecrawlClient) Scrape(ctx context.Context, url string) (*FirecrawlResult, error) {
	const (
		// maxErrorBodyBytes caps how much of a 4xx body is read for the error
		// message; redactSensitive shortens the excerpt further.
		maxErrorBodyBytes = 4 << 10
		// maxDrainBytes caps how much unread body is discarded before Close.
		maxDrainBytes = 64 << 10
	)

	if c.apiKey == "" {
		return nil, &FirecrawlError{Code: "firecrawl_missing_key", Message: "no API key configured"}
	}

	reqBody, err := json.Marshal(map[string]any{
		"url":     url,
		"formats": []string{"markdown"},
	})
	if err != nil {
		return nil, &FirecrawlError{Code: "firecrawl_internal", Message: err.Error()}
	}

	// Tolerate a BaseURL written with a trailing slash so the request path
	// never becomes "//v2/scrape".
	endpoint := strings.TrimRight(c.BaseURL, "/") + "/v2/scrape"
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(reqBody))
	if err != nil {
		return nil, &FirecrawlError{Code: "firecrawl_internal", Message: err.Error()}
	}
	req.Header.Set("Authorization", "Bearer "+c.apiKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		// Cancellation and deadline expiry share one code and are both
		// reported as retryable.
		if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
			return nil, &FirecrawlError{Code: "firecrawl_timeout", Message: err.Error(), Retryable: true}
		}
		return nil, &FirecrawlError{Code: "firecrawl_network", Message: err.Error(), Retryable: true}
	}
	defer func() {
		// Best effort: discard a bounded remainder so the transport can reuse
		// the connection. Failures here cannot change the result.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrainBytes))
		_ = resp.Body.Close()
	}()

	switch {
	case resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden:
		return nil, &FirecrawlError{Code: "firecrawl_invalid_key", Message: fmt.Sprintf("HTTP %d", resp.StatusCode)}
	case resp.StatusCode == http.StatusTooManyRequests:
		return nil, &FirecrawlError{Code: "firecrawl_rate_limited", Message: "HTTP 429", Retryable: true}
	case resp.StatusCode >= 500:
		return nil, &FirecrawlError{Code: "firecrawl_server_error", Message: fmt.Sprintf("HTTP %d", resp.StatusCode), Retryable: true}
	case resp.StatusCode >= 400:
		// The read error is ignored on purpose: a partial body is still a
		// useful diagnostic, and the status code alone identifies the failure.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
		return nil, &FirecrawlError{
			Code:    "firecrawl_bad_request",
			Message: fmt.Sprintf("HTTP %d: %s", resp.StatusCode, c.redactSensitive(string(body))),
		}
	}

	var sr scrapeResponse
	if err := json.NewDecoder(resp.Body).Decode(&sr); err != nil {
		return nil, &FirecrawlError{Code: "firecrawl_bad_response", Message: err.Error()}
	}
	if !sr.Success {
		return nil, &FirecrawlError{Code: "firecrawl_blocked", Message: c.redactSensitive(sr.Error)}
	}

	// Metadata is free-form JSON: use the well-known keys only when they have
	// the expected dynamic type (JSON numbers decode as float64).
	title := ""
	if t, ok := sr.Data.Metadata["title"].(string); ok {
		title = t
	}
	status := 200
	if sc, ok := sr.Data.Metadata["statusCode"].(float64); ok {
		status = int(sc)
	}

	return &FirecrawlResult{
		URL:       url,
		Status:    status,
		Title:     title,
		Markdown:  sr.Data.Markdown,
		Metadata:  sr.Data.Metadata,
		PagesUsed: 1,
	}, nil
}

// redactSensitive redacts Bearer tokens and the configured API key from body
// and then shortens the result to at most maxLen bytes, appending
// "...(truncated)" when anything was cut. Defends against upstream API
// behaviour changes that might echo request headers/body.
//
// Redaction runs before truncation on purpose: cutting first could split a
// secret at the boundary, after which neither pattern would match and the
// leading part of the secret would survive in the output.
func (c *FirecrawlClient) redactSensitive(body string) string {
	const maxLen = 256

	// Redact "Bearer <token>" patterns (case-insensitive).
	body = bearerRe.ReplaceAllString(body, "Bearer [REDACTED]")
	// Also redact the exact configured API key if it leaked through.
	if c.apiKey != "" {
		body = strings.ReplaceAll(body, c.apiKey, "[REDACTED]")
	}
	if len(body) <= maxLen {
		return body
	}

	// Ranging over a string yields the byte offset of each rune start, so the
	// last offset not beyond maxLen is a cut point that never splits a
	// multi-byte character.
	cut := 0
	for i := range body {
		if i > maxLen {
			break
		}
		cut = i
	}
	return body[:cut] + "...(truncated)"
}
