package scrape

import (
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// Metadata holds extracted structured page data.
//
// As populated by extractMetadata:
//   - Description is the content of a <meta name="description"> tag.
//   - OG maps Open Graph keys to their content; keys are the meta "property"
//     values with the "og:" prefix removed (e.g. "title" for "og:title").
//   - Twitter maps Twitter card keys to their content; keys are the meta
//     "name" values with the "twitter:" prefix removed.
//   - Canonical is the href of the first link[rel=canonical] element.
//   - Links holds non-empty href values of <a> elements, capped at 100.
//   - Images holds non-empty src values of <img> elements, capped at 50.
//
// Link and image values are stored as found in the attributes; they are not
// resolved against a base URL. Every field is omitted from JSON when empty.
type Metadata struct {
	Description string            `json:"description,omitempty"`
	OG          map[string]string `json:"og,omitempty"`
	Twitter     map[string]string `json:"twitter,omitempty"`
	Canonical   string            `json:"canonical,omitempty"`
	Links       []string          `json:"links,omitempty"`
	Images      []string          `json:"images,omitempty"`
}

// extractMetadata pulls common metadata fields from a goquery Document.
//
// It reads:
//   - <meta> tags: name="description" fills Description, property="og:*"
//     fills OG, and name="twitter:*" fills Twitter. Map keys are the attribute
//     value with its prefix removed. When a key repeats, the last tag wins.
//   - the href of the first link[rel=canonical] element, if it has one.
//   - up to 100 non-empty <a href> values and up to 50 non-empty <img src>
//     values, in document order and without resolving them against a base URL.
//
// The returned OG and Twitter maps are always non-nil. A nil doc yields a
// Metadata with no fields populated.
func extractMetadata(doc *goquery.Document) Metadata {
	// Caps on how many link and image URLs are kept per page.
	const (
		maxLinks  = 100
		maxImages = 50
	)

	m := Metadata{
		OG:      map[string]string{},
		Twitter: map[string]string{},
	}
	if doc == nil {
		return m
	}

	doc.Find("meta").Each(func(_ int, s *goquery.Selection) {
		// Attr returns "" for a missing attribute, which simply fails every
		// match below, so the "exists" results are not needed here.
		name, _ := s.Attr("name")
		prop, _ := s.Attr("property")
		content, _ := s.Attr("content")
		switch {
		case name == "description":
			m.Description = content
		case strings.HasPrefix(prop, "og:"):
			m.OG[strings.TrimPrefix(prop, "og:")] = content
		case strings.HasPrefix(name, "twitter:"):
			m.Twitter[strings.TrimPrefix(name, "twitter:")] = content
		}
	})

	if href, ok := doc.Find("link[rel=canonical]").First().Attr("href"); ok {
		m.Canonical = href
	}

	m.Links = collectAttrValues(doc, "a[href]", "href", maxLinks)
	m.Images = collectAttrValues(doc, "img[src]", "src", maxImages)
	return m
}

// collectAttrValues returns, in document order, the first limit non-empty
// values of attr among the elements matching selector. It returns nil when
// nothing qualifies. limit must be positive.
func collectAttrValues(doc *goquery.Document, selector, attr string, limit int) []string {
	var vals []string
	doc.Find(selector).EachWithBreak(func(_ int, s *goquery.Selection) bool {
		if v, _ := s.Attr(attr); v != "" {
			vals = append(vals, v)
		}
		// Stop walking as soon as the cap is reached, so link-heavy pages do
		// not build (and then retain) a slice far larger than what is kept.
		return len(vals) < limit
	})
	return vals
}
