"""Public, unauthenticated tracking ingestion for external sites.

Endpoints:
  - GET/POST /api/analytics/collect   -> record a Hit from a site's JS tag
  - GET      /api/analytics/tag.js     -> the embeddable tracking snippet

Cross-origin note: the tag sends a "simple" request (POST text/plain or GET
pixel) so the browser issues no CORS preflight and the response is ignored.
We therefore do not require the origin to be in CORS_ALLOWED_ORIGINS.
"""
import json
import logging
import math
from urllib.parse import urlparse, parse_qs

from django.conf import settings
from django.http import HttpResponse, JsonResponse
from django.utils import timezone
from django.views.decorators.csrf import csrf_exempt

from rest_framework.decorators import api_view, authentication_classes, permission_classes, throttle_classes
from rest_framework.permissions import AllowAny
from rest_framework.throttling import AnonRateThrottle

from .models import Hit, Site
from .ua_parser import parse_ua
from .utils import client_ip, hash_ip, hash_visitor, referer_host

log = logging.getLogger(__name__)

# Body of the GET pixel fallback (used when the JS tag cannot POST): a 43-byte,
# 1x1 transparent GIF89a, spelled out section by section.
_PIXEL = bytes.fromhex(
    "474946383961"          # signature + version: "GIF89a"
    "01000100"              # logical screen: width 1, height 1
    "800000"                # global colour table present, background 0, aspect 0
    "000000" "ffffff"       # two-entry palette
    "21f9040100000000"      # graphic control extension: colour index 0 is transparent
    "2c000000000100010000"  # image descriptor: 1x1 image at (0, 0)
    "0202440100"            # LZW-compressed pixel data
    "3b"                    # trailer
)

# Salt passed to hash_ip / hash_visitor. Uses the ANALYTICS_HASH_SALT setting
# when it is defined and non-empty, otherwise falls back to SECRET_KEY.
# Resolved once, when this module is imported.
_SALT = getattr(settings, "ANALYTICS_HASH_SALT", "") or settings.SECRET_KEY

# Bounds for the untrusted custom-event payload, to prevent storage abuse.
_MAX_PROPS_BYTES = 4096  # max length of the JSON-serialised, cleaned props
_MAX_PROPS_KEYS = 20  # only the first N submitted entries are inspected
_MAX_PROPS_STR = 256  # max characters kept per string value
_MAX_PROPS_KEY_LEN = 64  # max characters kept per key


def _clean_props(raw) -> dict:
    """Return a bounded, flat copy of an untrusted custom-event props payload.

    Only the first ``_MAX_PROPS_KEYS`` entries of *raw* are inspected. An entry
    is kept when its value is a string (cut to ``_MAX_PROPS_STR`` characters),
    a bool, an int, a *finite* float or ``None``. Keys are stringified and cut
    to ``_MAX_PROPS_KEY_LEN`` characters. Everything else (nested dicts/lists,
    NaN, +/-Infinity) is dropped.

    Returns ``{}`` when *raw* is not a non-empty dict, or when the cleaned
    result serialises to more than ``_MAX_PROPS_BYTES`` characters of JSON.
    """
    if not isinstance(raw, dict) or not raw:
        return {}
    cleaned = {}
    for k, v in list(raw.items())[:_MAX_PROPS_KEYS]:
        if isinstance(v, str):
            v = v[:_MAX_PROPS_STR]
        elif isinstance(v, float):
            # json.loads accepts NaN/Infinity literals and turns e.g. 1e999
            # into inf. json.dumps would write those back as bare tokens that
            # are not valid JSON, which strict JSON stores can reject.
            if not math.isfinite(v):
                continue
        elif v is not None and not isinstance(v, int):
            # bool is an int subclass, so it passes; nested dict/list (and any
            # other type) is intentionally dropped to bound depth + size.
            continue
        cleaned[str(k)[:_MAX_PROPS_KEY_LEN]] = v
    # Default ensure_ascii output is pure ASCII, so characters == bytes here.
    if len(json.dumps(cleaned)) > _MAX_PROPS_BYTES:
        return {}
    return cleaned


def _host_matches(hostname: str, domain: str) -> bool:
    """Return True when *hostname* is the Site's *domain* or a subdomain of it.

    Both values are compared case-insensitively after removing surrounding
    whitespace and a trailing root dot, so ``"Example.com."`` matches
    ``"example.com"``. An empty (or whitespace-only) *domain* means the site
    has no domain configured and the check is skipped. An empty *hostname*
    never matches a configured domain.
    """
    # Normalise before the emptiness checks so "  " counts as unconfigured.
    domain = (domain or "").strip().lower().rstrip(".")
    if not domain:
        return True  # unconfigured site domain — skip check
    hostname = (hostname or "").strip().lower().rstrip(".")
    if not hostname:
        return False
    # The "." prefix prevents "evilexample.com" from matching "example.com".
    return hostname == domain or hostname.endswith("." + domain)


def _payload(request) -> dict:
    """Extract the tracking payload from *request* as a dict.

    POST: the body is decoded as UTF-8 and parsed as JSON. Anything that is
    not a JSON object (invalid JSON, bad encoding, excessively nested input,
    or a top-level list/string/number/null) yields ``{}`` so callers can rely
    on ``.get``. An empty body is treated as ``{}``.

    Any other method: the query string parameters are returned as a plain
    dict.
    """
    if request.method != "POST":
        return dict(request.GET.items())
    try:
        data = json.loads(request.body.decode("utf-8") or "{}")
    except (ValueError, UnicodeDecodeError, RecursionError):
        # RecursionError: pathological nesting such as "[[[[...".
        return {}
    # Valid JSON is not necessarily an object; never hand a non-dict onwards.
    return data if isinstance(data, dict) else {}


def _str_field(data: dict, name: str, limit=None) -> str:
    """Return ``data[name]`` cut to *limit* characters if it is a string, else ``""``."""
    value = data.get(name)
    # The payload is untrusted JSON, so a field may arrive as a number, list,
    # object or null. Treat anything that is not a string as absent rather
    # than letting .strip()/slicing raise further down.
    return value[:limit] if isinstance(value, str) else ""


def _record(request, data: dict) -> bool:
    """Validate one tracking payload and store it as a ``Hit``.

    Payload keys read from *data*: ``k`` (site public key), ``u`` (page URL),
    ``t`` (``"event"`` for a custom event, anything else is a pageview),
    ``n`` (custom event name), ``sid`` (session id), ``r`` (referrer, falling
    back to the ``Referer`` request header), ``ti`` (page title) and ``p``
    (custom event props). Non-string values for the text fields are ignored.

    Returns True when a ``Hit`` row was created. Returns False without
    writing anything when *data* is not a dict, the key is missing or does
    not match an active ``Site``, the URL cannot be parsed, or the URL host
    does not belong to the site's configured domain.
    """
    if not isinstance(data, dict):
        return False
    key = _str_field(data, "k").strip()
    if not key:
        return False
    site = Site.objects.filter(public_key=key, is_active=True).only("id", "domain").first()
    if not site:
        return False

    url = _str_field(data, "u", 1024)
    try:
        parsed = urlparse(url) if url else None
        hostname = (parsed.hostname if parsed else "") or ""
    except ValueError:
        # urlparse rejects malformed authorities (e.g. unbalanced IPv6
        # brackets); such a hit cannot be attributed to a host, so drop it.
        return False
    path = (parsed.path if parsed else "")[:512] or "/"
    # Reject hits whose URL host doesn't belong to the registered site.
    if not _host_matches(hostname, site.domain):
        return False
    q = parse_qs(parsed.query) if parsed else {}

    def _utm(name):
        # First value of the given query parameter, or "" when absent.
        return (q.get(name, [""])[0])[:128]

    ev_type = Hit.EVENT_CUSTOM if (data.get("t") == "event") else Hit.EVENT_PAGEVIEW
    ev_name = _str_field(data, "n", 120) if ev_type == Hit.EVENT_CUSTOM else ""
    session_id = _str_field(data, "sid", 64)

    ip = client_ip(request)
    ua = request.META.get("HTTP_USER_AGENT", "")[:512]
    ua_info = parse_ua(ua)
    ref = (_str_field(data, "r") or request.META.get("HTTP_REFERER", ""))[:1024]

    props = _clean_props(data.get("p"))

    # Country headers are only trustworthy when traffic is forced through Cloudflare.
    behind_cf = getattr(settings, "BEHIND_CLOUDFLARE", False)
    country = request.META.get("HTTP_CF_IPCOUNTRY_NAME", "")[:64] if behind_cf else ""
    country_code = request.META.get("HTTP_CF_IPCOUNTRY", "")[:2] if behind_cf else ""

    Hit.objects.create(
        site=site,
        event_type=ev_type,
        event_name=ev_name,
        session_id=session_id,
        path=path,
        hostname=hostname[:255],
        title=_str_field(data, "ti", 255),
        ip_hash=hash_ip(ip, _SALT),
        # Day-bucketed so a returning visitor next day counts as unique-per-day.
        visitor_hash=hash_visitor(ip, ua + timezone.now().strftime("%Y%m%d"), _SALT),
        country=country,
        country_code=country_code,
        device=ua_info["device"],
        os=ua_info["os"],
        browser=ua_info["browser"],
        referer=ref,
        referer_host=referer_host(ref)[:255],
        utm_source=_utm("utm_source"),
        utm_medium=_utm("utm_medium"),
        utm_campaign=_utm("utm_campaign"),
        utm_term=_utm("utm_term"),
        utm_content=_utm("utm_content"),
        props=props,
    )
    return True


@csrf_exempt
@api_view(["GET", "POST"])
@authentication_classes([])
@permission_classes([AllowAny])
@throttle_classes([AnonRateThrottle])
def collect(request):
    """Record a tracking hit and reply with a fixed, content-free response.

    Unauthenticated and CSRF-exempt, subject to ``AnonRateThrottle``. Any
    error raised while parsing or storing the hit is logged and swallowed,
    so the reply is the same whether or not the hit was stored: GET receives
    the 1x1 GIF pixel, POST receives ``{"ok": true}``.
    """
    try:
        _record(request, _payload(request))
    except Exception:
        # Best effort by design: tracking must never surface errors to the
        # embedding site.
        log.exception("analytics collect failed")

    if request.method == "GET":
        response = HttpResponse(_PIXEL, content_type="image/gif")
        # Without an explicit directive a cache may reuse the pixel
        # heuristically, and repeat views of the same URL would then never
        # reach this endpoint.
        response["Cache-Control"] = "no-store"
        return response
    return JsonResponse({"ok": True}, status=200)


# JavaScript source of the embeddable tracking snippet served by tag_js.
# What the snippet does when it runs on a page:
#   - reads the site key from its own <script> tag's data-site attribute and
#     does nothing when it is missing;
#   - derives the collect endpoint from its own src URL by replacing the
#     trailing "tag.js..." part with "collect";
#   - keeps a per-tab session id in sessionStorage under '_khub_sid';
#   - sends each hit as JSON with a text/plain content type, via
#     navigator.sendBeacon when available, otherwise fetch in no-cors mode;
#   - reports a pageview on load, after history.pushState and on popstate;
#   - exposes window.khub(name, props) for custom events.
# NOTE: this is a regular (non-raw) Python string, so "\\." below reaches the
# browser as "\." inside the JS regex literal.
_TAG_JS = """(function(){
  var s = document.currentScript;
  var key = s && s.getAttribute('data-site');
  if (!key) return;
  var ENDPOINT = (s.src || '').replace(/tag\\.js.*$/, 'collect');

  // Cookieless per-tab session id. sessionStorage clears on tab close.
  function sid(){
    try {
      var v = sessionStorage.getItem('_khub_sid');
      if (!v) {
        v = (window.crypto && crypto.randomUUID) ? crypto.randomUUID()
          : (Date.now().toString(36) + Math.random().toString(36).slice(2));
        sessionStorage.setItem('_khub_sid', v);
      }
      return v;
    } catch(e){ return ''; }
  }

  function send(body){
    try {
      var json = JSON.stringify(body);
      if (navigator.sendBeacon) {
        navigator.sendBeacon(ENDPOINT, new Blob([json], {type: 'text/plain'}));
      } else {
        fetch(ENDPOINT, {method:'POST', body: json, headers:{'Content-Type':'text/plain'}, keepalive:true, mode:'no-cors'});
      }
    } catch(e){}
  }

  function pageview(){
    send({k: key, t: 'pageview', sid: sid(), u: location.href, r: document.referrer, ti: document.title});
  }

  // Expose custom-event API: khub('signup', {plan:'pro'})
  window.khub = function(name, props){
    send({k: key, t: 'event', n: String(name), sid: sid(), u: location.href, r: document.referrer, ti: document.title, p: props || {}});
  };

  // SPA route changes.
  var push = history.pushState;
  history.pushState = function(){ push.apply(this, arguments); pageview(); };
  window.addEventListener('popstate', pageview);

  pageview();
})();"""


@csrf_exempt
@api_view(["GET"])
@authentication_classes([])
@permission_classes([AllowAny])
def tag_js(request):
    """Serve the embeddable tracking snippet as JavaScript.

    The response may be cached by any cache for one hour and is readable
    from any origin.
    """
    response = HttpResponse(_TAG_JS, content_type="application/javascript")
    extra_headers = (
        ("Cache-Control", "public, max-age=3600"),
        ("Access-Control-Allow-Origin", "*"),
    )
    for header, value in extra_headers:
        response[header] = value
    return response
