#!/usr/bin/env bash
# Strict mode: errtrace (-E), errexit (-e), nounset (-u) and pipefail.
set -Eeuo pipefail

# --- Fixed settings -----------------------------------------------------------
VERSION="0.1.0"
DEFAULT_MAX_CANDIDATES=200
DEFAULT_CONCURRENCY=8
DEFAULT_TIMEOUT=5
# Minimum score a candidate needs for the "pass" decision.
PASS_THRESHOLD=70
# Field separator (ASCII unit separator) used in the per-domain result rows.
RESULT_DELIM=$'\037'
# Sent with every HTTP request; can be overridden through RDF_USER_AGENT.
USER_AGENT="${RDF_USER_AGENT:-reality-domain-filter/0.1 local-script - local-admin@example.invalid}"

# --- Command-line options (filled in by parse_args) ---------------------------
IP=""
MAX_CANDIDATES="$DEFAULT_MAX_CANDIDATES"
CONCURRENCY="$DEFAULT_CONCURRENCY"
TIMEOUT_SEC="$DEFAULT_TIMEOUT"
SEED_DOMAIN_FILE=""
OUT_DIR=""
KEEP_CDN=0

# --- ASN profile of the target IP (filled in by lookup_target_asn) ------------
TARGET_ASN="" TARGET_PREFIX="" TARGET_ASN_NAME="" TARGET_CC="" TARGET_REGISTRY=""

# --- Resolved tool paths and working files ------------------------------------
TIMEOUT_BIN="" XRAY_BIN=""
TMP_ROOT="" RAW_CANDIDATES="" CDN_ASNS_FILE=""

# --- Optional tool availability flags (set by require_dependencies) -----------
HAS_DIG=0 HAS_JQ=0 HAS_PYTHON3=0
HAS_GETENT=0 HAS_HOST=0 HAS_NSLOOKUP=0

# Print the command-line help text to stdout.
usage() {
  cat <<'EOF'
Reality domain filter for Linux.

Usage:
  ./reality-domain-filter.sh [options]

Options:
  --ip <ip>                    Server IP to profile. Defaults to public IP from api.ipify.org.
  --max-candidates <n>         Max candidates to score. Default: 200.
  --concurrency <n>            Concurrent TLS checks. Default: 8.
  --timeout <sec>              Per-domain network timeout. Default: 5.
  --seed-domain-file <file>    Optional newline-delimited domain seed file.
  --out-dir <dir>              Output directory. Default: ./out/<timestamp>.
  --keep-cdn                   Keep CDN/shared-edge candidates and mark them instead of filtering.
  --help                       Show this help.
  --version                    Show version.

Outputs:
  candidates.txt
  result.csv
  pass.txt
  summary.txt
EOF
}

# Write a timestamped progress message to stderr.
# Arguments: message words (joined with spaces).
log() {
  local stamp
  # "|| true" keeps a failing date from aborting the script under errexit.
  stamp="$(date '+%H:%M:%S')" || true
  printf '[%s] %s\n' "$stamp" "$*" 1>&2
}

# Write a non-fatal warning to stderr.
# Arguments: message words (joined with spaces).
warn() {
  local message="$*"
  printf '[WARN] %s\n' "$message" 1>&2
}

# Report a fatal error on stderr and terminate with exit status 1.
# Arguments: message words (joined with spaces).
die() {
  local message="$*"
  printf '[ERROR] %s\n' "$message" 1>&2
  exit 1
}

# Remove the temporary working directory, if one was created.
# Globals: TMP_ROOT (read).
cleanup() {
  [[ -n "${TMP_ROOT:-}" ]] || return 0
  [[ -d "$TMP_ROOT" ]] || return 0
  rm -rf "$TMP_ROOT"
}

# Run the cleanup whenever the shell exits.
trap cleanup EXIT

# Parse command-line options into the global option variables.
# Globals:   IP, MAX_CANDIDATES, CONCURRENCY, TIMEOUT_SEC, SEED_DOMAIN_FILE,
#            OUT_DIR, KEEP_CDN (written); VERSION (read)
# Arguments: the script's command-line arguments
# Exits:     0 after --help/--version; via die on an unknown option or a
#            value-taking option given without a value.
parse_args() {
  local opt
  while [[ $# -gt 0 ]]; do
    opt="$1"
    case "$opt" in
      --ip | --max-candidates | --concurrency | --timeout | --seed-domain-file | --out-dir)
        # All value-taking options share the same arity check.
        (($# >= 2)) || die "$opt requires a value"
        case "$opt" in
          --ip) IP="$2" ;;
          --max-candidates) MAX_CANDIDATES="$2" ;;
          --concurrency) CONCURRENCY="$2" ;;
          --timeout) TIMEOUT_SEC="$2" ;;
          --seed-domain-file) SEED_DOMAIN_FILE="$2" ;;
          --out-dir) OUT_DIR="$2" ;;
        esac
        shift 2
        ;;
      --keep-cdn)
        KEEP_CDN=1
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      --version)
        printf '%s\n' "$VERSION"
        exit 0
        ;;
      *)
        die "unknown option: $opt"
        ;;
    esac
  done
}

# Succeed when $1 is a decimal integer >= 1 with no leading zero.
is_positive_int() {
  case "$1" in
    '' | 0* | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# Validate the option values collected by parse_args.
# Globals: MAX_CANDIDATES, CONCURRENCY, TIMEOUT_SEC, SEED_DOMAIN_FILE (read)
# Exits:   via die when a numeric option is not a positive integer or the seed
#          file cannot be used.
validate_args() {
  is_positive_int "$MAX_CANDIDATES" || die "--max-candidates must be a positive integer"
  is_positive_int "$CONCURRENCY" || die "--concurrency must be a positive integer"
  is_positive_int "$TIMEOUT_SEC" || die "--timeout must be a positive integer"
  if [[ -n "$SEED_DOMAIN_FILE" ]]; then
    [[ -r "$SEED_DOMAIN_FILE" ]] || die "seed file is not readable: $SEED_DOMAIN_FILE"
    # A directory passes the -r test but cannot be read line by line; reject it
    # here instead of failing later, after the network lookups have run.
    [[ ! -d "$SEED_DOMAIN_FILE" ]] || die "seed file is a directory: $SEED_DOMAIN_FILE"
  fi
}

# Locate a timeout implementation, preferring "timeout" over "gtimeout".
# Globals: TIMEOUT_BIN (written)
# Exits:   via die when neither command is available.
find_timeout_bin() {
  local candidate
  for candidate in timeout gtimeout; do
    if command -v "$candidate" >/dev/null 2>&1; then
      TIMEOUT_BIN="$(command -v "$candidate")"
      return 0
    fi
  done
  die "missing required command: timeout"
}

# Verify the mandatory tools and record which optional tools are present.
# Globals: HAS_DIG, HAS_JQ, HAS_PYTHON3, HAS_GETENT, HAS_HOST, HAS_NSLOOKUP,
#          XRAY_BIN (written); TIMEOUT_BIN (written through find_timeout_bin)
# Exits:   via die when a mandatory command is missing.
require_dependencies() {
  local -a absent=()
  local tool

  for tool in curl openssl awk sed sort; do
    command -v "$tool" >/dev/null 2>&1 || absent+=("$tool")
  done
  if ((${#absent[@]} != 0)); then
    die "missing required command(s): ${absent[*]}"
  fi
  find_timeout_bin

  # Optional helpers: only raise the matching flag when the tool is found.
  for tool in dig jq python3 getent host nslookup; do
    command -v "$tool" >/dev/null 2>&1 || continue
    case "$tool" in
      dig) HAS_DIG=1 ;;
      jq) HAS_JQ=1 ;;
      python3) HAS_PYTHON3=1 ;;
      getent) HAS_GETENT=1 ;;
      host) HAS_HOST=1 ;;
      nslookup) HAS_NSLOOKUP=1 ;;
    esac
  done

  if ((HAS_JQ == 0 && HAS_PYTHON3 == 0)); then
    warn "jq and python3 are both missing; RapidDNS JSON parsing will be skipped"
  fi
  if ((HAS_DIG == 0 && HAS_GETENT == 0)); then
    warn "dig and getent are both missing; DNS scoring will be limited"
  fi

  # xray is optional; XRAY_BIN stays untouched when it is not installed.
  if command -v xray >/dev/null 2>&1; then
    XRAY_BIN="$(command -v xray)"
  fi
}

# Create the output directory and a fresh temporary workspace.
# Globals: OUT_DIR (read, defaulted to ./out/<timestamp> when empty),
#          TMP_ROOT, RAW_CANDIDATES, CDN_ASNS_FILE (written)
prepare_paths() {
  [[ -n "$OUT_DIR" ]] || OUT_DIR="./out/$(date '+%Y%m%d-%H%M%S')"
  mkdir -p "$OUT_DIR"

  TMP_ROOT="$(mktemp -d)"
  RAW_CANDIDATES="${TMP_ROOT}/raw-candidates.tsv"
  CDN_ASNS_FILE="${TMP_ROOT}/cdn-asns.txt"

  # Start both work files empty.
  local work_file
  for work_file in "$RAW_CANDIDATES" "$CDN_ASNS_FILE"; do
    : > "$work_file"
  done
}

# Succeed when $1 is a canonical dotted-quad IPv4 address.
# Arguments: $1 - candidate address
# Returns:   0 for four decimal octets in 0..255 without leading zeros,
#            1 otherwise.
is_ipv4() {
  local ip="$1"
  [[ "$ip" =~ ^([0-9]{1,3}\.){3}[0-9]{1,3}$ ]] || return 1
  local IFS=.
  local -a octets
  read -r -a octets <<< "$ip"
  local octet
  for octet in "${octets[@]}"; do
    # Bash arithmetic treats a leading 0 as octal: "08" raises an error and
    # "010" silently becomes 8. Reject such octets up front so the range check
    # and every later arithmetic use only see plain decimal values.
    [[ "$octet" == "0" || "$octet" != 0* ]] || return 1
    ((octet <= 255)) || return 1
  done
  return 0
}

# Print the octets of an IPv4 address in reverse order (d.c.b.a), without a
# trailing newline.
# Arguments: $1 - dotted-quad IPv4 address
reverse_ipv4_octets() {
  local -a parts
  IFS=. read -r -a parts <<< "$1"
  printf '%s.%s.%s.%s' \
    "${parts[3]}" \
    "${parts[2]}" \
    "${parts[1]}" \
    "${parts[0]}"
}

# Print the /24 network (a.b.c.0/24) containing an IPv4 address, without a
# trailing newline.
# Arguments: $1 - dotted-quad IPv4 address
ipv4_24() {
  local -a parts
  IFS=. read -r -a parts <<< "$1"
  printf '%s.%s.%s.0/24' \
    "${parts[0]}" \
    "${parts[1]}" \
    "${parts[2]}"
}

# Ask api.ipify.org for this host's public IP and print it.
# Globals: USER_AGENT (read)
# Exits:   via die when the lookup returns nothing.
get_public_ip() {
  local detected=""
  detected="$(curl -fsSL --max-time 8 -A "$USER_AGENT" https://api.ipify.org 2>/dev/null || true)"
  [[ -n "$detected" ]] || die "failed to detect public IP; pass --ip explicitly"
  printf '%s\n' "$detected"
}

# Filter: drop one leading and one trailing double quote from each stdin line.
strip_quotes() {
  sed -E -e 's/^"//' -e 's/"$//'
}

# Print the TXT record(s) of a DNS name using the first available resolver
# tool: dig, then host, then nslookup. Prints nothing when none is available.
# Globals:   HAS_DIG, HAS_HOST, HAS_NSLOOKUP (read)
# Arguments: $1 - DNS name to query
dns_txt_query() {
  local name="$1"

  if ((HAS_DIG == 1)); then
    dig +short TXT "$name" 2>/dev/null | strip_quotes
    return
  fi
  if ((HAS_HOST == 1)); then
    host -t TXT "$name" 2>/dev/null | sed -nE 's/.*descriptive text "(.+)"/\1/p'
    return
  fi
  if ((HAS_NSLOOKUP == 1)); then
    nslookup -type=TXT "$name" 2>/dev/null | sed -nE 's/.*text = "(.+)"/\1/p'
    return
  fi
  return 0
}

# Profile the target IP through the Team Cymru DNS zones and store the result.
# Globals:   TARGET_ASN, TARGET_PREFIX, TARGET_CC, TARGET_REGISTRY,
#            TARGET_ASN_NAME (written)
# Arguments: $1 - target IP
# Returns:   0 always; warns and leaves the globals untouched for non-IPv4
#            input or when the origin lookup yields nothing.
lookup_target_asn() {
  local ip="$1"
  local rev origin_txt name_txt

  if ! is_ipv4 "$ip"; then
    warn "ASN lookup currently supports IPv4 only; skipping ASN profile for $ip"
    return 0
  fi

  rev="$(reverse_ipv4_octets "$ip")"
  origin_txt="$(dns_txt_query "${rev}.origin.asn.cymru.com" | head -n 1 || true)"
  if [[ -z "$origin_txt" ]]; then
    warn "Team Cymru ASN lookup returned no result for $ip"
    return 0
  fi

  # The answer is "|"-separated; trim blanks/tabs around the first four fields
  # in a single awk pass and read them straight into the globals.
  IFS='|' read -r TARGET_ASN TARGET_PREFIX TARGET_CC TARGET_REGISTRY _ < <(
    awk -F'|' '{
      for (i = 1; i <= 4; i++) gsub(/^[ \t]+|[ \t]+$/, "", $i)
      print $1 "|" $2 "|" $3 "|" $4
    }' <<< "$origin_txt"
  )

  [[ -n "$TARGET_ASN" ]] || return 0

  # The fifth field of the AS<n> record is used as the ASN's name.
  name_txt="$(dns_txt_query "AS${TARGET_ASN}.asn.cymru.com" | head -n 1 || true)"
  TARGET_ASN_NAME="$(awk -F'|' '{gsub(/^[ \t]+|[ \t]+$/, "", $5); print $5}' <<< "$name_txt")"
}

# Reduce a raw host/URL string to a bare lowercase domain name and print it.
# Arguments: $1 - raw text (may carry a scheme, path, port, "*." prefix or
#                 surrounding dots/whitespace)
# Returns:   0 and prints the domain when it looks like a hostname with an
#            alphabetic TLD; 1 (no output) otherwise.
normalize_domain() {
  local raw="$1"
  local cleaned
  cleaned="$(
    printf '%s' "$raw" \
      | tr '[:upper:]' '[:lower:]' \
      | sed -E \
        -e 's/^[[:space:]]+//' \
        -e 's/[[:space:]]+$//' \
        -e 's#^[a-z][a-z0-9+.-]*://##' \
        -e 's#/.*$##' \
        -e 's/:[0-9]+$//' \
        -e 's/^\*\.//' \
        -e 's/^\.+//' \
        -e 's/\.+$//'
  )"
  if [[ -z "$cleaned" ]]; then
    return 1
  fi
  if ! [[ "$cleaned" =~ ^([a-z0-9]([-a-z0-9]{0,61}[a-z0-9])?\.)+[a-z]{2,63}$ ]]; then
    return 1
  fi
  printf '%s\n' "$cleaned"
}

# Filter: pull every hostname-looking token out of stdin and print the unique
# matches, sorted, one per line. Never fails, even when nothing matches.
extract_domains() {
  grep -Eio '([a-z0-9]([-a-z0-9]*[a-z0-9])?\.)+[a-z]{2,63}' \
    | sed -e 's/^\*\.//' \
    | sort -u \
    || true
}

# Normalize one raw domain and append "domain<TAB>source" to the raw candidate
# list. Input that does not normalize to a domain is silently ignored.
# Globals:   RAW_CANDIDATES (appended to)
# Arguments: $1 - raw domain text, $2 - source label
add_candidate() {
  local raw="$1"
  local source="$2"
  local normalized
  normalized="$(normalize_domain "$raw" || true)"
  if [[ -n "$normalized" ]]; then
    printf '%s\t%s\n' "$normalized" "$source" >> "$RAW_CANDIDATES"
  fi
}

# Extract every domain found in a block of text and register each one as a
# candidate under the given source label.
# Arguments: $1 - source label, $2 - free-form text
add_candidates_from_text() {
  local source="$1"
  local text="$2"
  local found
  # Process substitution keeps the loop in the current shell.
  while IFS= read -r found; do
    add_candidate "$found" "$source"
  done < <(extract_domains <<< "$text")
}

# Percent-encode a string for use as a URL path/query component and print it.
# Uses jq or python3 when available; otherwise a pure-bash encoder.
# Globals:   HAS_JQ, HAS_PYTHON3 (read)
# Arguments: $1 - string to encode
urlencode() {
  if ((HAS_JQ == 1)); then
    jq -rn --arg value "$1" '$value|@uri'
  elif ((HAS_PYTHON3 == 1)); then
    python3 -c 'import sys, urllib.parse; print(urllib.parse.quote(sys.argv[1], safe=""))' "$1"
  else
    # Fallback: encode everything except the RFC 3986 unreserved characters.
    # The previous sed fallback only handled "/" and " ", leaving characters
    # such as "&", "?", "%" and ":" unescaped.
    local LC_ALL=C # byte-wise string indexing and ASCII-only ranges
    local input="$1" encoded="" ch hex i
    for ((i = 0; i < ${#input}; i++)); do
      ch="${input:i:1}"
      case "$ch" in
        [A-Za-z0-9._~-])
          encoded+="$ch"
          ;;
        *)
          # "'x" makes printf emit the numeric code of character x.
          printf -v hex '%%%02X' "'$ch"
          encoded+="$hex"
          ;;
      esac
    done
    printf '%s\n' "$encoded"
  fi
}

# Query the HackerTarget reverse-IP API and register the returned hostnames.
# Globals:   USER_AGENT (read)
# Arguments: $1 - IP or CIDR to look up, $2 - source label for the candidates
# Returns:   0 when the request fails, returns nothing, or returns a service
#            message instead of a host list.
fetch_hackertarget() {
  local query="$1"
  local source="$2"
  local body line
  log "fetch HackerTarget candidates for $query"
  body="$(curl -fsSLG --max-time 15 -A "$USER_AGENT" \
    --data-urlencode "q=$query" \
    "https://api.hackertarget.com/reverseiplookup/" 2>/dev/null || true)"
  [[ -n "$body" ]] || return 0

  # Look for the error keywords only on lines that contain whitespace, i.e.
  # sentences. Matching against the whole body threw away a complete, valid
  # result whenever one hostname happened to contain "error", "invalid" or
  # "API". A hostname line never contains whitespace, and a one-word message
  # without dots yields no candidates further down anyway.
  while IFS= read -r line; do
    line="${line%$'\r'}"
    [[ "$line" == *[[:space:]]* ]] || continue
    if [[ "$line" =~ (API|error|invalid|No records|No DNS) ]]; then
      return 0
    fi
  done <<< "$body"

  add_candidates_from_text "$source" "$body"
}

# Query the RapidDNS search API and register the "subdomain" values found in
# its JSON answer. Needs jq or python3 to parse the response.
# Globals:   HAS_JQ, HAS_PYTHON3, MAX_CANDIDATES, USER_AGENT (read)
# Arguments: $1 - IP or CIDR to search, $2 - source label for the candidates
# Returns:   0 when no parser is available or nothing usable comes back.
fetch_rapiddns() {
  local query="$1"
  local source="$2"
  local encoded url body domains=""

  if ((HAS_JQ == 0 && HAS_PYTHON3 == 0)); then
    warn "skip RapidDNS for $query: jq/python3 missing"
    return 0
  fi

  encoded="$(urlencode "$query")"
  url="https://rapiddns.io/api/search/${encoded}?page=1&pagesize=${MAX_CANDIDATES}"
  log "fetch RapidDNS candidates for $query"
  body="$(curl -fsSL --max-time 15 -A "$USER_AGENT" -H 'Accept: application/json' \
    "$url" 2>/dev/null || true)"
  if [[ -z "$body" ]]; then
    return 0
  fi

  # Parser failures are swallowed on purpose: a malformed answer simply
  # contributes no candidates.
  if ((HAS_JQ == 1)); then
    domains="$(jq -r '.data.data[]?.subdomain // empty' 2>/dev/null <<< "$body" || true)"
  else
    domains="$(python3 -c '
import json, sys
try:
    payload = json.load(sys.stdin)
except Exception:
    sys.exit(0)
for item in payload.get("data", {}).get("data", []):
    value = item.get("subdomain") if isinstance(item, dict) else None
    if value:
        print(value)
' 2>/dev/null <<< "$body" || true)"
  fi

  if [[ -n "$domains" ]]; then
    add_candidates_from_text "$source" "$domains"
  fi
}

# Look up the reverse-DNS (PTR) names of an IP and register them as candidates
# with the source label "ptr". Uses dig, host or nslookup, whichever is found
# first; with none of them no candidates are added.
# Globals:   HAS_DIG, HAS_HOST, HAS_NSLOOKUP (read)
# Arguments: $1 - IP address
fetch_ptr() {
  local ip="$1"
  local names=""
  log "fetch PTR candidates for $ip"

  if ((HAS_DIG == 1)); then
    names="$(dig +short -x "$ip" 2>/dev/null || true)"
  elif ((HAS_HOST == 1)); then
    names="$(host "$ip" 2>/dev/null | awk '/domain name pointer/ {print $NF}' || true)"
  elif ((HAS_NSLOOKUP == 1)); then
    names="$(nslookup "$ip" 2>/dev/null | awk -F'= ' '/name =/ {print $2}' || true)"
  fi

  add_candidates_from_text "ptr" "$names"
}

# Register every entry of a newline-delimited seed file as a "seed" candidate.
# Anything from the first "#" to the end of a line is dropped; a final line
# without a trailing newline is still processed.
# Arguments: $1 - seed file path (empty means "no seed file")
add_seed_file_candidates() {
  local file="$1"
  local entry
  if [[ -z "$file" ]]; then
    return 0
  fi
  log "load seed candidates from $file"
  while IFS= read -r entry || [[ -n "$entry" ]]; do
    add_candidate "${entry%%#*}" "seed"
  done < "$file"
}

# Download the bgp.tools CDN tag list and store the tagged AS numbers (digits
# only, one per line, numerically sorted and unique) in CDN_ASNS_FILE.
# Globals: USER_AGENT (read), CDN_ASNS_FILE (written)
# Returns: 0 always; warns and leaves the file untouched when the download
#          fails.
download_cdn_asns() {
  local body
  body="$(curl -fsSL --max-time 20 -A "$USER_AGENT" "https://bgp.tools/tags/cdn.csv" 2>/dev/null || true)"
  if [[ -z "$body" ]]; then
    warn "could not fetch bgp.tools CDN ASN tag list; falling back to keyword detection"
    return 0
  fi
  # Accept only "AS<digits>" tokens that are not glued to a preceding letter or
  # digit. The old pattern 'AS?[0-9]+' meant "A, optional S, digits", so it
  # also picked up tokens such as "A100" from inside network names, and the
  # case-sensitive strip left lowercase "as" prefixes in place.
  printf '%s\n' "$body" \
    | grep -Eio '(^|[^[:alnum:]])AS[0-9]+' \
    | sed -E 's/^[^0-9]*//' \
    | sort -n -u > "$CDN_ASNS_FILE" || true
}

# Gather candidate domains from every source: PTR, HackerTarget, RapidDNS and
# the optional seed file. For an IPv4 target the surrounding /24 is queried in
# addition to the single IP.
# Globals: IP, SEED_DOMAIN_FILE (read)
collect_candidates() {
  local has_v4=0 cidr24=""

  fetch_ptr "$IP"

  if is_ipv4 "$IP"; then
    has_v4=1
    cidr24="$(ipv4_24 "$IP")"
  fi

  # The call order fixes the order in which sources are recorded per domain.
  fetch_hackertarget "$IP" "hackertarget-ip"
  if ((has_v4 == 1)); then
    fetch_hackertarget "$cidr24" "hackertarget-cidr24"
  fi
  fetch_rapiddns "$IP" "rapiddns-ip"
  if ((has_v4 == 1)); then
    fetch_rapiddns "$cidr24" "rapiddns-cidr24"
  fi

  add_seed_file_candidates "$SEED_DOMAIN_FILE"
}

# Merge the raw candidate list into one row per domain, rank the rows, keep
# the best MAX_CANDIDATES, and write the candidate files.
# Globals: TMP_ROOT, RAW_CANDIDATES, OUT_DIR, MAX_CANDIDATES (read)
# Outputs: writes $OUT_DIR/candidates.txt (domains only) and the merged
#          "domain<TAB>source+source..." file; prints the merged file's path.
write_candidate_outputs() {
  local merged="$TMP_ROOT/candidates-with-sources.tsv"

  if [[ -s "$RAW_CANDIDATES" ]]; then
    # Ranking: seed (0) < ptr (5) < institutional keyword (10) < the rest (30),
    # ties broken by domain name. The priority column is dropped again by cut.
    awk -F'\t' '
      NF < 2 || $1 == "" { next }
      seen[$1, $2]++ { next }
      { sources[$1] = (sources[$1] == "" ? $2 : (sources[$1] "+" $2)) }
      END {
        for (domain in sources) {
          priority = 30
          if (sources[domain] ~ /(^|\+)seed(\+|$)/) priority = 0
          else if (sources[domain] ~ /(^|\+)ptr(\+|$)/) priority = 5
          else if (domain ~ /(^|\.)edu(\.|$)|university|college|school|academy|library|archive|museum|(^|\.)gov(\.|$)|municipal|city|county|public/) priority = 10
          print priority "\t" domain "\t" sources[domain]
        }
      }
    ' "$RAW_CANDIDATES" \
      | sort -t$'\t' -k1,1n -k2,2 \
      | head -n "$MAX_CANDIDATES" \
      | cut -f2- > "$merged"
    cut -f1 "$merged" > "$OUT_DIR/candidates.txt"
  else
    : > "$OUT_DIR/candidates.txt"
    : > "$merged"
  fi

  printf '%s\n' "$merged"
}

# Resolve a domain and print its unique addresses joined with ";".
# Uses dig (A and AAAA) when available, otherwise getent; prints nothing when
# neither tool exists or the name does not resolve.
# Globals:   HAS_DIG, HAS_GETENT (read)
# Arguments: $1 - domain name
# Returns:   0 even when the lookup fails, so an unresolvable name is reported
#            through empty output rather than an error status.
resolve_ips() {
  local domain="$1"
  if ((HAS_DIG == 1)); then
    {
      dig +short A "$domain" 2>/dev/null | grep -E '^[0-9.]+$' || true
      dig +short AAAA "$domain" 2>/dev/null | grep -E '^[0-9a-fA-F:]+$' || true
    } | sort -u | paste -sd ';' -
  elif ((HAS_GETENT == 1)); then
    # getent exits non-zero for unknown names. Under pipefail that used to fail
    # the whole pipeline, and the caller (running with errexit in a background
    # subshell) died without writing a result row for the domain.
    {
      getent ahosts "$domain" 2>/dev/null || true
    } | awk '{print $1}' | sort -u | paste -sd ';' -
  fi
}

# Filter: read a ";"-separated address list on stdin and print the first entry
# shaped like an IPv4 address. Prints nothing (and still succeeds) when there
# is none.
first_ipv4_from_list() {
  tr ';' '\n' \
    | grep -E -m 1 '^([0-9]{1,3}\.){3}[0-9]{1,3}$' \
    || true
}

# Succeed when an IPv4 address lies in one of the reserved/non-public ranges
# listed below.
# Arguments: $1 - IPv4 address
# Returns:   0 when reserved, 1 when public or when $1 is not IPv4.
ipv4_is_reserved() {
  local ip="$1"
  is_ipv4 "$ip" || return 1
  local IFS=.
  local -a o
  read -r -a o <<< "$ip"
  # Force base 10: bash arithmetic would read an octet such as "010" as octal
  # 8, so 010.1.2.3 was not recognised as part of 10.0.0.0/8.
  local a=$((10#${o[0]})) b=$((10#${o[1]})) c=$((10#${o[2]}))

  ((a == 0)) && return 0                          # 0.0.0.0/8
  ((a == 10)) && return 0                         # 10.0.0.0/8
  ((a == 100 && b >= 64 && b <= 127)) && return 0 # 100.64.0.0/10
  ((a == 127)) && return 0                        # 127.0.0.0/8
  ((a == 169 && b == 254)) && return 0            # 169.254.0.0/16
  ((a == 172 && b >= 16 && b <= 31)) && return 0  # 172.16.0.0/12
  ((a == 192 && b == 0 && c == 0)) && return 0    # 192.0.0.0/24
  ((a == 192 && b == 0 && c == 2)) && return 0    # 192.0.2.0/24
  ((a == 192 && b == 168)) && return 0            # 192.168.0.0/16
  ((a == 198 && b == 18)) && return 0             # 198.18.0.0/15 (lower half)
  ((a == 198 && b == 19)) && return 0             # 198.18.0.0/15 (upper half)
  ((a == 198 && b == 51 && c == 100)) && return 0 # 198.51.100.0/24
  ((a == 203 && b == 0 && c == 113)) && return 0  # 203.0.113.0/24
  ((a >= 224)) && return 0                        # 224.0.0.0 and above
  return 1
}

# Look up the origin ASN of an IPv4 address through the Team Cymru DNS zones.
# Arguments: $1 - IPv4 address
# Outputs:   "asn<TAB>prefix<TAB>asn_name" on stdout; nothing when the address
#            is empty, not IPv4, or the origin lookup returns no record.
# Returns:   0 on the no-output paths above.
lookup_ip_asn() {
  local ip="$1"
  local rev origin_txt name_txt origin_asn origin_prefix as_label=""

  if [[ -z "$ip" ]] || ! is_ipv4 "$ip"; then
    return 0
  fi

  rev="$(reverse_ipv4_octets "$ip")"
  origin_txt="$(dns_txt_query "${rev}.origin.asn.cymru.com" | head -n 1 || true)"
  if [[ -z "$origin_txt" ]]; then
    return 0
  fi

  # Trim blanks/tabs around the first two "|"-separated fields in one awk pass.
  IFS='|' read -r origin_asn origin_prefix _ < <(
    awk -F'|' '{
      for (i = 1; i <= 2; i++) gsub(/^[ \t]+|[ \t]+$/, "", $i)
      print $1 "|" $2
    }' <<< "$origin_txt"
  )

  if [[ -n "$origin_asn" ]]; then
    # The fifth field of the AS<n> record is used as the ASN's name.
    name_txt="$(dns_txt_query "AS${origin_asn}.asn.cymru.com" | head -n 1 || true)"
    as_label="$(awk -F'|' '{gsub(/^[ \t]+|[ \t]+$/, "", $5); print $5}' <<< "$name_txt")"
  fi

  printf '%s\t%s\t%s\n' "$origin_asn" "$origin_prefix" "$as_label"
}

# Succeed when the ";"-separated address list contains the target IP.
# The comparison is an exact string match.
# Globals:   IP (read)
# Arguments: $1 - ";"-separated address list
contains_target_ip() {
  local ips="$1"
  local entry
  local -a resolved
  IFS=';' read -r -a resolved <<< "$ips"
  for entry in "${resolved[@]}"; do
    if [[ "$entry" == "$IP" ]]; then
      return 0
    fi
  done
  return 1
}

# Print the keyword score bonus for a domain: +10 for education keywords, +8
# for library/archive/museum keywords, +8 for government/civic keywords. The
# groups add up; a domain matching none gets 0.
# Arguments: $1 - domain name
keyword_bonus() {
  local domain="$1"
  local total=0 rule weight pattern
  # Each rule is "<weight>=<extended regex>".
  local -a rules=(
    '10=(^|\.)edu(\.|$)|university|college|school|academy'
    '8=library|archive|museum'
    '8=(^|\.)gov(\.|$)|municipal|city|county|public'
  )
  for rule in "${rules[@]}"; do
    weight="${rule%%=*}"
    pattern="${rule#*=}"
    if [[ "$domain" =~ $pattern ]]; then
      total=$((total + weight))
    fi
  done
  printf '%s\n' "$total"
}

# Succeed when an ASN name contains one of the known CDN/edge keywords
# (case-insensitive substring match).
# Arguments: $1 - ASN name
asn_name_is_cdn() {
  local asn_name="$1"
  local keywords='cloudflare|akamai|fastly|cloudfront|edgecast|cdn|cachefly|limelight|bunny|stackpath|imperva|incapsula|jsdelivr|netlify|vercel|sucuri|cdn77'
  local folded
  folded="$(tr '[:upper:]' '[:lower:]' <<< "$asn_name")"
  [[ "$folded" =~ $keywords ]]
}

# Succeed when the ASN is listed in the downloaded CDN tag file.
# Globals:   CDN_ASNS_FILE (read)
# Arguments: $1 - AS number (digits only)
# Returns:   1 when the ASN is empty, the file is missing/empty, or the ASN is
#            not listed.
asn_is_tagged_cdn() {
  local asn="$1"
  [[ -n "$asn" && -s "$CDN_ASNS_FILE" ]] || return 1
  # -F compares the ASN as a literal string (it used to be treated as a regex,
  # so "1.335" matched "13335"); "--" protects against a leading "-".
  grep -Fqx -- "$asn" "$CDN_ASNS_FILE"
}

# Succeed when a candidate looks CDN-hosted: either its ASN is in the CDN tag
# list or its ASN name contains a CDN keyword.
# Arguments: $1 - AS number, $2 - ASN name
is_cdn_candidate() {
  local asn="$1"
  local asn_name="$2"
  if asn_is_tagged_cdn "$asn"; then
    return 0
  fi
  asn_name_is_cdn "$asn_name"
}

# Succeed when a TCP connection to <domain>:443 can be opened within
# TIMEOUT_SEC seconds (uses bash's /dev/tcp redirection in a child shell).
# Globals:   TIMEOUT_BIN, TIMEOUT_SEC (read)
# Arguments: $1 - domain name
tcp_443_ok() {
  local domain="$1"
  # The script is single-quoted so the child shell expands "$1" itself. With
  # double quotes the outer shell spliced the domain into the command text,
  # letting shell metacharacters in the value run as code.
  "$TIMEOUT_BIN" "$TIMEOUT_SEC" bash -c ':</dev/tcp/"$1"/443' _ "$domain" >/dev/null 2>&1
}

# Filter: flatten stdin onto one line. CR, LF and TAB become spaces, runs of
# whitespace collapse to a single space, and one leading/trailing space is
# removed.
compact_text() {
  tr '\r\n\t' '   ' \
    | sed -E \
      -e 's/[[:space:]]+/ /g' \
      -e 's/^ //' \
      -e 's/ $//'
}

# Succeed when a certificate SAN entry covers the domain: an exact match
# (SAN compared in lowercase) or a "*.suffix" wildcard covering exactly one
# extra leading label.
# Arguments: $1 - domain name, $2 - SAN entry
domain_matches_san() {
  local domain="$1"
  local san="$2"
  local base label
  san="$(printf '%s' "$san" | tr '[:upper:]' '[:lower:]')"

  # Exact match.
  [[ "$san" != "$domain" ]] || return 0

  # Everything below applies to wildcard entries only.
  [[ "$san" == \*.* ]] || return 1
  base="${san#*.}"
  [[ "$domain" == *".${base}" ]] || return 1

  # The part in front of the wildcard base must be one non-empty label.
  label="${domain%".${base}"}"
  [[ -n "$label" && "$label" != *.* ]]
}

# Succeed when any "DNS:" entry found in the certificate text covers the
# domain.
# Arguments: $1 - domain name, $2 - certificate details text
cert_matches_domain() {
  local domain="$1"
  local cert_info="$2"
  local entry
  while IFS= read -r entry; do
    if [[ -z "$entry" ]]; then
      continue
    fi
    if domain_matches_san "$domain" "$entry"; then
      return 0
    fi
  done < <(grep -Eo 'DNS:[^, ]+' <<< "$cert_info" | sed 's/^DNS://')
  return 1
}

# Run a TLS handshake against <domain>:443 and summarise the result.
# Globals:   TIMEOUT_BIN, TIMEOUT_SEC (read)
# Arguments: $1 - domain name (also sent as the SNI server name)
# Outputs:   one line of five tab-separated fields:
#            tls_ok, cert_match, cert notAfter, negotiated ALPN, verify text
# Returns:   the status of the final printf; handshake failures are reported
#            through tls_ok=0, not through the exit status.
tls_probe() {
  local domain="$1"
  local output cert_pem cert_info protocol alpn verify cert_match not_after tls_ok

  # Capture stdout and stderr of s_client; a failure or timeout is tolerated
  # and simply leaves less to parse. NUL bytes are removed from the capture.
  output="$({
    "$TIMEOUT_BIN" "$TIMEOUT_SEC" openssl s_client \
      -connect "${domain}:443" \
      -servername "$domain" \
      -alpn "h2,http/1.1" \
      -showcerts </dev/null 2>&1 || true
  } | tr -d '\000')"

  tls_ok=0
  cert_match=0
  # Protocol: prefer the "Protocol :" line, else fall back to a "New, TLS..."
  # token.
  protocol="$(printf '%s\n' "$output" | awk -F': *' '/^[[:space:]]*Protocol[[:space:]]*:/ {print $2; exit}' | compact_text)"
  if [[ -z "$protocol" ]]; then
    protocol="$(printf '%s\n' "$output" | grep -Eo 'New, TLS[^, ]+' | head -n 1 | sed 's/^New, //' || true)"
  fi
  # ALPN: value of the first line containing "ALPN protocol".
  alpn="$(printf '%s\n' "$output" | awk -F': *' '/ALPN protocol/ {print $2; exit}' | compact_text)"
  # Verification: the last "Verify return code" line, else "Verification: OK".
  verify="$(printf '%s\n' "$output" | grep -Eo 'Verify return code: [0-9]+ \([^)]+\)' | tail -n 1 | compact_text || true)"
  [[ -z "$verify" ]] && verify="$(printf '%s\n' "$output" | grep -Eo 'Verification: OK' | tail -n 1 | compact_text || true)"

  # Keep only the first PEM block of the output (awk stops at the first END
  # marker).
  cert_pem="$(printf '%s\n' "$output" | awk '
    /-----BEGIN CERTIFICATE-----/ {capture=1}
    capture {print}
    /-----END CERTIFICATE-----/ {exit}
  ')"
  cert_info=""
  not_after=""
  if [[ -n "$cert_pem" ]]; then
    # NOTE(review): "-ext" may not be supported by every openssl build; when
    # the command fails cert_info stays empty and cert_match stays 0 - confirm
    # against the minimum openssl version you need to support.
    cert_info="$(printf '%s\n' "$cert_pem" | openssl x509 -noout -subject -issuer -dates -ext subjectAltName 2>/dev/null || true)"
    not_after="$(printf '%s\n' "$cert_info" | awk -F= '/^notAfter=/ {print $2; exit}' | compact_text)"
    if cert_matches_domain "$domain" "$cert_info"; then
      cert_match=1
    fi
  fi

  # tls_ok needs a captured certificate plus either a successful verification
  # or a parsed protocol; the second branch accepts handshakes whose chain did
  # not verify.
  if [[ -n "$cert_pem" && ( "$verify" == *" 0 (ok)"* || "$verify" == "Verification: OK" ) ]]; then
    tls_ok=1
  elif [[ -n "$cert_pem" && -n "$protocol" ]]; then
    tls_ok=1
  fi

  printf '%s\t%s\t%s\t%s\t%s\n' "$tls_ok" "$cert_match" "$not_after" "$alpn" "$verify"
}

# Fetch https://<domain>/ and print the final HTTP status code as reported by
# curl's %{http_code}. Certificate errors are ignored (-k), up to two
# redirects are followed, and a failing curl does not fail the function.
# Globals:   TIMEOUT_SEC, USER_AGENT (read)
# Arguments: $1 - domain name
http_status_probe() {
  local domain="$1"
  local -a curl_args=(
    -k -sS
    -o /dev/null
    --max-time "$TIMEOUT_SEC"
    --connect-timeout "$TIMEOUT_SEC"
    --location
    --max-redirs 2
    -A "$USER_AGENT"
    -w '%{http_code}'
  )
  curl "${curl_args[@]}" "https://${domain}/" 2>/dev/null || true
}

# Run "xray tls ping" against a domain and print a one-line status:
#   not_installed    - no xray binary was found
#   no_output        - the command printed nothing
#   ok:<text>        - output without an error keyword
#   fail:<text>      - output mentioning Error/error, failed or timeout
# <text> is the flattened output, cut to 180 characters.
# Globals:   XRAY_BIN, TIMEOUT_BIN, TIMEOUT_SEC (read)
# Arguments: $1 - domain name
xray_probe() {
  local domain="$1"
  local raw verdict="ok"

  if [[ -z "$XRAY_BIN" ]]; then
    printf 'not_installed\n'
    return 0
  fi

  raw="$("$TIMEOUT_BIN" "$TIMEOUT_SEC" "$XRAY_BIN" tls ping "$domain" 2>&1 || true)"
  raw="$(printf '%s\n' "$raw" | compact_text)"
  if [[ -z "$raw" ]]; then
    printf 'no_output\n'
    return 0
  fi

  if [[ "$raw" =~ [Ee]rror|failed|timeout ]]; then
    verdict="fail"
  fi
  printf '%s:%s\n' "$verdict" "${raw:0:180}"
}

# Print a ";"-joined reason list with one more item appended (no trailing
# newline). An empty current list yields just the item.
# Arguments: $1 - current reason list, $2 - item to append
append_reason() {
  local current="$1"
  local item="$2"
  printf '%s' "${current:+${current};}${item}"
}

# Score one candidate domain and write its result row.
# Globals:   TMP_ROOT, TARGET_ASN, KEEP_CDN, PASS_THRESHOLD (read)
# Arguments: $1 - result index (used as the row's file name)
#            $2 - domain name
#            $3 - source label(s) of the candidate
# Outputs:   writes $TMP_ROOT/results/<index>.tsv containing one row of 18
#            fields separated by the \037 byte: domain, score, decision,
#            source, keyword bonus, DNS IPs, ASN, prefix, ASN name, is_cdn,
#            tcp_ok, tls_ok, cert_match, cert notAfter, ALPN, HTTP status,
#            xray status, reasons.
# Score components (as applied below): keyword bonus; +10 DNS resolves; +5 ASN
# known; +15 same ASN as the target; -20 CDN while --keep-cdn is active; +15
# TCP/443 open; +25 TLS ok; +15 certificate covers the domain; +5 ALPN is h2
# or http/1.1; +10 HTTP 2xx/3xx or +5 HTTP 4xx; +10 xray ping ok.
score_domain() {
  local index="$1"
  local domain="$2"
  local source="$3"
  local output_file="$TMP_ROOT/results/${index}.tsv"
  local score=0 decision="fail" reason="" dns_ips first_ipv4 asn_info domain_asn domain_prefix domain_asn_name
  local same_ip=0 reserved_ip=0 diff_asn_public=0 is_cdn=0 tcp_ok=0 tls_ok=0 cert_match=0 http_ok=0 not_after="" alpn="" verify="" http_status="" xray_status=""
  local bonus tls_info

  bonus="$(keyword_bonus "$domain")"
  score=$((score + bonus))

  # DNS resolution.
  dns_ips="$(resolve_ips "$domain")"
  if [[ -n "$dns_ips" ]]; then
    score=$((score + 10))
  else
    reason="$(append_reason "$reason" "dns_no_record")"
  fi

  # ASN profile of the first IPv4 address; reserved addresses skip the lookup.
  first_ipv4="$(printf '%s' "$dns_ips" | first_ipv4_from_list)"
  domain_asn=""
  domain_prefix=""
  domain_asn_name=""
  if [[ -n "$first_ipv4" ]]; then
    if ipv4_is_reserved "$first_ipv4"; then
      reserved_ip=1
      reason="$(append_reason "$reason" "reserved_or_private_ip")"
    else
      asn_info="$(lookup_ip_asn "$first_ipv4" || true)"
      domain_asn="$(printf '%s\n' "$asn_info" | awk -F'\t' '{print $1}')"
      domain_prefix="$(printf '%s\n' "$asn_info" | awk -F'\t' '{print $2}')"
      domain_asn_name="$(printf '%s\n' "$asn_info" | awk -F'\t' '{print $3}')"
      [[ -n "$domain_asn" ]] && score=$((score + 5))
      if [[ -n "$TARGET_ASN" && "$domain_asn" == "$TARGET_ASN" ]]; then
        score=$((score + 15))
      fi
    fi
  fi

  # A differing ASN is only flagged for candidates whose source label does not
  # contain "seed".
  if [[ -n "$TARGET_ASN" && -n "$domain_asn" && "$domain_asn" != "$TARGET_ASN" && "$source" != *"seed"* ]]; then
    diff_asn_public=1
    reason="$(append_reason "$reason" "different_asn_current_dns")"
  fi

  if [[ -n "$dns_ips" ]] && contains_target_ip "$dns_ips"; then
    same_ip=1
    reason="$(append_reason "$reason" "resolves_to_target_ip")"
  fi

  # CDN candidates are always marked; the score penalty applies only when they
  # are kept (otherwise the decision below filters them).
  if is_cdn_candidate "$domain_asn" "$domain_asn_name"; then
    is_cdn=1
    reason="$(append_reason "$reason" "cdn_or_shared_edge")"
    if ((KEEP_CDN == 1)); then
      score=$((score - 20))
    fi
  fi

  # Network probes: TCP, TLS, HTTP, optional xray.
  if tcp_443_ok "$domain"; then
    tcp_ok=1
    score=$((score + 15))
  else
    reason="$(append_reason "$reason" "tcp443_failed")"
  fi

  # tls_probe prints five tab-separated fields; split them out one by one.
  tls_info="$(tls_probe "$domain")"
  tls_ok="$(printf '%s\n' "$tls_info" | awk -F'\t' '{print $1}')"
  cert_match="$(printf '%s\n' "$tls_info" | awk -F'\t' '{print $2}')"
  not_after="$(printf '%s\n' "$tls_info" | awk -F'\t' '{print $3}')"
  alpn="$(printf '%s\n' "$tls_info" | awk -F'\t' '{print $4}')"
  verify="$(printf '%s\n' "$tls_info" | awk -F'\t' '{print $5}')"

  if [[ "$tls_ok" == "1" ]]; then
    score=$((score + 25))
  else
    reason="$(append_reason "$reason" "tls_failed")"
  fi

  if [[ "$cert_match" == "1" ]]; then
    score=$((score + 15))
  else
    reason="$(append_reason "$reason" "cert_san_mismatch")"
  fi

  if [[ "$alpn" == "h2" || "$alpn" == "http/1.1" ]]; then
    score=$((score + 5))
  fi

  # 2xx/3xx score higher than 4xx; anything else counts as unstable.
  http_status="$(http_status_probe "$domain")"
  if [[ "$http_status" =~ ^2|^3 ]]; then
    http_ok=1
    score=$((score + 10))
  elif [[ "$http_status" =~ ^4 ]]; then
    http_ok=1
    score=$((score + 5))
  else
    reason="$(append_reason "$reason" "http_unstable")"
  fi

  xray_status="$(xray_probe "$domain")"
  if [[ "$xray_status" == ok:* ]]; then
    score=$((score + 10))
  fi

  # Decision, first matching rule wins: reserved IP -> fail; same IP as the
  # target -> review_same_ip; CDN (not kept) -> filtered_cdn; different ASN ->
  # review_diff_asn; all checks good and score >= PASS_THRESHOLD -> pass;
  # TLS ok with matching certificate -> review; otherwise fail.
  if ((reserved_ip == 1)); then
    decision="fail"
  elif ((same_ip == 1)); then
    decision="review_same_ip"
  elif ((is_cdn == 1 && KEEP_CDN == 0)); then
    decision="filtered_cdn"
  elif ((diff_asn_public == 1)); then
    decision="review_diff_asn"
  elif [[ -n "$dns_ips" && "$tcp_ok" == "1" && "$tls_ok" == "1" && "$cert_match" == "1" && "$http_ok" == "1" && "$score" -ge "$PASS_THRESHOLD" ]]; then
    decision="pass"
  elif [[ "$tls_ok" == "1" && "$cert_match" == "1" ]]; then
    decision="review"
  else
    decision="fail"
  fi

  # The \037 separator keeps empty fields intact when the row is read back.
  [[ -n "$reason" ]] || reason="ok"
  printf '%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\037%s\n' \
    "$domain" "$score" "$decision" "$source" "$bonus" "$dns_ips" \
    "$domain_asn" "$domain_prefix" "$domain_asn_name" "$is_cdn" \
    "$tcp_ok" "$tls_ok" "$cert_match" "$not_after" "$alpn" "$http_status" \
    "$xray_status" "$reason" > "$output_file"
}

# Score every candidate in parallel and merge the rows, highest score first.
# At most CONCURRENCY scoring jobs run at the same time.
# Globals:   TMP_ROOT, CONCURRENCY, RESULT_DELIM (read)
# Arguments: $1 - file with "domain<TAB>sources" lines
# Outputs:   writes $TMP_ROOT/results.tsv (left empty when there is no input)
run_scoring() {
  local input="$1"
  local results_dir="$TMP_ROOT/results"
  local merged="$TMP_ROOT/results.tsv"
  mkdir -p "$results_dir"
  : > "$merged"

  [[ -s "$input" ]] || return 0

  local job_no=0 domain source
  while IFS=$'\t' read -r domain source || [[ -n "${domain:-}" ]]; do
    if [[ -z "$domain" ]]; then
      continue
    fi
    # Poll until a job slot is free.
    until (($(jobs -pr | wc -l) < CONCURRENCY)); do
      sleep 0.2
    done
    job_no=$((job_no + 1))
    score_domain "$job_no" "$domain" "$source" &
  done < "$input"
  wait || true

  if compgen -G "$results_dir/*.tsv" >/dev/null; then
    cat "$results_dir"/*.tsv | sort -t"$RESULT_DELIM" -k2,2nr > "$merged"
  fi
}

# Print one CSV cell: CR and LF become spaces, double quotes are doubled, and
# the value is wrapped in double quotes (no trailing newline).
# Arguments: $1 - cell value
csv_escape() {
  local cell="$1"
  cell="${cell//[$'\r\n']/ }"
  cell="${cell//\"/\"\"}"
  printf '"%s"' "$cell"
}

# Print one CSV record built from the arguments, each escaped as a CSV cell
# and separated by commas, followed by a newline.
# Arguments: the cell values
write_csv_line() {
  local separator="" cell
  for cell in "$@"; do
    printf '%s' "$separator"
    csv_escape "$cell"
    separator=","
  done
  printf '\n'
}

# Convert the merged result rows into $OUT_DIR/result.csv (header line plus
# one record per scored domain).
# Globals: TMP_ROOT, OUT_DIR, RESULT_DELIM (read)
write_result_csv() {
  local tsv="$TMP_ROOT/results.tsv"
  # Declare the read targets local; they used to leak into the global scope.
  local domain score decision source bonus dns_ips asn prefix asn_name is_cdn
  local tcp_ok tls_ok cert_match not_after alpn http_status xray_status reason
  {
    write_csv_line \
      domain score decision source keyword_bonus dns_ips asn prefix asn_name is_cdn \
      tcp443 tls_ok cert_match cert_not_after alpn http_status xray_status reason
    if [[ -s "$tsv" ]]; then
      # RESULT_DELIM is not whitespace, so empty fields keep their position.
      # The "|| [[ -n ... ]]" part still processes a last row that lacks a
      # trailing newline.
      while IFS="$RESULT_DELIM" read -r domain score decision source bonus dns_ips asn prefix asn_name is_cdn tcp_ok tls_ok cert_match not_after alpn http_status xray_status reason || [[ -n "${domain:-}" ]]; do
        write_csv_line \
          "$domain" "$score" "$decision" "$source" "$bonus" "$dns_ips" "$asn" "$prefix" "$asn_name" "$is_cdn" \
          "$tcp_ok" "$tls_ok" "$cert_match" "$not_after" "$alpn" "$http_status" "$xray_status" "$reason"
      done < "$tsv"
    fi
  } > "$OUT_DIR/result.csv"
}

# Write the domains whose decision is "pass" to $OUT_DIR/pass.txt, one per
# line. The file is created empty when there are no results.
# Globals: TMP_ROOT, OUT_DIR, RESULT_DELIM (read)
write_pass_txt() {
  local tsv="$TMP_ROOT/results.tsv"
  local dest="$OUT_DIR/pass.txt"
  if [[ ! -s "$tsv" ]]; then
    : > "$dest"
    return 0
  fi
  awk -v FS="$RESULT_DELIM" '$3 == "pass" {print $1}' "$tsv" > "$dest"
}

# Print the number of lines in a file; 0 for a missing or empty file.
# Arguments: $1 - file path
count_lines() {
  local file="$1"
  if [[ -s "$file" ]]; then
    # awk strips the padding some wc implementations put around the number.
    wc -l < "$file" | awk '{print $1}'
  else
    printf '0\n'
  fi
}

# Write the human-readable run report to $OUT_DIR/summary.txt.
# Globals: TMP_ROOT, OUT_DIR, RAW_CANDIDATES, RESULT_DELIM, XRAY_BIN, KEEP_CDN,
#          IP, TARGET_ASN, TARGET_PREFIX, TARGET_ASN_NAME, TARGET_CC,
#          TARGET_REGISTRY, MAX_CANDIDATES, CONCURRENCY, TIMEOUT_SEC,
#          USER_AGENT (read)
# The report contains the input/target profile, the run settings, the
# candidate/scored/pass counts, per-source and per-decision counts, and up to
# 20 reasons ordered by how often they occurred.
write_summary() {
  local tsv="$TMP_ROOT/results.tsv"
  local candidate_count scored_count pass_count xray_mode cdn_policy
  candidate_count="$(count_lines "$OUT_DIR/candidates.txt")"
  scored_count=0
  if [[ -s "$tsv" ]]; then
    scored_count="$(wc -l < "$tsv" | awk '{print $1}')"
  fi
  pass_count="$(count_lines "$OUT_DIR/pass.txt")"
  if [[ -n "$XRAY_BIN" ]]; then
    xray_mode="enabled ($XRAY_BIN)"
  else
    xray_mode="not installed"
  fi
  if ((KEEP_CDN == 1)); then
    cdn_policy="keep and mark"
  else
    cdn_policy="filter"
  fi

  # Everything printed inside this group goes to summary.txt.
  {
    printf 'Reality domain filter summary\n'
    printf 'Generated: %s\n' "$(date -Iseconds)"
    printf '\n'
    # ":-unknown" substitutes the placeholder for empty target fields.
    printf 'Input IP: %s\n' "$IP"
    printf 'Target ASN: %s\n' "${TARGET_ASN:-unknown}"
    printf 'Target prefix: %s\n' "${TARGET_PREFIX:-unknown}"
    printf 'Target ASN name: %s\n' "${TARGET_ASN_NAME:-unknown}"
    printf 'Target CC/registry: %s / %s\n' "${TARGET_CC:-unknown}" "${TARGET_REGISTRY:-unknown}"
    printf '\n'
    printf 'Max candidates: %s\n' "$MAX_CANDIDATES"
    printf 'Concurrency: %s\n' "$CONCURRENCY"
    printf 'Timeout: %ss\n' "$TIMEOUT_SEC"
    printf 'CDN policy: %s\n' "$cdn_policy"
    printf 'xray tls ping: %s\n' "$xray_mode"
    printf 'User-Agent: %s\n' "$USER_AGENT"
    printf '\n'
    printf 'Candidates: %s\n' "$candidate_count"
    printf 'Scored: %s\n' "$scored_count"
    printf 'Pass: %s\n' "$pass_count"
    printf '\n'
    # Counts rows of the raw candidate list per source label (column 2).
    printf 'Candidate sources:\n'
    if [[ -s "$RAW_CANDIDATES" ]]; then
      awk -F'\t' '{count[$2]++} END {for (source in count) print "- " source ": " count[source]}' "$RAW_CANDIDATES" | sort
    else
      printf '- none\n'
    fi
    printf '\n'
    # Counts result rows per decision (field 3).
    printf 'Decision counts:\n'
    if [[ -s "$tsv" ]]; then
      awk -v FS="$RESULT_DELIM" '{count[$3]++} END {for (decision in count) print "- " decision ": " count[decision]}' "$tsv" | sort
    else
      printf '- none\n'
    fi
    printf '\n'
    # Splits the reason list (field 18) on ";" and counts every reason except
    # "ok"; the 20 most frequent are printed.
    printf 'Top failure/review reasons:\n'
    if [[ -s "$tsv" ]]; then
      awk -v FS="$RESULT_DELIM" '
        {
          n = split($18, parts, ";")
          for (i = 1; i <= n; i++) {
            if (parts[i] != "" && parts[i] != "ok") count[parts[i]]++
          }
        }
        END {
          for (reason in count) print count[reason] "\t" reason
        }
      ' "$tsv" | sort -nr | head -n 20 | awk -F'\t' '{print "- " $2 ": " $1}'
    else
      printf '- none\n'
    fi
  } > "$OUT_DIR/summary.txt"
}

# Entry point: parse options, profile the target IP, collect and score the
# candidate domains, then write all output files.
# Arguments: the script's command-line arguments
main() {
  # Setup.
  parse_args "$@"
  validate_args
  require_dependencies
  prepare_paths

  # Target profile and candidate collection.
  [[ -n "$IP" ]] || IP="$(get_public_ip)"
  log "profile IP: $IP"
  lookup_target_asn "$IP"
  download_cdn_asns
  collect_candidates

  # Scoring.
  local ranked_candidates
  ranked_candidates="$(write_candidate_outputs)"
  log "scoring $(count_lines "$OUT_DIR/candidates.txt") candidate(s)"
  run_scoring "$ranked_candidates"

  # Reports.
  local writer
  for writer in write_result_csv write_pass_txt write_summary; do
    "$writer"
  done

  log "done: $OUT_DIR"
}

main "$@"
