// Prep-guide HTML parser.
// Given a raw HTML string from an AE prep doc (like the EQS example),
// return { fields, selectedUseCases, evidence } where:
//   fields: { company, industry, headcount, language, ats, websiteUrl, careerUrl }
//   selectedUseCases: array of use-case ids matched against TB_DATA.USE_CASES
//   evidence: short bullet list of what was found (for the "Imported from file" panel)

/**
 * Parse an AE prep-guide HTML document into form fields and suggested use cases.
 *
 * Every extraction step is a best-effort heuristic over the document's
 * whitespace-collapsed text plus a few DOM hooks (h1, title, a[href],
 * .uc-header, rejected lists). A field that cannot be detected comes back as
 * an empty string, except `language`, which defaults to "EN".
 *
 * Relies on the globals `DOMParser`, `window.TB_DATA.INDUSTRIES`,
 * `window.TB_DATA.USE_CASES` and on the sibling function `matchUseCase`.
 *
 * @param {string} html - Raw HTML source of the prep guide.
 * @returns {{
 *   fields: {company: string, industry: string, headcount: string, language: string,
 *            ats: string, websiteUrl: string, careerUrl: string},
 *   selectedUseCases: Array,
 *   evidence: string[]
 * }} Detected fields, ids of matched use cases (as found in TB_DATA.USE_CASES),
 *    and one human-readable "Label: value" line per finding.
 */
function parsePrepGuide(html) {
  const doc = new DOMParser().parseFromString(html, "text/html");
  // Collapse all whitespace so the regexes below can assume single spaces.
  const collapse = (s) => (s || "").replace(/\s+/g, " ").trim();
  const text = collapse(doc.body?.innerText || doc.body?.textContent);

  const evidence = [];
  // Record a finding for the import panel; empty values are skipped.
  const push = (k, v) => { if (v) evidence.push(`${k}: ${v}`); };

  // --- Company name: try <h1> that looks like "X Group — AE...", then <title>
  let company = "";
  const h1 = doc.querySelector("h1");
  if (h1) {
    // Collapse first: "." does not cross line breaks, so an <h1> whose text
    // starts with a newline would otherwise never match.
    const m = collapse(h1.textContent).match(/^(.+?)\s*[—–-]\s*(?:AE|Demo|Prep|Account|Prospect|Tenant)/i);
    if (m) company = m[1].trim();
  }
  if (!company) {
    const title = collapse(doc.querySelector("title")?.textContent);
    // A plain hyphen only counts as a separator when surrounded by whitespace,
    // so hyphenated names ("Rolls-Royce - Prep") are not cut in half.
    const tm = title.match(/^(.+?)(?:\s*[—–]|\s+-(?:\s|$))/);
    if (tm) company = tm[1].trim();
  }
  // fallback: look for "<Company> has/hires/is a ..." in the body text
  if (!company) {
    const m = text.match(/([A-Z][\w&.\- ]{2,40})\s+(?:has|hires|is a)\s/);
    if (m) company = m[1].trim();
  }
  push("Company", company);

  // --- Headcount
  // Accepts plain digits ("1200") or thousands-grouped numbers with either
  // separator ("1,200" / "1.200"). The lookbehinds stop the match from
  // starting in the middle of a longer number ("120,000" must not read as "20,000").
  let headcount = "";
  const hcMatch = text.match(/(?<!\d)(?<!\d[.,])(\d{1,3}(?:[.,]\d{3})+|\d{2,7})\s*(?:employees|people|FTE)/i);
  if (hcMatch) {
    const n = Number.parseInt(hcMatch[1].replace(/[.,]/g, ""), 10);
    if (n < 500) headcount = "< 500";
    else if (n < 2000) headcount = "500 – 2,000";
    else if (n < 5000) headcount = "2,000 – 5,000";
    else headcount = "5,000+";
    // Fixed locale keeps the evidence line consistent with the bucket labels.
    push("Headcount", `${n.toLocaleString("en-US")} → ${headcount}`);
  }

  // --- Industry (fuzzy match against our catalog)
  // Hints are checked in order and the first hit wins; a hit is only kept when
  // the resulting label exists in TB_DATA.INDUSTRIES.
  const INDUSTRIES = window.TB_DATA.INDUSTRIES;
  let industry = "";
  const industryHints = [
    { rx: /regtech|compliance|cloud software|saas|software/i, v: "Technology & Software" },
    { rx: /bank|financial services|fintech|insurance/i,       v: "Banking & Financial Services" },
    { rx: /retail|consumer goods|fmcg/i,                       v: "Retail & Consumer Goods" },
    { rx: /healthcare|pharma|life sciences|medical/i,          v: "Healthcare & Life Sciences" },
    { rx: /manufactur|industrial|automotive/i,                 v: "Manufacturing & Industrial" },
    { rx: /consult|professional services|legal|audit/i,        v: "Professional Services" },
    { rx: /public sector|government|municipal/i,               v: "Public Sector" },
    { rx: /logistics|transport|shipping|freight/i,             v: "Transport & Logistics" },
    { rx: /energy|utilit|oil|gas|power/i,                      v: "Energy & Utilities" },
    { rx: /hospitality|hotel|leisure|travel/i,                 v: "Hospitality & Leisure" },
  ];
  for (const h of industryHints) {
    if (h.rx.test(text)) { industry = h.v; break; }
  }
  if (industry && INDUSTRIES.includes(industry)) push("Industry", industry);
  else industry = "";

  // --- Language: explicit "Language: X" label first, then German-context hints
  let language = "EN";
  const langMatch = text.match(/Language:\s*(English|German|French|Deutsch|Français)/i);
  if (langMatch) {
    const l = langMatch[1].toLowerCase();
    language = /deutsch|german/.test(l) ? "DE" : /fran/.test(l) ? "FR" : "EN";
  } else if (/\bBetriebsrat\b|Deutsch|\bDACH\b/i.test(text)) {
    language = "DE";
  }
  push("Language", language);

  // --- ATS
  // The vendor name must be a whole word: without the boundaries "on leveraging"
  // would be read as the vendor "Lever".
  let ats = "";
  const atsPatterns = [
    /(?:ATS\s*(?:\/\s*HRIS)?|uses?|on)\s*[:\-]?\s*\b(Personio|Workday|Greenhouse|SuccessFactors|SAP SuccessFactors|Taleo|iCIMS|Lever|Teamtailor|BambooHR|Cornerstone|SmartRecruiters|Jobvite|rexx|d\.vinci)\b/i,
    /\b(Personio|Workday|Greenhouse|SuccessFactors|SAP SuccessFactors|Taleo|iCIMS|Lever|Teamtailor|BambooHR|Cornerstone|SmartRecruiters|Jobvite|rexx|d\.vinci)\s+confirmed/i,
  ];
  for (const rx of atsPatterns) {
    const m = text.match(rx);
    if (m) { ats = m[1]; break; }
  }
  push("ATS", ats);

  // --- URLs (website + career page)
  const CAREER_RX = /career|karriere|jobs|recrui|stellen/i;
  const allLinks = [...doc.querySelectorAll("a[href]")].map(a => a.href).filter(h => /^https?:/i.test(h));
  // also scrape raw URLs from text, dropping trailing sentence punctuation
  const textUrls = (text.match(/https?:\/\/[^\s<>"')]+/g) || []).map(u => u.replace(/[.,;:)]+$/, ""));
  const urls = [...new Set([...allLinks, ...textUrls])];
  let websiteUrl = "";
  let careerUrl = "";
  for (const u of urls) {
    if (CAREER_RX.test(u)) { careerUrl = u; break; }
  }
  // Website candidate: origin of the first URL that does not look like a career page.
  for (const u of urls) {
    try {
      const p = new URL(u);
      if (!CAREER_RX.test(p.pathname + p.host)) {
        websiteUrl = `${p.protocol}//${p.host}`;
        break;
      }
    } catch {
      // Unparseable URL: best-effort scan, move on to the next candidate.
    }
  }
  // No full career URL found: accept a bare domain mention in the text
  // (e.g., "eqs.com/about-eqs/careers/").
  if (!careerUrl) {
    const m = text.match(/([\w-]+\.(?:com|de|fr|io|co|net|org)(?:\.\w{2,4})?)\/([\w\-/]*(?:career|karriere|jobs|stellen|recrui)[\w\-/]*)/i);
    if (m) careerUrl = `https://${m[1]}/${m[2]}`;
  }
  // Derive the website from the career URL last, so that a career page found
  // through the bare-domain fallback above also yields a website.
  if (!websiteUrl && careerUrl) {
    try {
      const p = new URL(careerUrl);
      websiteUrl = `${p.protocol}//${p.host}`;
    } catch {
      // Career URL is not parseable; leave the website empty.
    }
  }
  push("Website", websiteUrl);
  push("Career page", careerUrl);

  // --- Use case detection.
  // The guide uses patterns like "USE CASE N · STRONG FIT" / "POSSIBLE FIT" in badges,
  // and a "Rejected Use Cases" section below.
  const USE_CASES = window.TB_DATA.USE_CASES;
  const selected = new Set();
  const rejected = new Set();

  // 1. Find each .uc-header and check the badge text
  const ucHeaders = doc.querySelectorAll(".uc-header, [class*=uc-header]");
  ucHeaders.forEach(h => {
    const badge = h.querySelector(".uc-badge")?.textContent || "";
    const title = h.querySelector(".uc-title")?.textContent || "";
    const isFit = /STRONG FIT|POSSIBLE FIT|GOOD FIT/i.test(badge);
    if (!isFit) return;
    const uc = matchUseCase(title);
    if (uc) selected.add(uc.id);
  });

  // 2. Rejected list — mark explicitly-rejected cases so we don't pick them up from loose text mentions
  const rejectedLis = doc.querySelectorAll(".rejected-list li, [class*=rejected] li");
  rejectedLis.forEach(li => {
    const name = li.querySelector("span")?.textContent || li.textContent.split("—")[0] || "";
    const uc = matchUseCase(name);
    if (uc) rejected.add(uc.id);
  });

  // 3. Fallback: loose textual mentions if no fitting .uc-header was found.
  if (selected.size === 0) {
    // Catalog names are data, not patterns: escape regex metacharacters.
    const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    for (const uc of USE_CASES) {
      if (uc.soon) continue;
      if (rejected.has(uc.id)) continue;
      if (typeof uc.name !== "string" || !uc.name.trim()) continue;
      const namePattern = uc.name.trim().split(/\s+/).map(escapeRegExp).join("\\s+");
      // Lookarounds instead of \b so names that start or end with punctuation still match.
      const rx = new RegExp(`(?<!\\w)${namePattern}(?!\\w).{0,60}(strong fit|good fit|possible fit|recommended|top priority)`, "i");
      if (rx.test(text)) selected.add(uc.id);
    }
  }

  // Remove anything in the rejected set from selected (explicit rejection wins)
  rejected.forEach(id => selected.delete(id));

  const selectedUseCases = [...selected];
  if (selectedUseCases.length) evidence.push(`Use cases: ${selectedUseCases.join(", ")}`);
  if (rejected.size) evidence.push(`Rejected in doc: ${[...rejected].join(", ")}`);

  return {
    fields: { company, industry, headcount, language, ats, websiteUrl, careerUrl },
    selectedUseCases,
    evidence,
  };
}

/**
 * Resolve a free-text use-case title to an entry of window.TB_DATA.USE_CASES.
 *
 * Lookup order:
 *   1. Catalog names contained in the title (case-insensitive). When several
 *      names match, the longest one wins, so "General Referral" is not
 *      shadowed by a shorter "Referral" entry that happens to come first.
 *   2. A fixed list of keyword rules, checked in order; the first rule whose
 *      keyword appears decides the id, even if that id is not in the catalog.
 *
 * @param {string} rawTitle - Title text, e.g. the content of a .uc-title element.
 * @returns {object|null} The matching catalog entry, or null when the title is
 *   not a string, nothing matches, or the matched id is absent from the catalog.
 */
function matchUseCase(rawTitle) {
  const USE_CASES = window.TB_DATA.USE_CASES;
  if (typeof rawTitle !== "string") return null;
  const t = rawTitle.toLowerCase();

  // explicit name matches — keep the most specific (longest) name
  let best = null;
  let bestLength = 0;
  for (const u of USE_CASES) {
    const name = typeof u.name === "string" ? u.name.trim().toLowerCase() : "";
    // An empty name would be "contained" in every title; ignore it.
    if (!name || !t.includes(name)) continue;
    if (name.length > bestLength) {
      best = u;
      bestLength = name.length;
    }
  }
  if (best) return best;

  // loose mappings; order matters (the specific referral variants precede the generic one)
  const looseRules = [
    { rx: /spontaneous|walk-?in/, id: "spontaneous" },
    { rx: /general referral|referral program/, id: "general-referral" },
    { rx: /referral/, unless: /rejected/, id: "referral" },
    { rx: /alumni/, id: "alumni" },
    { rx: /event|campus/, id: "event" },
    { rx: /internal mobility|internal move/, id: "internal-mobility" },
    { rx: /active sourcing|sourcing/, id: "active-sourcing" },
    { rx: /silver medall|silver med/, id: "silver-medallist" },
    { rx: /stories|storytelling|employer brand/, id: "stories" },
    { rx: /career site/, id: "career-site" },
    { rx: /qualify/, id: "qualify-agent" },
  ];
  const rule = looseRules.find(r => r.rx.test(t) && !(r.unless && r.unless.test(t)));
  if (!rule) return null;
  // Normalise a missing catalog entry to null so the function has one "no match" value.
  return USE_CASES.find(u => u.id === rule.id) ?? null;
}

// Attach the parser to the global `window` object, making it reachable as window.parsePrepGuide(html).
window.parsePrepGuide = parsePrepGuide;
