<?php

declare(strict_types=1);

// Usage:
// php app/scripts/generate_fanza_sitemap.php --base-url=https://www.ecchi-na-saito.com/fanza --output=public_html/fanza/fanza-sitemap.xml --popular=500 --latest=500

// Read the long options. All four are declared with "::" (optional value),
// so anything missing or unusable falls back to the defaults below.
$opts = getopt('', ['base-url::', 'output::', 'popular::', 'latest::']);

// Base URL that every sitemap entry is built on; surrounding whitespace and
// trailing slashes are stripped from a user-supplied value.
$baseUrl = 'https://www.ecchi-na-saito.com/fanza';
if (isset($opts['base-url']) && is_string($opts['base-url']) && trim($opts['base-url']) !== '') {
    $baseUrl = rtrim(trim($opts['base-url']), '/');
}

// Destination file; the default is resolved relative to this script's directory.
$outputPath = __DIR__ . '/../../public_html/fanza/fanza-sitemap.xml';
if (isset($opts['output']) && is_string($opts['output']) && trim($opts['output']) !== '') {
    $outputPath = trim($opts['output']);
}

// Row limits for the two product sources; negative values are clamped to 0.
$popularLimit = 500;
if (isset($opts['popular']) && is_numeric($opts['popular'])) {
    $popularLimit = max(0, (int) $opts['popular']);
}

$latestLimit = 500;
if (isset($opts['latest']) && is_numeric($opts['latest'])) {
    $latestLimit = max(0, (int) $opts['latest']);
}

// Fixed caps for the category sections (not configurable from the CLI).
$seriesLimit = 100;
$makerLimit = 40;
$genreLimit = 25;
$actressLimit = 200;

/**
 * Static catalogue of the "growth landing" pages that get a sitemap entry.
 *
 * Every definition carries a 'key' plus one or more filters
 * (maker_keyword / series_keyword / genre_keyword / product_keyword /
 * group_release). The order of the returned list is the order in which the
 * entries are emitted.
 *
 * @return array<int,array<string,string>>
 */
function growthLandingDefinitionsForSitemap(): array
{
    // Genre-only landings listed first.
    $leadingGenreLandings = [
        [
            'key' => 'lp_nurse',
            'genre_keyword' => '看護婦・ナース',
        ],
        [
            'key' => 'lp_big_boobs',
            'genre_keyword' => '巨乳',
        ],
        [
            'key' => 'lp_mature_wife',
            'genre_keyword' => '人妻・主婦',
        ],
        [
            'key' => 'lp_school_uniform',
            'genre_keyword' => '制服',
        ],
        [
            'key' => 'lp_creampie',
            'genre_keyword' => '中出し',
        ],
    ];

    // Series-based landing.
    $seriesLandings = [
        [
            'key' => 'lp_mmgo',
            'series_keyword' => 'マジックミラー号',
        ],
    ];

    // Maker-scoped landings (maker combined with a genre, product keyword or release grouping).
    $makerLandings = [
        [
            'key' => 'lp_moodyz_nurse',
            'maker_keyword' => 'MOODYZ',
            'genre_keyword' => '看護婦・ナース',
        ],
        [
            'key' => 'lp_moodyz_creampie',
            'maker_keyword' => 'MOODYZ',
            'genre_keyword' => '中出し',
        ],
        [
            'key' => 'lp_s1_big_boobs',
            'maker_keyword' => 'エスワン',
            'genre_keyword' => '巨乳',
        ],
        [
            'key' => 'lp_s1_debut',
            'maker_keyword' => 'エスワン',
            'genre_keyword' => 'デビュー作品',
        ],
        [
            'key' => 'lp_idea_uniform',
            'maker_keyword' => 'アイデアポケット',
            'genre_keyword' => '制服',
        ],
        [
            'key' => 'lp_prestige_amateur',
            'maker_keyword' => 'プレステージ',
            'genre_keyword' => '素人',
        ],
        [
            'key' => 'lp_prestige_nampa',
            'maker_keyword' => 'プレステージ',
            'genre_keyword' => 'ナンパ',
        ],
        [
            'key' => 'lp_sod_schoolgirl',
            'maker_keyword' => 'SODクリエイト',
            'genre_keyword' => '女子校生',
        ],
        [
            'key' => 'lp_venus_mature',
            'maker_keyword' => 'VENUS',
            'genre_keyword' => '熟女',
        ],
        [
            'key' => 'lp_kawaii_love',
            'maker_keyword' => 'kawaii',
            'genre_keyword' => 'ラブラブ・カップル',
        ],
        [
            'key' => 'lp_hunter_planning',
            'maker_keyword' => 'Hunter',
            'product_keyword' => '企画',
        ],
        [
            'key' => 'lp_faleno_new',
            'maker_keyword' => 'FALENO',
            'group_release' => '1',
        ],
        [
            'key' => 'lp_muku_new',
            'maker_keyword' => '無垢',
            'group_release' => '1',
        ],
        [
            'key' => 'lp_kmp_cosplay',
            'maker_keyword' => 'ケイ・エム・プロデュース',
            'genre_keyword' => 'コスプレ',
        ],
    ];

    // Remaining genre-only landings, listed after the maker block.
    $trailingGenreLandings = [
        [
            'key' => 'lp_anal',
            'genre_keyword' => 'アナルセックス',
        ],
        [
            'key' => 'lp_squirting',
            'genre_keyword' => '潮吹き',
        ],
        [
            'key' => 'lp_4k',
            'genre_keyword' => '4K',
        ],
        [
            'key' => 'lp_vr',
            'genre_keyword' => 'VR専用',
        ],
    ];

    // All groups are plain lists, so array_merge() yields one 0-based list in this order.
    return array_merge($leadingGenreLandings, $seriesLandings, $makerLandings, $trailingGenreLandings);
}

/**
 * Turn one landing definition into the query parameters of its URL.
 *
 * The result always starts with the fixed listing parameters
 * (search_mode, list_sort, hits, offset). It then adds, in this order:
 * 'lp' (the trimmed definition key), each non-blank keyword filter, and
 * 'group_release' when the definition sets it to '1'.
 *
 * @param array<string,string> $def
 * @return array<string,string|int>
 */
function buildGrowthLandingQueryForSitemap(array $def): array
{
    $params = [
        'search_mode' => 'product',
        'list_sort' => 'popularity',
        'hits' => 60,
        'offset' => 1,
    ];

    // A missing, null or blank key simply produces no 'lp' parameter.
    $landingKey = trim((string) ($def['key'] ?? ''));
    if ($landingKey !== '') {
        $params['lp'] = $landingKey;
    }

    // Keyword filters are copied in this fixed order, regardless of the
    // order in which the definition lists them.
    $keywordFields = ['maker_keyword', 'series_keyword', 'genre_keyword', 'product_keyword'];
    foreach ($keywordFields as $field) {
        $keyword = trim((string) ($def[$field] ?? ''));
        if ($keyword !== '') {
            $params[$field] = $keyword;
        }
    }

    // Only the exact value '1' enables release grouping.
    if ((string) ($def['group_release'] ?? '') === '1') {
        $params['group_release'] = '1';
    }

    return $params;
}

// SQLite database read by this script, located relative to the script's directory.
$dbPath = __DIR__ . '/../data/actresses.sqlite';
if (is_file($dbPath) === false) {
    fwrite(STDERR, "DB not found: {$dbPath}\n");
    exit(1);
}

try {
    // Errors raise exceptions and rows are fetched as associative arrays.
    $pdo = new PDO('sqlite:' . $dbPath, null, null, [
        PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
        PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
    ]);
} catch (Throwable $e) {
    fwrite(STDERR, 'DB open error: ' . $e->getMessage() . "\n");
    exit(1);
}

/**
 * Load the products linked to one product_sources.source value.
 *
 * Rows with a non-empty date come first (newest date first, ties broken by
 * content_id descending); rows without a date follow. At most $limit rows
 * are returned, and a limit of zero or less returns an empty list without
 * touching the database.
 *
 * @return array<int,array{content_id:string,date:string}>
 */
function fetchSourceRows(PDO $pdo, string $source, int $limit): array
{
    if ($limit < 1) {
        return [];
    }

    $query = <<<'SQL'
SELECT p.content_id, p.date
FROM product_sources ps
INNER JOIN products p ON p.content_id = ps.content_id
WHERE ps.source = :source
  AND p.content_id IS NOT NULL
  AND p.content_id != ''
ORDER BY
  CASE WHEN p.date IS NULL OR p.date = '' THEN 1 ELSE 0 END ASC,
  p.date DESC,
  p.content_id DESC
LIMIT :limit
SQL;

    $statement = $pdo->prepare($query);
    $statement->bindValue(':source', $source, PDO::PARAM_STR);
    // LIMIT must be bound as an integer, not as a string.
    $statement->bindValue(':limit', $limit, PDO::PARAM_INT);
    $statement->execute();

    $result = $statement->fetchAll();
    if (!is_array($result)) {
        return [];
    }

    return $result;
}

/**
 * Split a delimited name list into trimmed, non-empty names.
 *
 * Delimiters are "/", full-width "／", CR, LF, "," and "、"; runs of
 * delimiters count as one. Despite the name, the same splitting is applied
 * to maker, genre and actress columns elsewhere in this file. If the regex
 * split fails (for example on invalid UTF-8) an empty list is returned.
 *
 * @return array<int,string>
 */
function splitSeriesNames(string $raw): array
{
    $pieces = preg_split('/[\/／\r\n,、]+/u', $raw);
    if ($pieces === false) {
        return [];
    }

    $trimmed = array_map(
        static function ($piece): string {
            return is_string($piece) ? trim($piece) : '';
        },
        $pieces
    );

    // array_filter() keeps the original keys, so re-index to get a clean list.
    return array_values(array_filter(
        $trimmed,
        static function (string $name): bool {
            return $name !== '';
        }
    ));
}

/**
 * Shared implementation behind fetchTopSeriesRows(), fetchTopMakerRows(),
 * fetchTopGenreRows() and fetchTopActressRows().
 *
 * Reads the given delimited-name column for every product linked to the
 * 'popular_products' or 'latest_products' sources, splits each value into
 * individual names, and aggregates per name (case-insensitively):
 *  - 'name'    the spelling of the first occurrence encountered,
 *  - 'count'   how many joined rows mention the name,
 *  - 'lastmod' the newest normalized product date among those rows.
 *
 * The result is ordered by count descending, then by name (byte-wise
 * strcmp), and cut to $limit entries.
 *
 * NOTE(review): the join yields one row per product_sources link, so a
 * product linked to both sources appears to be counted twice — confirm
 * that this weighting is intended.
 *
 * @param string $column One of series_names, maker_names, genre_names, actress_names.
 * @return array<int,array{name:string,count:int,lastmod:string}>
 * @throws InvalidArgumentException When $column is not one of the supported columns.
 */
function fetchTopNameRowsByColumn(PDO $pdo, string $column, int $limit, DateTimeImmutable $today): array
{
    if ($limit <= 0) {
        return [];
    }

    // The column name is interpolated into the SQL text (identifiers cannot
    // be bound as parameters), so only accept the known column names.
    $allowedColumns = ['series_names', 'maker_names', 'genre_names', 'actress_names'];
    if (!in_array($column, $allowedColumns, true)) {
        throw new InvalidArgumentException("Unsupported name column: {$column}");
    }

    $sql = <<<SQL
SELECT p.{$column}, p.date
FROM product_sources ps
INNER JOIN products p ON p.content_id = ps.content_id
WHERE ps.source IN ('popular_products', 'latest_products')
  AND p.{$column} IS NOT NULL
  AND TRIM(p.{$column}) <> ''
SQL;
    $stmt = $pdo->query($sql);
    if ($stmt === false) {
        // Only reachable when the connection is not in exception error mode.
        return [];
    }
    $rows = $stmt->fetchAll();
    if (!is_array($rows) || count($rows) === 0) {
        return [];
    }

    $byName = [];
    foreach ($rows as $row) {
        if (!is_array($row) || !isset($row[$column]) || !is_string($row[$column])) {
            continue;
        }
        $lastmod = normalizeLastmodDate(is_string($row['date'] ?? null) ? (string) $row['date'] : '', $today);
        foreach (splitSeriesNames($row[$column]) as $name) {
            // Group case-insensitively; fall back to byte-wise lowering without mbstring.
            $key = function_exists('mb_strtolower') ? mb_strtolower($name, 'UTF-8') : strtolower($name);
            if (!isset($byName[$key])) {
                $byName[$key] = ['name' => $name, 'count' => 1, 'lastmod' => $lastmod];
                continue;
            }
            $byName[$key]['count'] = (int) $byName[$key]['count'] + 1;
            // Y-m-d strings order chronologically, so plain string comparison is enough.
            if (is_string($byName[$key]['lastmod']) && $byName[$key]['lastmod'] < $lastmod) {
                $byName[$key]['lastmod'] = $lastmod;
            }
        }
    }

    $out = array_values($byName);
    usort($out, static function (array $a, array $b): int {
        $ac = (int) ($a['count'] ?? 0);
        $bc = (int) ($b['count'] ?? 0);
        if ($ac === $bc) {
            return strcmp((string) ($a['name'] ?? ''), (string) ($b['name'] ?? ''));
        }
        return $bc <=> $ac;
    });

    return array_slice($out, 0, $limit);
}

/**
 * Most frequently occurring series names among popular/latest products.
 *
 * @return array<int,array{name:string,count:int,lastmod:string}>
 */
function fetchTopSeriesRows(PDO $pdo, int $limit, DateTimeImmutable $today): array
{
    return fetchTopNameRowsByColumn($pdo, 'series_names', $limit, $today);
}

/**
 * Most frequently occurring maker names among popular/latest products.
 *
 * @return array<int,array{name:string,count:int,lastmod:string}>
 */
function fetchTopMakerRows(PDO $pdo, int $limit, DateTimeImmutable $today): array
{
    return fetchTopNameRowsByColumn($pdo, 'maker_names', $limit, $today);
}

/**
 * Most frequently occurring genre names among popular/latest products.
 *
 * @return array<int,array{name:string,count:int,lastmod:string}>
 */
function fetchTopGenreRows(PDO $pdo, int $limit, DateTimeImmutable $today): array
{
    return fetchTopNameRowsByColumn($pdo, 'genre_names', $limit, $today);
}

/**
 * Most frequently occurring actress names among popular/latest products.
 *
 * @return array<int,array{name:string,count:int,lastmod:string}>
 */
function fetchTopActressRows(PDO $pdo, int $limit, DateTimeImmutable $today): array
{
    return fetchTopNameRowsByColumn($pdo, 'actress_names', $limit, $today);
}

// Take ONE snapshot of "today" and derive the formatted date from it, so that
// every section of the sitemap is clamped and stamped against the same day
// even if the script happens to run across midnight.
$today = new DateTimeImmutable('today');
$now = $today->format('Y-m-d');

$popularRows = fetchSourceRows($pdo, 'popular_products', $popularLimit);
$latestRows = fetchSourceRows($pdo, 'latest_products', $latestLimit);
$topSeriesRows = fetchTopSeriesRows($pdo, $seriesLimit, $today);
$topMakerRows = fetchTopMakerRows($pdo, $makerLimit, $today);
$topGenreRows = fetchTopGenreRows($pdo, $genreLimit, $today);
$topActressRows = fetchTopActressRows($pdo, $actressLimit, $today);

// Merge the popular and latest rows into one entry per content_id, keeping
// the newest date seen for each product.
$byCid = [];
foreach ([$popularRows, $latestRows] as $rows) {
    foreach ($rows as $row) {
        if (!isset($row['content_id']) || !is_string($row['content_id'])) {
            continue;
        }
        $cid = trim($row['content_id']);
        if ($cid === '') {
            continue;
        }
        // Keep only the leading Y-m-d part; anything else counts as "no date".
        $date = '';
        if (isset($row['date']) && is_string($row['date']) && preg_match('/^\d{4}-\d{2}-\d{2}/', $row['date'], $m) === 1) {
            $date = $m[0];
        }
        if (!isset($byCid[$cid])) {
            $byCid[$cid] = ['content_id' => $cid, 'date' => $date];
            continue;
        }
        if ($date !== '' && ($byCid[$cid]['date'] ?? '') < $date) {
            $byCid[$cid]['date'] = $date;
        }
    }
}

// Newest date first (undated entries sort last), ties broken by content_id descending.
$items = array_values($byCid);
usort($items, static function (array $a, array $b): int {
    $ad = isset($a['date']) && is_string($a['date']) ? $a['date'] : '';
    $bd = isset($b['date']) && is_string($b['date']) ? $b['date'] : '';
    if ($ad === $bd) {
        return strcmp((string) ($b['content_id'] ?? ''), (string) ($a['content_id'] ?? ''));
    }
    return strcmp($bd, $ad);
});

/**
 * Ensure sitemap lastmod is a valid Y-m-d and never in the future.
 *
 * Accepts either a plain "YYYY-MM-DD" value or a datetime that starts with
 * one ("YYYY-MM-DD hh:mm:ss" / "YYYY-MM-DDThh:mm:ss"); only the date part is
 * used. Today's date is returned instead when the value is empty, is not in
 * one of those shapes, names an impossible calendar date (for example
 * 2023-02-30), or lies after $today.
 *
 * @param string            $value Raw date or datetime string.
 * @param DateTimeImmutable $today Reference day used as fallback and upper bound.
 * @return string Date formatted as Y-m-d.
 */
function normalizeLastmodDate(string $value, DateTimeImmutable $today): string
{
    $fallback = $today->format('Y-m-d');

    // The date must be followed by end-of-string or a date/time separator, so
    // values such as "2024-01-021" are rejected instead of being truncated.
    if (preg_match('/^(\d{4})-(\d{2})-(\d{2})(?=[T ]|\z)/', trim($value), $m) !== 1) {
        return $fallback;
    }

    // checkdate() rejects impossible dates; constructing a DateTimeImmutable
    // would silently roll them over into the next month instead.
    if (!checkdate((int) $m[2], (int) $m[3], (int) $m[1])) {
        return $fallback;
    }

    // Zero-padded Y-m-d strings order chronologically, so a string comparison
    // is enough to detect future dates.
    $date = $m[0];
    if ($date > $fallback) {
        return $fallback;
    }

    return $date;
}

// Builds the lines of a single <url> element. $loc and $lastmod are
// XML-escaped here; $changefreq and $priority are fixed literals supplied by
// the calls below.
$buildUrlEntry = static function (string $loc, string $lastmod, string $changefreq, string $priority): array {
    return [
        '  <url>',
        '    <loc>' . htmlspecialchars($loc, ENT_QUOTES | ENT_XML1, 'UTF-8') . '</loc>',
        '    <lastmod>' . htmlspecialchars($lastmod, ENT_QUOTES | ENT_XML1, 'UTF-8') . '</lastmod>',
        '    <changefreq>' . $changefreq . '</changefreq>',
        '    <priority>' . $priority . '</priority>',
        '  </url>',
    ];
};

$xml = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
];

// 1) Top page.
array_push($xml, ...$buildUrlEntry($baseUrl . '/', $now, 'daily', '0.80'));

// 2) Growth landing pages. Only entries that are actually emitted are
//    counted, so the summary line always matches the file contents.
$landingCount = 0;
foreach (growthLandingDefinitionsForSitemap() as $landingDef) {
    if (!is_array($landingDef)) {
        continue;
    }
    $landingQuery = http_build_query(buildGrowthLandingQueryForSitemap($landingDef));
    if ($landingQuery === '') {
        continue;
    }
    array_push($xml, ...$buildUrlEntry($baseUrl . '/?' . $landingQuery, $now, 'weekly', '0.70'));
    $landingCount++;
}

// 3) Category pages, emitted in this order. The array key is the URL path
//    segment. NOTE(review): the genre segment is spelled "janre" —
//    presumably this matches the site's routing; confirm before changing it.
$categorySections = [
    'series' => $topSeriesRows,
    'maker' => $topMakerRows,
    'janre' => $topGenreRows,
    'actress' => $topActressRows,
];
$categoryCounts = [];
foreach ($categorySections as $segment => $categoryRows) {
    $categoryCounts[$segment] = 0;
    foreach ($categoryRows as $categoryRow) {
        if (!isset($categoryRow['name']) || !is_string($categoryRow['name'])) {
            continue;
        }
        $categoryName = trim($categoryRow['name']);
        if ($categoryName === '') {
            continue;
        }
        $categoryLastmod = isset($categoryRow['lastmod']) && is_string($categoryRow['lastmod'])
            ? $categoryRow['lastmod']
            : $now;
        $url = $baseUrl . '/category/' . $segment . '/' . rawurlencode($categoryName) . '/';
        array_push($xml, ...$buildUrlEntry($url, $categoryLastmod, 'weekly', '0.70'));
        $categoryCounts[$segment]++;
    }
}
$seriesCount = $categoryCounts['series'];
$makerCount = $categoryCounts['maker'];
$genreCount = $categoryCounts['janre'];
$actressCount = $categoryCounts['actress'];

// 4) Product detail pages (already de-duplicated and sorted in $items).
foreach ($items as $row) {
    $cid = (string) $row['content_id'];
    $lastmod = normalizeLastmodDate((string) ($row['date'] ?? ''), $today);
    $url = $baseUrl . '/?view=detail&cid=' . rawurlencode($cid);
    array_push($xml, ...$buildUrlEntry($url, $lastmod, 'weekly', '0.60'));
}

$xml[] = '</urlset>';
$body = implode("\n", $xml) . "\n";

// Create the output directory when needed. The second is_dir() check covers
// another process creating the directory at the same moment.
$outDir = dirname($outputPath);
if (!is_dir($outDir) && !@mkdir($outDir, 0775, true) && !is_dir($outDir)) {
    fwrite(STDERR, "mkdir error: {$outDir}\n");
    exit(1);
}
$ok = @file_put_contents($outputPath, $body);
if ($ok === false) {
    fwrite(STDERR, "write error: {$outputPath}\n");
    exit(1);
}

// Summary: "urls" is the number of <url> elements written; "popular" and
// "latest" are the raw row counts before de-duplication into "unique".
fwrite(STDOUT, sprintf(
    "done sitemap=%s urls=%d (top=%d, landing=%d, series=%d, maker=%d, janre=%d, actress=%d, popular=%d, latest=%d, unique=%d)\n",
    $outputPath,
    count($items) + 1 + $landingCount + $seriesCount + $makerCount + $genreCount + $actressCount,
    1,
    $landingCount,
    $seriesCount,
    $makerCount,
    $genreCount,
    $actressCount,
    count($popularRows),
    count($latestRows),
    count($items)
));
