<?php
// ---- Generator configuration ----------------------------------------------
date_default_timezone_set('Asia/Kolkata');

define('SITEMAP_FILE', 'sitemap.xml');          // output file name (relative to the working directory)
define('MAX_URLS', 50000);                      // upper bound on collected page URLs
define('MAX_SIZE', 48.97 * 1024 * 1024);        // byte budget for the sitemap: 48.97 MB
define('ALLOWED_EXT', ['php', 'html', 'htm']);  // file extensions treated as pages

// ---- Runtime limits: no time limit, 1 GB of memory ------------------------
ini_set('max_execution_time', 0);
ini_set('memory_limit', '1024M');

// ---- Error reporting: show everything and log next to this script ---------
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
ini_set('error_log', __DIR__ . '/php-error.log');
error_reporting(E_ALL);

// Push any pending output, then append a timestamp to the heartbeat log so
// each run leaves a trace.
flush();
file_put_contents('heartbeat.log', time() . PHP_EOL, FILE_APPEND);

// ---- Base URL detection ----------------------------------------------------
// HTTPS is assumed when the server reports it directly, or when a fronting
// proxy / CDN / load balancer (Cloudflare, Bunny, Nginx, ...) forwards the
// original scheme in X-Forwarded-Proto.
$isHttps = false;
if (!empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off') {
    $isHttps = true;
} elseif (!empty($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] === 'https') {
    $isHttps = true;
}
$baseUrl = sprintf('%s://%s/', $isHttps ? 'https' : 'http', $_SERVER['HTTP_HOST']);

// Filled by scanFiles(): one entry per page that goes into the sitemap.
$urls = [];
/**
 * Recursively scan $dir and append one sitemap entry to $urls for every page
 * that opts in with <meta name="importent" content="yes"> (the tag name is
 * spelled exactly like that in the pages; the value is matched
 * case-insensitively).
 *
 * A file is considered only when it
 *   - has an extension listed in ALLOWED_EXT,
 *   - does not match a $SKIP_DIRS fragment, a $SKIP_FILES suffix or a
 *     $BLOCK_FILES name, and is not below a dot-directory / a dot-file,
 *   - is not an index.(php|html|htm) file,
 *   - has no "*bot*" meta tag (robots, googlebot, ...) containing "noindex".
 *
 * Optional meta tags read from the page:
 *   - homepage=yes : the entry's URL becomes $baseUrl itself
 *   - priority     : kept when numeric and within 0..1, otherwise "0.5";
 *                    "0.1" when the tag is absent; an empty value is kept
 *   - changefreq   : "monthly" when absent, "weekly" when empty
 *
 * Scanning stops once $urls holds MAX_URLS entries.
 *
 * NOTE(review): index files are skipped before the homepage override can
 * apply to them, so the root URL is only emitted when some non-index page
 * carries homepage=yes — confirm that this is intended.
 *
 * @param string $dir     Directory to walk. URLs are built from the iterator
 *                        path with leading '.' and '/' characters stripped,
 *                        so this is expected to be '.' (the web root).
 * @param array  $urls    Receives the entries (by reference); each one has
 *                        the keys loc, lastmod, changefreq and priority.
 * @param string $baseUrl Site root URL, expected to end with '/'.
 */
function scanFiles(string $dir, array &$urls, string $baseUrl)
{
    // Path fragments that exclude a file when they occur ANYWHERE in its
    // forward-slash path (plain substring match).
    $SKIP_DIRS = [
        '/5.5.22/',
        '/14J/',
        '/200/',
        '/301R/',
        '/404/',
        '/500/',
        '/A8900/',
        '/abante/',
        '/az/',
        '/b/',
        '/cash/',
        '/Con/',
        '/device/',
        '/done/',
        '/error/',
        '/FM2/',
        '/FM3/',
        '/gp/',
        '/gps/',
        '/hosting/',
        '/img/',
        '/K/',
        '/KPC/',
        '/L/',
        '/Mail/',
        'media/',
        '/meta/',
        '/mistress/',
        '/msg/',
        '/nobi/',
        '/noti/',
        '/payment/',
        '/pdf/',
        '/PFM/',
        '/PFM(1)/',
        '/Photography/',
        '/pod/',
        '/raja/',
        '/rita/',
        '/rs/',
        '/slider/',
        '/slider1/',
        '/stories/',
        '/test/',
        '/tv/',
        '/v/',
        '/V-show/',
        '/web/',
        '/whmcs/',
        '/WT/',
        '/0/',
        '//',
        '/vendor/',
        '/node_modules/',
        '/cache/',
        '/logs/',
        '/uploads/',
        '/images/',
        '/css/',
        '/js/',
        '/B/',
        '/B1/',
        '/B2/',
        '/test/1/',
    ];
    // Files excluded when the path ENDS with one of these suffixes.
    $SKIP_FILES = [
        '/abcw.php',
        '/sitemap.php',
        '/sitegen.php',
    ];
    // Files excluded by bare file name, wherever they are located.
    $BLOCK_FILES = ['bot.php', '1bot.php'];

    $iterator = new RecursiveIteratorIterator(
        new RecursiveDirectoryIterator($dir, FilesystemIterator::SKIP_DOTS)
    );

    foreach ($iterator as $file) {
        if (count($urls) >= MAX_URLS) {
            break;
        }
        if (!$file->isFile()) {
            continue;
        }
        if (!in_array(strtolower($file->getExtension()), ALLOWED_EXT, true)) {
            continue;
        }

        // Normalise Windows separators so every check below sees '/'.
        $path = str_replace('\\', '/', $file->getPathname());
        $name = basename($path);

        foreach ($SKIP_DIRS as $skip) {
            if (strpos($path, $skip) !== false) {
                continue 2;
            }
        }
        foreach ($SKIP_FILES as $skip) {
            if (substr($path, -strlen($skip)) === $skip) {
                continue 2;
            }
        }
        if (in_array($name, $BLOCK_FILES, true)) {
            continue;
        }
        // Anything inside a dot-directory, or a dot-file itself.
        if (strpos($path, '/.') !== false) {
            continue;
        }
        // Index files are never listed; checked before the page is parsed so
        // no work is spent on a file that is discarded anyway.
        if (preg_match('/^index\.(php|html?|htm)$/i', $name)) {
            continue;
        }

        // Reads the <meta> tags from the file's source text (the page is not
        // executed). A failure or a page without tags cannot opt in.
        $tags = @get_meta_tags($path);
        if (empty($tags)) {
            continue;
        }

        // Honour robots / googlebot / ... noindex directives.
        foreach ($tags as $metaName => $metaValue) {
            if (stripos($metaName, 'bot') !== false && stripos($metaValue, 'noindex') !== false) {
                continue 2;
            }
        }

        // Only pages that explicitly opt in are listed.
        if (strtolower($tags['importent'] ?? '') !== 'yes') {
            continue;
        }

        // ltrim() strips the leading "./" produced by scanning '.'.
        $loc = $baseUrl . ltrim($path, './');
        if (strtolower($tags['homepage'] ?? '') === 'yes') {
            $loc = $baseUrl;
        }
        // Kept from the original flow: collapses a trailing /index.* to '/'.
        // Index files were skipped above, so this can only still match when
        // $baseUrl itself ends that way.
        $loc = preg_replace('#/index\.(php|html|htm)$#i', '/', $loc);

        // Missing tag -> default; present but outside 0..1 -> "0.5";
        // an explicitly empty value is left empty.
        $priority = $tags['priority'] ?? '0.1';
        if ($priority !== '' && !(is_numeric($priority) && $priority >= 0 && $priority <= 1)) {
            $priority = '0.5';
        }

        $changefreq = $tags['changefreq'] ?? 'monthly';
        if ($changefreq === '') {
            $changefreq = 'weekly';
        }

        $urls[] = [
            'loc'        => $loc,
            'lastmod'    => date('Y-m-d\TH:i:sP', filemtime($path)),
            'changefreq' => $changefreq,
            'priority'   => $priority,
        ];
    }
}
// Collect the sitemap entries: walk the current working directory and let
// scanFiles() append every qualifying page to $urls.
scanFiles('.', $urls, $baseUrl);
/* ================= IMAGE FUNCTIONS ================= */
/**
 * Collect the images referenced by an HTML document.
 *
 * Two sources are read, in this order:
 *   1. every <img>: its src, or its data-bg when src is empty;
 *   2. every element carrying a data-bg attribute, whose value is treated as
 *      a comma-separated list of URLs.
 * Each URL goes through normalizeImageUrl() and is reported once; the first
 * occurrence wins.
 *
 * @param string $html Markup to inspect; empty or whitespace-only input yields [].
 * @return array List of ['src' => string, 'alt' => string, 'title' => string];
 *               alt and title come from the element that referenced the image.
 */
function extractImagesFromHtml(string $html): array
{
    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8).
    if (trim($html) === '') {
        return [];
    }

    // Real-world markup is rarely valid: keep parser complaints out of the
    // output, and drop them afterwards so they do not pile up across pages.
    libxml_use_internal_errors(true);
    $dom = new DOMDocument('1.0', 'UTF-8');
    $dom->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR);
    libxml_clear_errors();

    $images = [];
    $seen   = [];   // normalized URL => true, for de-duplication

    // 1) <img> elements: src if present, else data-bg.
    foreach ($dom->getElementsByTagName('img') as $img) {
        $src = trim($img->getAttribute('src'));
        $raw = $src !== '' ? $src : trim($img->getAttribute('data-bg'));
        if ($raw === '') {
            continue;
        }
        $url = normalizeImageUrl($raw);
        if (isset($seen[$url])) {
            continue;
        }
        $seen[$url] = true;
        $images[] = [
            'src'   => $url,
            'alt'   => trim($img->getAttribute('alt')),
            'title' => trim($img->getAttribute('title')),
        ];
    }

    // 2) ANY element with data-bg (possibly several comma-separated URLs).
    $xpath = new DOMXPath($dom);
    foreach ($xpath->query('//*[@data-bg]') as $node) {
        $bg = trim($node->getAttribute('data-bg'));
        if ($bg === '') {
            continue;
        }
        foreach (array_map('trim', explode(',', $bg)) as $candidate) {
            if ($candidate === '') {
                continue;
            }
            $url = normalizeImageUrl($candidate);
            if (isset($seen[$url])) {
                continue;
            }
            $seen[$url] = true;
            // alt/title must come from THIS element, not from whichever
            // <img> the first loop happened to visit last.
            $images[] = [
                'src'   => $url,
                'alt'   => trim($node->getAttribute('alt')),
                'title' => trim($node->getAttribute('title')),
            ];
        }
    }

    return $images;
}
/**
 * Turn an image reference into an absolute URL.
 *
 *   - http:// and https:// URLs are returned unchanged;
 *   - protocol-relative URLs ("//host/x.png") get the site's scheme;
 *   - everything else is resolved against the site root.
 *
 * The scheme is detected the same way as for the page URLs (HTTPS server
 * variable, or X-Forwarded-Proto behind a proxy) so that page and image
 * locations agree. When the Host header is unavailable (e.g. CLI runs),
 * "localhost" is used instead of producing a host-less URL.
 *
 * NOTE(review): relative references are resolved against the site root, not
 * against the directory of the page that contains them.
 *
 * @param string $url Raw value taken from a src / data-bg attribute.
 * @return string Absolute URL.
 */
function normalizeImageUrl(string $url): string
{
    if (preg_match('#^https?://#i', $url)) {
        return $url;
    }

    $isHttps =
        (!empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off')
        || (!empty($_SERVER['HTTP_X_FORWARDED_PROTO']) && $_SERVER['HTTP_X_FORWARDED_PROTO'] === 'https');
    $scheme = $isHttps ? 'https' : 'http';

    if (strpos($url, '//') === 0) {
        return $scheme . ':' . $url;
    }

    $host = !empty($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : 'localhost';

    return $scheme . '://' . $host . '/' . ltrim($url, '/');
}
/**
 * Execute a page file and return everything it prints.
 *
 * The file is include()d inside this function's scope with output buffering
 * switched on, so PHP pages are rendered and static HTML is returned as-is.
 *
 * @param string $filePath Local filesystem path of the page.
 * @return string Captured output; '' when the path is not a readable regular
 *                file or the page printed nothing.
 * @throws \Throwable Whatever the included page throws; the buffers opened
 *                    during rendering are discarded before re-throwing.
 */
function renderPhpFile(string $filePath): string
{
    // file_exists() is also true for directories, which cannot be included.
    if (!is_file($filePath) || !is_readable($filePath)) {
        return '';
    }

    $baseLevel = ob_get_level();
    ob_start();

    try {
        include $filePath;
    } catch (\Throwable $e) {
        // Do not leave our buffer (or any the page opened) dangling.
        while (ob_get_level() > $baseLevel && ob_end_clean()) {
            // keep unwinding
        }
        throw $e;
    }

    // Buffers the page opened but never closed are folded into ours so
    // their content is not lost.
    while (ob_get_level() > $baseLevel + 1 && ob_end_flush()) {
        // keep folding
    }

    // If the page closed our buffer itself there is nothing left to collect.
    $output = ob_get_level() > $baseLevel ? ob_get_clean() : false;

    return $output === false ? '' : $output;
}
/**
 * Heuristically decide whether an image is decoration rather than content.
 *
 * Only the file name (last path segment, lower-cased) is inspected: the
 * image counts as decorative when that name contains one of the keywords
 * below as a plain substring.
 *
 * @param string $url Image URL.
 * @return bool True when the file name contains a decorative keyword; false
 *              otherwise, including when the URL has no path component.
 */
function isDecorativeImage(string $url): bool
{
    $IGNORE_IMAGE_KEYWORDS = [
        'bg','background','pattern','overlay','shadow','texture',
        'noise','gradient','sprite','icon','divider','line','bullet',
    ];

    // parse_url() yields null (no path) or false (malformed URL); neither
    // may be handed to basename().
    $path = parse_url($url, PHP_URL_PATH);
    if (!is_string($path) || $path === '') {
        return false;
    }

    $name = strtolower(basename($path));
    foreach ($IGNORE_IMAGE_KEYWORDS as $word) {
        if (strpos($name, $word) !== false) {
            return true;
        }
    }

    return false;
}
/* ================= END IMAGE FUNCTIONS ================= */
/* ==== Build XML === */
/* ======================================== */
// Sitemap file counter, initialised to 1. No active code visible in this
// file reads it.
$xSite = 1;

/**
 * Build the opening of a sitemap document: the XML declaration followed by
 * the <urlset> start tag with the sitemap, image-sitemap and XML-Schema
 * namespace attributes, each on its own line.
 *
 * @return string The header text, ending with a newline.
 */
function entry() {
    $declaration = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";

    $urlsetOpen = <<<'XML'
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
XML;

    return $declaration . $urlsetOpen . "\n";
}
/* ================= BUILD, WRITE AND REPORT ================= */
// For every collected page: render it, harvest its images, and append one
// <url> element to the document. The result is written to SITEMAP_FILE and
// to a gzip copy, followed by a short HTML report.
//
// TODO: the output is a single file; once the MAX_SIZE budget is reached the
// remaining URLs are dropped instead of being moved to a further sitemap.

$xml = entry();

$totalImages = 0;   // images written to the sitemap
$writtenUrls = 0;   // <url> elements written to the sitemap
$bodyBytes   = 0;   // bytes of all <url> elements appended so far

$maxImagesPerUrl  = 1000;      // Google allows 1000 images per page
$maxUrlChunkBytes = 1000000;   // stop adding images once one <url> reaches this size

// URLs and attribute text may contain &, <, > or quotes; all of them must
// be entity-escaped inside XML.
$xmlEscape = static function ($text) {
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_XML1, 'UTF-8');
};

foreach ($urls as $u) {
    // Map the public URL back to the file on disk (a local path, NOT a URL).
    $relativePath = ltrim((string) parse_url($u['loc'], PHP_URL_PATH), '/');
    $localFile    = __DIR__ . '/' . $relativePath;

    // A page that cannot be rendered, prints nothing or has no images is
    // still listed - it just carries no <image:image> children. (The root
    // URL maps to a directory, so no images are collected for it.)
    $html   = is_file($localFile) ? renderPhpFile($localFile) : '';
    $images = [];
    if ($html !== '') {
        $images = array_filter(extractImagesFromHtml($html), static function ($img) {
            return !isDecorativeImage($img['src']);
        });
    }

    $urlChunk  = "  <url>\n";
    $urlChunk .= "    <loc>" . $xmlEscape($u['loc']) . "</loc>\n";

    $imagesInUrl = 0;
    foreach ($images as $img) {
        if ($imagesInUrl >= $maxImagesPerUrl) {
            break;
        }
        $urlChunk .= "    <image:image>\n";
        $urlChunk .= "      <image:loc>" . $xmlEscape($img['src']) . "</image:loc>\n";
        if ($img['title'] !== '') {
            $urlChunk .= "      <image:title>" . $xmlEscape($img['title']) . "</image:title>\n";
        }
        if ($img['alt'] !== '') {
            $urlChunk .= "      <image:caption>" . $xmlEscape($img['alt']) . "</image:caption>\n";
        }
        $urlChunk .= "    </image:image>\n";
        $imagesInUrl++;

        // Keep a single <url> element from growing without bound.
        if (strlen($urlChunk) >= $maxUrlChunkBytes) {
            break;
        }
    }

    $urlChunk .= "    <lastmod>" . $xmlEscape($u['lastmod']) . "</lastmod>\n";
    if ((string) $u['changefreq'] !== '') {
        $urlChunk .= "    <changefreq>" . $xmlEscape($u['changefreq']) . "</changefreq>\n";
    }
    if ((string) $u['priority'] !== '') {
        $urlChunk .= "    <priority>" . $xmlEscape($u['priority']) . "</priority>\n";
    }
    $urlChunk .= "  </url>\n";

    // Enforce the size budget BEFORE appending, so the element that would
    // cross it is left out rather than written.
    $chunkBytes = strlen($urlChunk);
    if ($bodyBytes + $chunkBytes >= MAX_SIZE) {
        break;
    }

    $xml         .= $urlChunk;
    $bodyBytes   += $chunkBytes;
    $totalImages += $imagesInUrl;
    $writtenUrls++;
}
$xml .= "</urlset>";

// Save the plain sitemap.
$sizeInBytesUC = file_put_contents(SITEMAP_FILE, $xml);
if ($sizeInBytesUC === false) {
    die('Could not write ' . SITEMAP_FILE);
}

// Save the compressed copy next to it.
$compressedXml = gzencode($xml, 9);
$sizeInBytesC  = ($compressedXml === false)
    ? false
    : file_put_contents(SITEMAP_FILE . '.gz', $compressedXml);

// ---- Report ----------------------------------------------------------------
echo "Total images across all pages: $totalImages <br>";
echo "Uncompressed Sitemap size: $sizeInBytesUC bytes<br>(".convert($sizeInBytesUC).' )<br>';
if ($sizeInBytesC === false) {
    echo 'Compressed Sitemap could not be written<br>';
} else {
    echo "Compressed Sitemap size: $sizeInBytesC bytes<br>(".convert($sizeInBytesC).' )<br>';
}
echo "✅ Sitemap generated successfully<br>";
echo "<span style='font-size:20px;'>&#128464;</span> Image URLs: " . $totalImages . "<br>";
echo "📄 URLs: " . $writtenUrls . "<br>";
echo "🔗 <a href='".SITEMAP_FILE."' target='_blank'>View Sitemap</a><br>";
echo "⏱ Execution time: " .
     round(microtime(true) - $_SERVER["REQUEST_TIME_FLOAT"], 4) . " sec";
/**
 * Format a byte count as a human-readable HTML snippet, e.g.
 * 1536 -> "1.5 KB <b>[Kilobytes]</b>".
 *
 * The value is scaled by 1024 until it drops below 1024 or the largest unit
 * is reached, then rounded to two decimals. Zero and non-numeric input
 * (such as the false returned by a failed write) are reported as 0 bytes
 * instead of running into log(0) and a division by zero.
 *
 * @param int|float|false|null $size Size in bytes.
 * @return string Rounded value, a space, and the unit label.
 */
function convert($size)
{
    $unit=array('B <b>[Bytes]</b>','KB <b>[Kilobytes]</b>','MB <b>[Megabytes]</b>','GB [Gigabytes]','TB [Terabytes]','PB [Petabytes]');

    $value = is_numeric($size) ? (float) $size : 0.0;
    $i     = 0;
    $last  = count($unit) - 1;
    // Dividing by a power of two is exact, so repeated division gives the
    // same result as one division by 1024^$i, without relying on log().
    while ($value >= 1024 && $i < $last) {
        $value /= 1024;
        $i++;
    }

    return round($value, 2) . ' ' . $unit[$i];
}
// Final report line: memory_get_usage(true) is the memory allocated to this
// script by the system, formatted with convert().
echo "<br>Total RAM is used: ".convert(memory_get_usage(true));
?>