<?php
/**
 * ================================================================
 * DRIPBOX LINK CHECKER - SCANNER
 * ================================================================
 * Purpose: Find all external links in posts
 * ================================================================
 */

// Stop immediately unless the ABSPATH constant is already defined
// (presumably set by the WordPress bootstrap; guards against loading this file directly).
defined('ABSPATH') || exit;

/**
 * Scans published content for outbound links.
 *
 * Every link is returned as an associative array with the keys
 * 'url', 'post_id', 'post_title', 'post_url' and 'source'
 * ('content' for links found in post content, 'meta_<key>' for links
 * read from post meta).
 */
class DripBox_Link_Checker_Scanner {

    /** @var DripBox_Link_Checker_Scanner|null Shared instance handed out by get_instance(). */
    private static $instance = null;

    /**
     * Return the shared scanner instance, creating it on first use.
     *
     * @return DripBox_Link_Checker_Scanner
     */
    public static function get_instance() {
        if (self::$instance === null) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Find all external links in published posts, pages and DripBox products.
     *
     * A URL that appears in several posts is reported once, attributed to
     * the first post (in get_posts() order) in which it was found.
     *
     * @return array[] List of link records (see class description).
     */
    public function find_all_links() {
        $post_ids = get_posts([
            'post_type' => ['post', 'page', 'dripbox_product'],
            'post_status' => 'publish',
            'posts_per_page' => -1,
            'fields' => 'ids'
        ]);

        // Keyed by URL so duplicate detection is a hash lookup instead of a
        // linear scan over everything seen so far.
        $unique_links = [];

        foreach ($post_ids as $post_id) {
            foreach ($this->extract_links_from_post($post_id) as $link) {
                $url = $link['url'];
                if (!isset($unique_links[$url])) {
                    $unique_links[$url] = $link;
                }
            }
        }

        return array_values($unique_links);
    }

    /**
     * Extract links from a single post.
     *
     * Collects external links from <a href="..."> tags in the post content
     * and, for 'dripbox_product' posts, the URLs stored in post meta.
     *
     * @param int $post_id Post to scan.
     * @return array[] Link records; empty when the post cannot be loaded.
     */
    public function extract_links_from_post($post_id) {
        $links = [];
        $post = get_post($post_id);

        if (!$post) {
            return $links;
        }

        $urls = [];
        foreach ($this->extract_hrefs($post->post_content) as $href) {
            if (!$this->is_external_link($href)) {
                continue;
            }
            $clean = esc_url_raw($href);
            // Skip anything sanitisation reduced to an empty string.
            if (is_string($clean) && $clean !== '') {
                $urls[] = $clean;
            }
        }

        if (!empty($urls)) {
            // Title and permalink are per-post, so look them up once rather
            // than once per link.
            $title = get_the_title($post_id);
            $permalink = get_permalink($post_id);

            foreach ($urls as $url) {
                $links[] = [
                    'url' => $url,
                    'post_id' => $post_id,
                    'post_title' => $title,
                    'post_url' => $permalink,
                    'source' => 'content'
                ];
            }
        }

        // DripBox products additionally keep links in meta fields.
        if ($post->post_type === 'dripbox_product') {
            foreach ($this->extract_links_from_meta($post_id) as $meta_link) {
                $links[] = $meta_link;
            }
        }

        return $links;
    }

    /**
     * Pull the href values out of every anchor tag in an HTML fragment.
     *
     * Only real <a> elements are matched (not <area>, <abbr>, ...), only a
     * genuine href attribute (not data-href), and the closing quote must be
     * the same character as the opening one. Values are entity-decoded
     * (e.g. "&amp;" becomes "&") and trimmed, because attribute values in
     * markup are HTML-encoded while the URL to check is not.
     *
     * @param mixed $content HTML fragment.
     * @return string[] Decoded href values in document order.
     */
    private function extract_hrefs($content) {
        if (!is_string($content) || $content === '') {
            return [];
        }

        $pattern = '/<a\s(?:[^>]*?\s)?href\s*=\s*(["\'])(.*?)\1/i';

        // preg_match_all() yields 0 for "no match" and false on error.
        if (!preg_match_all($pattern, $content, $matches)) {
            return [];
        }

        $hrefs = [];
        foreach ($matches[2] as $raw) {
            $hrefs[] = trim(html_entity_decode($raw, ENT_QUOTES, 'UTF-8'));
        }

        return $hrefs;
    }

    /**
     * Extract links from post meta (DripBox products).
     *
     * Reads the 'amazon', 'other', 'third' and 'fourth' meta keys and keeps
     * each value that is a syntactically valid URL.
     *
     * @param int $post_id Post whose meta is read.
     * @return array[] Link records with 'source' set to 'meta_<key>'.
     */
    private function extract_links_from_meta($post_id) {
        $found = [];

        foreach (['amazon', 'other', 'third', 'fourth'] as $key) {
            $value = get_post_meta($post_id, $key, true);

            // Non-string meta (e.g. an array) cannot be a URL.
            if (!is_string($value)) {
                continue;
            }

            $value = trim($value);
            if ($value === '' || !filter_var($value, FILTER_VALIDATE_URL)) {
                continue;
            }

            $clean = esc_url_raw($value);
            if (is_string($clean) && $clean !== '') {
                $found[$key] = $clean;
            }
        }

        if (empty($found)) {
            return [];
        }

        $title = get_the_title($post_id);
        $permalink = get_permalink($post_id);

        $links = [];
        foreach ($found as $key => $url) {
            $links[] = [
                'url' => $url,
                'post_id' => $post_id,
                'post_title' => $title,
                'post_url' => $permalink,
                'source' => 'meta_' . $key
            ];
        }

        return $links;
    }

    /**
     * Check if URL is external.
     *
     * A URL is external when it is an absolute, valid URL with a host and
     * that host differs (case-insensitively) from the host of get_site_url().
     * Anchors and mailto:/tel:/javascript: links are never external.
     *
     * @param mixed $url Candidate URL.
     * @return bool
     */
    private function is_external_link($url) {
        if (!is_string($url)) {
            return false;
        }

        $url = trim($url);
        if ($url === '' || $url[0] === '#') {
            return false;
        }

        // Scheme names are case-insensitive, so "MAILTO:" must be skipped too.
        foreach (['mailto:', 'tel:', 'javascript:'] as $prefix) {
            if (strncasecmp($url, $prefix, strlen($prefix)) === 0) {
                return false;
            }
        }

        // Relative URLs and malformed values are not checkable links.
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return false;
        }

        // A URL without a host has nothing to compare against the site host.
        $link_host = parse_url($url, PHP_URL_HOST);
        if (!is_string($link_host) || $link_host === '') {
            return false;
        }

        $site_host = parse_url(get_site_url(), PHP_URL_HOST);
        $site_host = is_string($site_host) ? strtolower($site_host) : '';

        // Host names are case-insensitive.
        return strtolower($link_host) !== $site_host;
    }

    /**
     * Get link count stats.
     *
     * @return array 'total' (int), 'by_post_type' (post type => count) and
     *               'by_domain' (host => count, ten most frequent hosts).
     */
    public function get_link_stats() {
        $all_links = $this->find_all_links();

        return [
            'total' => count($all_links),
            'by_post_type' => $this->count_by_post_type($all_links),
            'by_domain' => $this->count_by_domain($all_links)
        ];
    }

    /**
     * Count links by the post type of the post they were found in.
     *
     * @param array[] $links Link records.
     * @return array Post type => number of links.
     */
    private function count_by_post_type($links) {
        $counts = [];

        foreach ($links as $link) {
            $post_type = get_post_type($link['post_id']);
            if (!isset($counts[$post_type])) {
                $counts[$post_type] = 0;
            }
            $counts[$post_type]++;
        }

        return $counts;
    }

    /**
     * Count links by domain.
     *
     * Hosts are lower-cased before counting so the same domain written in
     * different cases lands in one bucket; records whose URL has no host
     * are left out.
     *
     * @param array[] $links Link records.
     * @return array Host => count, sorted by count descending, at most ten entries.
     */
    private function count_by_domain($links) {
        $counts = [];

        foreach ($links as $link) {
            $host = parse_url($link['url'], PHP_URL_HOST);
            if (!is_string($host) || $host === '') {
                continue;
            }

            $host = strtolower($host);
            if (!isset($counts[$host])) {
                $counts[$host] = 0;
            }
            $counts[$host]++;
        }

        // Sort by count descending
        arsort($counts);

        // preserve_keys keeps an all-digit host from being renumbered.
        return array_slice($counts, 0, 10, true); // Top 10 domains
    }
}