<?php
declare(strict_types=1);

/**
 * Parallel FindAll -> MySQL ingestor (PHP)
 * - Creates FindAll run
 * - Polls until completion
 * - Fetches results
 * - Stores matched events into MySQL
 */

// Base URL that every request path in this script is appended to.
const PARALLEL_API_BASE = 'https://api.parallel.ai/v1beta';
// Value sent in the `parallel-beta` request header on every API call.
const PARALLEL_BETA_HEADER = 'findall-2025-09-15';

// ---- CONFIG ----
// The API key is read only from the environment (PARALLEL_API_KEY first, then
// PARALLEL_API). It must never be hard-coded here: a literal fallback both
// leaks the secret through version control and makes the check below dead code.
$PARALLEL_API_KEY = trim((string)(getenv('PARALLEL_API_KEY') ?: getenv('PARALLEL_API') ?: ''));
if ($PARALLEL_API_KEY === '') {
  fwrite(STDERR, "ERROR: Please set PARALLEL_API_KEY or PARALLEL_API env var.\n");
  exit(1);
}

// DB connection settings. Host, schema and user may be overridden through
// DB_HOST / DB_NAME / DB_USER; the password must be supplied through DB_PASS
// so that the secret is not stored in the source file.
$dbHost = getenv('DB_HOST') ?: '103.253.175.3';
$dbName = getenv('DB_NAME') ?: 'db_belive_events';
$dbUser = getenv('DB_USER') ?: 'dbuser';
$dbPass = getenv('DB_PASS');
if ($dbPass === false || $dbPass === '') {
  fwrite(STDERR, "ERROR: Please set the DB_PASS env var.\n");
  exit(1);
}

try {
  $pdo = new PDO(
    "mysql:host={$dbHost};dbname={$dbName};charset=utf8mb4",
    $dbUser,
    $dbPass,
    [
      PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
      PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
    ]
  );
} catch (PDOException $e) {
  // Report only the message: an uncaught exception would print a stack trace
  // for a call whose arguments are the connection credentials.
  fwrite(STDERR, "ERROR: Could not connect to MySQL: " . $e->getMessage() . "\n");
  exit(1);
}

// ---- HTTP helper ----
/**
 * Sends one JSON request to the Parallel API and returns the decoded response.
 *
 * The API key is taken from the global $PARALLEL_API_KEY. On HTTP 402 a billing
 * hint is written to STDERR before the exception is thrown.
 *
 * @param string     $method  HTTP verb (upper-cased before sending).
 * @param string     $path    Path appended to PARALLEL_API_BASE, e.g. "/findall/runs".
 * @param array|null $payload Request body; JSON-encoded and sent when not null.
 * @return array Decoded JSON response body.
 * @throws RuntimeException On transport failure, an unencodable payload, a
 *                          non-2xx status, or a body that does not decode to an array.
 */
function parallelRequest(string $method, string $path, ?array $payload = null): array {
  global $PARALLEL_API_KEY;

  $ch = curl_init(PARALLEL_API_BASE . $path);
  if ($ch === false) {
    throw new RuntimeException("cURL error: could not initialise request");
  }

  $options = [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_CUSTOMREQUEST => strtoupper($method),
    CURLOPT_HTTPHEADER => [
      'x-api-key: ' . $PARALLEL_API_KEY,
      'parallel-beta: ' . PARALLEL_BETA_HEADER,
      'Content-Type: application/json',
    ],
    // Without explicit limits a stalled connection would block the polling
    // loops forever, defeating their own wall-clock timeouts.
    CURLOPT_CONNECTTIMEOUT => 15,
    CURLOPT_TIMEOUT => 120,
  ];

  if ($payload !== null) {
    $json = json_encode($payload, JSON_UNESCAPED_SLASHES);
    if ($json === false) {
      curl_close($ch);
      throw new RuntimeException("Could not encode request payload: " . json_last_error_msg());
    }
    $options[CURLOPT_POSTFIELDS] = $json;
  }

  curl_setopt_array($ch, $options);

  $body = curl_exec($ch);
  $err  = curl_error($ch);
  $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
  curl_close($ch);

  if ($body === false) {
    throw new RuntimeException("cURL error: {$err}");
  }

  if ($code < 200 || $code >= 300) {
    // Prefer the API's own error message over the bare status code.
    $errorMsg = "HTTP {$code}";
    $decodedError = json_decode($body, true);
    if (is_array($decodedError)) {
      if (isset($decodedError['error']['message'])) {
        $errorMsg = $decodedError['error']['message'];
      } elseif (isset($decodedError['message'])) {
        $errorMsg = $decodedError['message'];
      }
    }

    if ($code === 402) {
      fwrite(STDERR, "\n⚠️  Billing Error: {$errorMsg}\n");
      fwrite(STDERR, "Please check your account balance or wait for running processors to complete.\n");
      fwrite(STDERR, "Billing page: https://platform.parallel.ai/settings?tab=billing\n\n");
    }

    throw new RuntimeException("Parallel API error HTTP {$code}: {$errorMsg}");
  }

  $decoded = json_decode($body, true);
  if (!is_array($decoded)) {
    throw new RuntimeException("Invalid JSON response: {$body}");
  }

  return $decoded;
}

// ---- API functions ----
/**
 * POSTs the given objective to the /findall/ingest endpoint.
 *
 * @param string $objective Free-text objective sent as the `objective` field.
 * @return array Decoded API response.
 */
function ingestQuery(string $objective): array {
  $payload = ['objective' => $objective];

  return parallelRequest('POST', '/findall/ingest', $payload);
}

/**
 * Creates a FindAll run by POSTing to /findall/runs.
 *
 * Recognised $options keys: `objective` (required), `entityType` (default
 * "events"), `generator` (default "core"), `matchLimit` (default 50), and the
 * optional `matchConditions` / `enrichments`, which are sent only when non-empty.
 *
 * @param array $options Run options in camelCase, as listed above.
 * @return array Decoded API response (the caller reads `findall_id` from it).
 */
function createFindAllRun(array $options): array {
  $request = [
    'objective'    => $options['objective'],
    'entity_type'  => $options['entityType'] ?? 'events',
    'generator'    => $options['generator'] ?? 'core',
    'match_limit'  => $options['matchLimit'] ?? 50,
  ];

  // Optional sections: option key => request field, appended in this order.
  $optionalFields = [
    'matchConditions' => 'match_conditions',
    'enrichments'     => 'enrichments',
  ];
  foreach ($optionalFields as $optionKey => $requestField) {
    if (empty($options[$optionKey])) {
      continue;
    }
    $request[$requestField] = $options[$optionKey];
  }

  return parallelRequest('POST', '/findall/runs', $request);
}

/**
 * GETs the status document of a FindAll run.
 *
 * @param string $findallId Run identifier, inserted into the path as-is.
 * @return array Decoded API response.
 */
function getRunStatus(string $findallId): array {
  $path = '/findall/runs/' . $findallId;

  return parallelRequest('GET', $path);
}

/**
 * GETs the result document of a FindAll run.
 *
 * @param string $findallId Run identifier, inserted into the path as-is.
 * @return array Decoded API response.
 */
function getRunResult(string $findallId): array {
  $path = '/findall/runs/' . $findallId . '/result';

  return parallelRequest('GET', $path);
}

/**
 * Polls the run status until `status.is_active` is exactly false.
 *
 * A progress line is echoed at most once every 30 seconds of elapsed time.
 *
 * @param string $findallId      Run identifier.
 * @param int    $maxWaitMs      Wall-clock budget in milliseconds.
 * @param int    $pollIntervalMs Pause between status requests in milliseconds.
 * @return array The status response in which the run was reported inactive.
 * @throws RuntimeException When the budget is used up first, or a status request fails.
 */
function pollUntilComplete(string $findallId, int $maxWaitMs = 300000, int $pollIntervalMs = 5000): array {
  $nowMs = static fn(): int => (int)(microtime(true) * 1000);

  $startedAt = $nowMs();
  $lastReportAt = 0;
  $reportEveryMs = 30000;

  echo "Polling for completion (checking every " . ($pollIntervalMs / 1000) . " seconds)...\n";

  while (true) {
    if (($nowMs() - $startedAt) >= $maxWaitMs) {
      throw new RuntimeException("Polling timeout exceeded for findall_id={$findallId}");
    }

    $status = getRunStatus($findallId);
    $isActive = $status['status']['is_active'] ?? null;
    $currentStatus = $status['status']['status'] ?? 'unknown';

    // Elapsed time is measured after the request so its latency is included.
    $elapsed = $nowMs() - $startedAt;
    $elapsedSec = round($elapsed / 1000);

    if (($elapsed - $lastReportAt) >= $reportEveryMs) {
      $activeLabel = $isActive ? 'yes' : 'no';
      echo "  [{$elapsedSec}s] Status: {$currentStatus} (active: " . $activeLabel . ")\n";
      $lastReportAt = $elapsed;
    }

    if ($isActive === false) {
      echo "Run completed in {$elapsedSec} seconds.\n";
      return $status;
    }

    usleep($pollIntervalMs * 1000);
  }
}

/**
 * Returns the candidates whose match_status is exactly "matched", re-indexed from 0.
 *
 * @param array $candidates Raw `candidates` list from a result response.
 * @return array List of matched candidate arrays.
 */
function filterMatchedCandidates(array $candidates): array {
  return array_values(array_filter(
    $candidates,
    fn($c) => is_array($c) && ($c['match_status'] ?? '') === 'matched'
  ));
}

/**
 * Transforms and upserts every matched candidate that has not been handled yet.
 *
 * Rows without a start date get one recovered from a YYYY-MM-DD or MM/DD/YYYY
 * pattern in the name/description (only real calendar dates are accepted);
 * failing that, rows that still have a name receive a placeholder date 30 days
 * ahead, and rows with neither name nor date are skipped.
 *
 * @param PDO    $pdo          Connection passed to upsertEvent().
 * @param array  $matched      Matched candidates, as returned by filterMatchedCandidates().
 * @param string $findallId    Run identifier, forwarded to transformCandidate().
 * @param array  $processedIds Map of candidate ID => true for candidates already handled.
 * @param bool   $showDebug    Whether to dump the structure of the first new candidate.
 * @return array `processedIds` (updated map), `stored` (int) and `skipped` (int).
 */
function storeNewCandidates(PDO $pdo, array $matched, string $findallId, array $processedIds, bool $showDebug): array {
  $stored = 0;
  $skipped = 0;
  $debuggedFirst = !$showDebug;

  foreach ($matched as $i => $candidate) {
    $candidateId = $candidate['candidate_id'] ?? ("event-" . $i);
    if (isset($processedIds[$candidateId])) {
      continue;
    }
    // Every outcome below (stored, skipped, insert error) is final for this
    // run, so the candidate is marked up-front and never retried.
    $processedIds[$candidateId] = true;

    if (!$debuggedFirst) {
      $output = $candidate['output'] ?? null;
      echo "  [DEBUG] First candidate structure:\n";
      echo "    - candidate_id: " . ($candidate['candidate_id'] ?? 'N/A') . "\n";
      echo "    - name: " . ($candidate['name'] ?? 'N/A') . "\n";
      echo "    - output keys: " . (is_array($output) ? implode(', ', array_keys($output)) : 'N/A') . "\n";
      if (isset($candidate['output']['event_date'])) {
        echo "    - event_date value: " . json_encode($candidate['output']['event_date']) . "\n";
      }
      if (isset($candidate['output']['event_name'])) {
        echo "    - event_name value: " . json_encode($candidate['output']['event_name']) . "\n";
      }
      $debuggedFirst = true;
    }

    $row = transformCandidate($candidate, $i, $findallId);

    // Try to recover a missing date from the name or description.
    if (empty($row['start_date']) && !empty($row['name'])) {
      $textToSearch = $row['name'] . ' ' . $row['description'];
      if (preg_match('/(\d{4})-(\d{2})-(\d{2})/', $textToSearch, $m) === 1
          && checkdate((int)$m[2], (int)$m[3], (int)$m[1])) {
        $row['start_date'] = $m[0];
        echo "  [INFO] Extracted date from text: {$row['start_date']}\n";
      } elseif (preg_match('/(\d{1,2})\/(\d{1,2})\/(\d{4})/', $textToSearch, $m) === 1
          && checkdate((int)$m[1], (int)$m[2], (int)$m[3])) {
        // Convert MM/DD/YYYY to YYYY-MM-DD
        $row['start_date'] = sprintf('%04d-%02d-%02d', (int)$m[3], (int)$m[1], (int)$m[2]);
        echo "  [INFO] Extracted and converted date: {$row['start_date']}\n";
      }
    }

    if (empty($row['start_date'])) {
      if (empty($row['name'])) {
        // Neither a name nor a date: nothing worth storing.
        $skipped++;
        if ($skipped <= 3) {
          $outputKeys = is_array($candidate['output'] ?? null) ? array_keys($candidate['output']) : [];
          echo "  [DEBUG] Skipped candidate - no name or date. Available fields: " . json_encode($outputKeys) . "\n";
        }
        continue;
      }
      // Named event without a date: store it under a placeholder date.
      $row['start_date'] = date('Y-m-d', strtotime('+30 days'));
      echo "  [WARN] No date found for '{$row['name']}', using placeholder date: {$row['start_date']}\n";
    }

    try {
      upsertEvent($pdo, $row);
      $stored++;
      echo "  ✓ Inserted: {$row['name']} ({$row['start_date']})\n";
    } catch (Exception $e) {
      echo "  ⚠ Error inserting {$row['name']}: " . $e->getMessage() . "\n";
    }
  }

  return ['processedIds' => $processedIds, 'stored' => $stored, 'skipped' => $skipped];
}

/**
 * Polls a FindAll run and stores matched candidates as they become available.
 *
 * Each iteration first tries the result endpoint (failures there are treated
 * as "not ready yet"), then checks the run status. Once `status.is_active` is
 * exactly false, one final result fetch is processed with the same rules as
 * the incremental fetches and the function returns. A candidate is handled at
 * most once per call.
 *
 * @param PDO    $pdo            Connection used for the upserts.
 * @param string $findallId      Run identifier.
 * @param int    $maxWaitMs      Wall-clock budget in milliseconds.
 * @param int    $pollIntervalMs Pause between iterations in milliseconds.
 * @return int Number of events stored or updated, including on timeout.
 */
function pollAndProcessIncremental(PDO $pdo, string $findallId, int $maxWaitMs = 900000, int $pollIntervalMs = 10000): int {
  $start = (int)(microtime(true) * 1000);
  $lastStatusTime = 0;
  $statusIntervalMs = 30000; // Show status every 30 seconds
  $processedIds = [];        // candidate ID => true, to avoid duplicates
  $totalStored = 0;
  $fetchAttempts = 0;

  echo "Polling and processing results incrementally (checking every " . ($pollIntervalMs / 1000) . " seconds)...\n";

  while (((int)(microtime(true) * 1000) - $start) < $maxWaitMs) {
    try {
      // Try to fetch results (may work even if run is still active)
      $fetchAttempts++;
      $result = getRunResult($findallId);
      $candidates = $result['candidates'] ?? [];
      if (!is_array($candidates)) {
        $candidates = [];
      }
      $candidateCount = count($candidates);

      if ($fetchAttempts <= 3 || $candidateCount > 0) {
        echo "  [Fetch #{$fetchAttempts}] Got {$candidateCount} candidate(s)\n";
      }

      if ($candidateCount > 0) {
        $matched = filterMatchedCandidates($candidates);
        $matchedCount = count($matched);
        if ($matchedCount > 0) {
          echo "  → Found {$matchedCount} matched candidate(s)\n";
        }

        $outcome = storeNewCandidates($pdo, $matched, $findallId, $processedIds, true);
        $processedIds = $outcome['processedIds'];
        $newCount = $outcome['stored'];
        $skippedNoDate = $outcome['skipped'];
        $totalStored += $newCount;

        if ($newCount > 0) {
          echo "  → Stored {$newCount} new event(s) (total: {$totalStored})\n";
        }
        if ($skippedNoDate > 0 && $newCount === 0) {
          echo "  ⚠ Skipped {$skippedNoDate} candidate(s) without start_date\n";
        }
      } elseif ($fetchAttempts <= 3) {
        echo "  → No candidates available yet (results may not be ready until completion)\n";
      }
    } catch (Exception $e) {
      // If result not available yet, that's okay - continue polling
      $errorMsg = $e->getMessage();
      if ($fetchAttempts <= 3) {
        if (strpos($errorMsg, '404') !== false || strpos($errorMsg, 'not found') !== false) {
          echo "  → Results endpoint not available yet (404 - this is normal while run is active)\n";
        } else {
          echo "  ⚠ Fetch error: " . $errorMsg . "\n";
        }
      }
    }

    // Check status
    try {
      $status = getRunStatus($findallId);
      $isActive = $status['status']['is_active'] ?? null;
      $currentStatus = $status['status']['status'] ?? 'unknown';

      // Show status periodically
      $elapsed = (int)(microtime(true) * 1000) - $start;
      if (($elapsed - $lastStatusTime) >= $statusIntervalMs) {
        $elapsedSec = round($elapsed / 1000);
        echo "  [{$elapsedSec}s] Status: {$currentStatus} (active: " . ($isActive ? 'yes' : 'no') . ") | Stored so far: {$totalStored}\n";
        $lastStatusTime = $elapsed;
      }

      // If run is complete, do one final fetch and exit
      if ($isActive === false) {
        $elapsedSec = round($elapsed / 1000);
        echo "\nRun completed in {$elapsedSec} seconds. Fetching final results...\n";

        try {
          $result = getRunResult($findallId);
          $candidates = $result['candidates'] ?? [];
          if (is_array($candidates)) {
            // Same rules as the incremental path, so what gets stored does
            // not depend on when a candidate happened to show up.
            $outcome = storeNewCandidates($pdo, filterMatchedCandidates($candidates), $findallId, $processedIds, false);
            $finalNew = $outcome['stored'];
            $totalStored += $finalNew;
            if ($finalNew > 0) {
              echo "  → Stored {$finalNew} additional event(s) from final fetch\n";
            }
          }
        } catch (Exception $e) {
          // Best effort: keep what was stored so far, but say what went wrong.
          echo "  ⚠ Final fetch failed: " . $e->getMessage() . "\n";
        }

        return $totalStored;
      }
    } catch (Exception $e) {
      // Continue polling even if the status check fails, but leave a trace.
      echo "  ⚠ Status check failed (will retry): " . $e->getMessage() . "\n";
    }

    usleep($pollIntervalMs * 1000);
  }

  echo "\n⚠ Polling timeout reached. Stored {$totalStored} events so far.\n";
  echo "You can check the run status later: findall_id={$findallId}\n";
  return $totalStored;
}

// ---- Transform candidate -> DB row ----
/**
 * Reads one enrichment value from `$candidate['output']` as a trimmed string.
 *
 * A value may be stored either directly (`output[key]`) or wrapped
 * (`output[key]['value']`). Integers and floats are converted to strings.
 * For the key "event_date" a fixed list of alternative field names is tried
 * when the primary key yields nothing. Values that are empty after trimming
 * count as missing.
 *
 * @param array  $candidate Candidate as returned by the API.
 * @param string $key       Field name inside `output`.
 * @param string $default   Returned when no usable value is found.
 * @return string Trimmed, non-empty value, or $default.
 */
function safeOutputValue(array $candidate, string $key, string $default = ''): string {
  $output = $candidate['output'] ?? [];
  if (!is_array($output)) {
    return $default;
  }

  // Lookup order: the requested key first, then (dates only) its aliases.
  $lookupKeys = [$key];
  if ($key === 'event_date') {
    array_push($lookupKeys, 'date', 'start_date', 'event_start_date', 'when', 'event_when');
  }

  foreach ($lookupKeys as $lookupKey) {
    $raw = $output[$lookupKey] ?? null;
    if (is_array($raw)) {
      $raw = $raw['value'] ?? null;
    }
    if (is_int($raw) || is_float($raw)) {
      $raw = (string)$raw;
    }
    if (!is_string($raw)) {
      continue;
    }

    $text = trim($raw);
    if ($text !== '') {
      return $text;
    }
  }

  return $default;
}

/**
 * Maps a free-text event type onto one of the supported slugs.
 *
 * The input is trimmed and lower-cased, runs of whitespace or underscores
 * become a single hyphen, and anything not in the supported list is "other".
 *
 * @param string $t Raw event type, e.g. "Trade Show".
 * @return string One of the supported slugs, or "other".
 */
function normalizeEventType(string $t): string {
  // Trim before replacing whitespace: otherwise padding turns into leading or
  // trailing hyphens and a valid type falls through to "other".
  $slug = preg_replace('/[\s_]+/', '-', strtolower(trim($t))) ?? '';
  $slug = trim($slug, '-');

  $valid = ['conference','exhibition','summit','festival','trade-show','networking','workshop','concert','sports','other'];
  return in_array($slug, $valid, true) ? $slug : 'other';
}

/**
 * Converts one API candidate into the associative row expected by upsertEvent().
 *
 * Values come from the candidate's `output` enrichments via safeOutputValue(),
 * falling back to the candidate's top-level `name`, `description` and `url`.
 * Optional columns (`end_date`, `pricing`, `website_url`) are null when empty.
 *
 * @param array  $candidate Candidate as returned by the API.
 * @param int    $index     Position of the candidate; used only to build a fallback ID.
 * @param string $findallId Run identifier; its first 8 characters prefix the fallback ID.
 * @return array Row keyed by column name, including `raw_candidate_json`.
 */
function transformCandidate(array $candidate, int $index, string $findallId = ''): array {
  // Top-level fallbacks are passed as string defaults. Under strict_types a
  // non-string value there would raise a TypeError, so coerce defensively.
  $text = static fn($value): string =>
    (is_string($value) || is_int($value) || is_float($value)) ? (string)$value : '';

  // Use candidate_id if available, otherwise generate an ID with a findall_id prefix
  $id = $candidate['candidate_id'] ?? ("event-" . ($findallId ? substr($findallId, 0, 8) . "-" : "") . $index);

  $eventType = normalizeEventType(safeOutputValue($candidate, 'event_type', 'other'));

  $sponsorsStr = safeOutputValue($candidate, 'sponsors', '');
  $sponsors = array_values(array_filter(array_map('trim', explode(',', $sponsorsStr))));

  $name = safeOutputValue($candidate, 'event_name', $text($candidate['name'] ?? ''));
  $startDate = safeOutputValue($candidate, 'event_date', '');
  $endDate = safeOutputValue($candidate, 'event_end_date', '');
  $pricing = safeOutputValue($candidate, 'ticket_pricing', '');
  $websiteUrl = safeOutputValue($candidate, 'website_url', $text($candidate['url'] ?? ''));
  $ticketAvailable = strtolower(safeOutputValue($candidate, 'ticket_available', 'unknown')) === 'yes';

  // Substitute rather than fail on invalid UTF-8 or other unencodable parts,
  // so the column never receives `false`.
  $rawJson = json_encode(
    $candidate,
    JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR
  );
  if ($rawJson === false) {
    $rawJson = '{}';
  }

  return [
    'id' => $id,
    'name' => $name,
    'start_date' => $startDate,
    'end_date' => $endDate !== '' ? $endDate : null,
    'attendees_range' => safeOutputValue($candidate, 'attendees_range', '1000+'),
    'venue' => safeOutputValue($candidate, 'venue_name', 'TBA'),
    'city' => safeOutputValue($candidate, 'city', ''),
    'country' => safeOutputValue($candidate, 'country', 'Middle East'),
    'ticket_available' => $ticketAvailable ? 1 : 0,
    'pricing' => $pricing !== '' ? $pricing : null,
    'description' => safeOutputValue($candidate, 'one_liner_description', $text($candidate['description'] ?? '')),
    'attendee_profiles' => safeOutputValue($candidate, 'attendee_profiles', 'Business professionals and industry experts'),
    'sponsors' => implode(', ', $sponsors),
    'event_type' => $eventType,
    'website_url' => $websiteUrl !== '' ? $websiteUrl : null,
    'raw_candidate_json' => $rawJson,
  ];
}

// ---- DB upsert ----
/**
 * Inserts one event row, or updates the existing row on a duplicate key.
 *
 * @param PDO   $pdo Open connection.
 * @param array $row Row keyed by column name, as built by transformCandidate().
 */
function upsertEvent(PDO $pdo, array $row): void {
  // NOTE(review): start_date is inserted but not refreshed in the
  // ON DUPLICATE KEY UPDATE list — confirm that this is intentional.
  $query = "
    INSERT INTO events (
      id, name, start_date, end_date, attendees_range, venue, city, country,
      ticket_available, pricing, description, attendee_profiles, sponsors,
      event_type, website_url, raw_candidate_json, created_at, updated_at
    ) VALUES (
      :id, :name, :start_date, :end_date, :attendees_range, :venue, :city, :country,
      :ticket_available, :pricing, :description, :attendee_profiles, :sponsors,
      :event_type, :website_url, :raw_candidate_json, NOW(), NOW()
    )
    ON DUPLICATE KEY UPDATE
      name = VALUES(name),
      end_date = VALUES(end_date),
      attendees_range = VALUES(attendees_range),
      venue = VALUES(venue),
      city = VALUES(city),
      country = VALUES(country),
      ticket_available = VALUES(ticket_available),
      pricing = VALUES(pricing),
      description = VALUES(description),
      attendee_profiles = VALUES(attendee_profiles),
      sponsors = VALUES(sponsors),
      event_type = VALUES(event_type),
      website_url = VALUES(website_url),
      raw_candidate_json = VALUES(raw_candidate_json),
      updated_at = NOW()
  ";

  // One named placeholder per bound column, in the order of the INSERT list.
  $boundColumns = [
    'id', 'name', 'start_date', 'end_date', 'attendees_range', 'venue', 'city', 'country',
    'ticket_available', 'pricing', 'description', 'attendee_profiles', 'sponsors',
    'event_type', 'website_url', 'raw_candidate_json',
  ];

  $bindings = [];
  foreach ($boundColumns as $column) {
    $bindings[':' . $column] = $row[$column];
  }

  $statement = $pdo->prepare($query);
  $statement->execute($bindings);
}

// ---- Main: build objective like your TS version ----
// Search window: from one month before to three months after the current date.
$today = new DateTimeImmutable('now');
$pastMonth = $today->modify('-1 month');
$futureMonth = $today->modify('+3 months');

// The two %s placeholders receive the window bounds formatted as YYYY-MM-DD.
$objectiveWindow = [
  $pastMonth->format('Y-m-d'),
  $futureMonth->format('Y-m-d'),
];
$objective = vsprintf(
  "Find upcoming business and consumer events in Middle East countries (United Arab Emirates, Saudi Arabia, Qatar, Bahrain, Kuwait, Oman, Jordan, Egypt, Israel, Lebanon) scheduled between %s and %s. Focus on events with 1000+ expected attendees including: technology conferences, business exhibitions, industry summits, trade shows, music festivals, sports competitions, and networking events. Prioritize events with confirmed dates, venues, and official websites.",
  $objectiveWindow
);

// Conditions every candidate must satisfy, keyed by condition name.
$matchConditionDescriptions = [
  'middle_east_location' => 'Event location must be in Middle East: UAE, Saudi Arabia, Qatar, Bahrain, Kuwait, Oman, Jordan, Egypt, Israel, or Lebanon',
  'minimum_attendees' => 'Event should have at least 1000 expected attendees (approximate is acceptable)',
  'valid_event_type' => 'Must be a real event: conference, exhibition, summit, festival, trade show, concert, sports competition, or networking event',
  'has_date' => 'Event must have a confirmed or planned start date',
];

// Expand into a list of ['name' => ..., 'description' => ...] entries, the
// shape handed to createFindAllRun() as `matchConditions`.
$matchConditions = [];
foreach ($matchConditionDescriptions as $conditionName => $conditionDescription) {
  $matchConditions[] = [
    'name' => $conditionName,
    'description' => $conditionDescription,
  ];
}

// Enrichment fields requested for each candidate, keyed by field name. The
// same names are read back from the candidate's `output` by transformCandidate().
$enrichmentDescriptions = [
  'event_name' => 'Official name of the event suitable for display',
  'event_date' => 'Start date of the event in YYYY-MM-DD format',
  'event_end_date' => 'End date of the event in YYYY-MM-DD format if multi-day',
  'attendees_range' => 'Expected number of attendees as a range (e.g., "5000-10000")',
  'venue_name' => 'Name of the venue where the event is held',
  'city' => 'City where the event is held',
  'country' => 'Country where the event is held',
  'ticket_available' => 'Whether tickets are currently available (yes/no)',
  'ticket_pricing' => 'Ticket pricing information if available',
  'one_liner_description' => 'One sentence description of the event',
  'attendee_profiles' => 'Description of typical attendees (e.g., "Tech executives, startups, investors")',
  'sponsors' => 'List of major sponsors if any, comma separated',
  'event_type' => 'Type of event: conference, exhibition, summit, festival, trade-show, concert, sports, networking, workshop, or other',
  'website_url' => 'Official website URL of the event',
];

// Expand into a list of ['name' => ..., 'description' => ...] entries.
$enrichments = array_map(
  fn($fieldName, $fieldDescription) => ['name' => $fieldName, 'description' => $fieldDescription],
  array_keys($enrichmentDescriptions),
  array_values($enrichmentDescriptions)
);

// Entry point: create the run, then poll it and store results as they arrive.
// Any failure is reported on STDERR and ends the script with exit code 1.
try {
  // 1) Create run
  $run = createFindAllRun([
    'objective' => $objective,
    'entityType' => 'events',
    'matchConditions' => $matchConditions,
    'enrichments' => $enrichments,
    'generator' => 'core',
    'matchLimit' => 20,
  ]);

  $findallId = $run['findall_id'] ?? null;
  // Must be a non-empty string: it is passed to a string-typed parameter below.
  if (!is_string($findallId) || $findallId === '') {
    throw new RuntimeException("No findall_id returned from create run");
  }
  echo "FindAll run created: {$findallId}\n\n";

  // 2) Poll and process results incrementally (inserts as they become available)
  $stored = pollAndProcessIncremental($pdo, $findallId, 900000, 10000); // 15 min timeout, check every 10s

  echo "\n✓ Total stored/updated events: {$stored}\n";
} catch (RuntimeException $e) {
  $message = $e->getMessage();
  // parallelRequest() already prints a billing hint for HTTP 402, so the
  // generic hint is shown only for balance errors reported under another status.
  $alreadyReported = strpos($message, 'HTTP 402') !== false;
  if (!$alreadyReported && strpos($message, 'might exceed your balance') !== false) {
    fwrite(STDERR, "\n⚠️  Billing Error: You currently have processors running that might exceed your balance.\n");
    fwrite(STDERR, "Please wait for running processors to complete or add more funds.\n");
    fwrite(STDERR, "Billing page: https://platform.parallel.ai/settings?tab=billing\n\n");
  }
  fwrite(STDERR, "Error: {$message}\n");
  exit(1);
} catch (Throwable $e) {
  // Also covers Error/TypeError, which `catch (Exception)` would let escape
  // as a fatal error with a stack trace.
  fwrite(STDERR, "Unexpected error: " . $e->getMessage() . "\n");
  exit(1);
}
