<?php
declare(strict_types=1);

/**
 * Parallel FindAll -> CSV exporter for PAST EVENTS (last year)
 * - Creates a FindAll run for events from the last year
 * - Polls until completion, fetching results incrementally while the run is active
 * - Filters matched events by date
 * - Appends matched events to a timestamped CSV file (past_events_<timestamp>.csv)
 */

// Base URL of the Parallel beta API and the beta feature flag sent with every request.
const PARALLEL_API_BASE = 'https://api.parallel.ai/v1beta';
const PARALLEL_BETA_HEADER = 'findall-2025-09-15';

// ---- CONFIG ----
// The API key is read from the environment only. A literal fallback key must never
// live in source control: it leaks the credential and makes the check below unreachable.
// NOTE(review): the key that was previously hard-coded here should be treated as
// compromised and rotated.
$PARALLEL_API_KEY = trim((string)(getenv('PARALLEL_API_KEY') ?: getenv('PARALLEL_API') ?: ''));
if ($PARALLEL_API_KEY === '') {
  fwrite(STDERR, "ERROR: Please set PARALLEL_API_KEY or PARALLEL_API env var.\n");
  exit(1);
}

// CSV file path: one new file per invocation, named after the start time.
$csvFile = __DIR__ . '/past_events_' . date('Y-m-d_His') . '.csv';

// ---- HTTP helper ----
/**
 * Send a JSON request to the Parallel API and return the decoded response.
 *
 * Authenticates with the global $PARALLEL_API_KEY and sends the beta header
 * defined by PARALLEL_BETA_HEADER. On HTTP 402 a billing hint is additionally
 * written to STDERR before the exception is thrown.
 *
 * @param string     $method  HTTP verb (upper-cased before sending).
 * @param string     $path    Path appended to PARALLEL_API_BASE.
 * @param array|null $payload Optional request body; JSON-encoded when not null.
 *
 * @return array Decoded JSON response body.
 *
 * @throws RuntimeException On transport failure, an unencodable payload, a non-2xx
 *                          status, or a response body that does not decode to an array.
 */
function parallelRequest(string $method, string $path, ?array $payload = null): array {
  global $PARALLEL_API_KEY;

  $url = PARALLEL_API_BASE . $path;

  $headers = [
    'x-api-key: ' . $PARALLEL_API_KEY,
    'parallel-beta: ' . PARALLEL_BETA_HEADER,
    'Content-Type: application/json',
  ];

  $ch = curl_init($url);
  if ($ch === false) {
    throw new RuntimeException("cURL error: could not initialise a request for {$url}");
  }

  curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  curl_setopt($ch, CURLOPT_CUSTOMREQUEST, strtoupper($method));
  curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
  // Bound every request: without timeouts a stalled connection blocks forever and
  // callers that enforce their own deadline (the polling loop) can never reach it.
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
  curl_setopt($ch, CURLOPT_TIMEOUT, 120);

  if ($payload !== null) {
    $json = json_encode($payload, JSON_UNESCAPED_SLASHES);
    if ($json === false) {
      curl_close($ch);
      throw new RuntimeException('Cannot encode request payload as JSON: ' . json_last_error_msg());
    }
    curl_setopt($ch, CURLOPT_POSTFIELDS, $json);
  }

  $body = curl_exec($ch);
  $err  = curl_error($ch);
  $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
  curl_close($ch);

  if ($body === false) {
    throw new RuntimeException("cURL error: {$err}");
  }

  if ($code < 200 || $code >= 300) {
    // Prefer the API's own error text when the body carries one.
    $errorMsg = "HTTP {$code}";
    $decodedError = json_decode($body, true);
    if (is_array($decodedError)) {
      if (isset($decodedError['error']['message'])) {
        $errorMsg = $decodedError['error']['message'];
      } elseif (isset($decodedError['message'])) {
        $errorMsg = $decodedError['message'];
      }
    }

    if ($code === 402) {
      fwrite(STDERR, "\n⚠️  Billing Error: {$errorMsg}\n");
      fwrite(STDERR, "Please check your account balance or wait for running processors to complete.\n");
      fwrite(STDERR, "Billing page: https://platform.parallel.ai/settings?tab=billing\n\n");
    }

    throw new RuntimeException("Parallel API error HTTP {$code}: {$errorMsg}");
  }

  $decoded = json_decode($body, true);
  if (!is_array($decoded)) {
    throw new RuntimeException("Invalid JSON response: {$body}");
  }

  return $decoded;
}

// ---- API functions ----
/**
 * Start a FindAll run.
 *
 * Translates the camelCase option keys into the snake_case request fields and
 * POSTs them to /findall/runs.
 *
 * @param array $options Reads 'objective', and optionally 'entityType' (default 'events'),
 *                       'generator' (default 'core'), 'matchLimit' (default 50),
 *                       'matchConditions' and 'enrichments' (sent only when non-empty).
 *
 * @return array Decoded API response.
 */
function createFindAllRun(array $options): array {
  $request = [
    'objective'   => $options['objective'],
    'entity_type' => $options['entityType'] ?? 'events',
    'generator'   => $options['generator'] ?? 'core',
    'match_limit' => $options['matchLimit'] ?? 50,
  ];

  // Optional list-valued fields: option key => request field, added only when non-empty.
  $optionalFields = [
    'matchConditions' => 'match_conditions',
    'enrichments'     => 'enrichments',
  ];
  foreach ($optionalFields as $optionKey => $requestField) {
    if (empty($options[$optionKey])) {
      continue;
    }
    $request[$requestField] = $options[$optionKey];
  }

  return parallelRequest('POST', '/findall/runs', $request);
}

/**
 * Fetch the current state of a FindAll run.
 *
 * @param string $findallId ID of the run.
 *
 * @return array Decoded API response.
 */
function getRunStatus(string $findallId): array {
  $statusPath = '/findall/runs/' . $findallId;

  return parallelRequest('GET', $statusPath);
}

/**
 * Fetch the result document of a FindAll run.
 *
 * @param string $findallId ID of the run.
 *
 * @return array Decoded API response.
 */
function getRunResult(string $findallId): array {
  $resultPath = '/findall/runs/' . $findallId . '/result';

  return parallelRequest('GET', $resultPath);
}

/**
 * Recover a start date from free text such as an event name or description.
 *
 * An ISO date (YYYY-MM-DD) is returned as found. A slash date is read as
 * MM/DD/YYYY; when that is not a real calendar date (e.g. 25/12/2024) the
 * day-first reading is tried before giving up.
 *
 * @return string Date as YYYY-MM-DD, or '' when no usable date is present.
 */
function extractStartDateFromText(string $text): string {
  if (preg_match('/(\d{4}-\d{2}-\d{2})/', $text, $iso) === 1) {
    return $iso[1];
  }

  if (preg_match('/(\d{1,2})\/(\d{1,2})\/(\d{4})/', $text, $parts) === 1) {
    $first = (int)$parts[1];
    $second = (int)$parts[2];
    $year = (int)$parts[3];

    if (checkdate($first, $second, $year)) {
      return sprintf('%04d-%02d-%02d', $year, $first, $second);
    }
    if (checkdate($second, $first, $year)) {
      return sprintf('%04d-%02d-%02d', $year, $second, $first);
    }
  }

  return '';
}

/**
 * Keep only the candidates whose match_status is 'matched', re-indexed from 0.
 */
function filterMatchedCandidates(array $candidates): array {
  return array_values(array_filter(
    $candidates,
    fn($c) => ($c['match_status'] ?? '') === 'matched'
  ));
}

/**
 * Append every not-yet-processed matched candidate that passes the date filter to the CSV.
 *
 * A candidate is written when it has a parsable start date inside the last year,
 * or when it has no usable date but does have a name. Every candidate that gets
 * transformed is recorded in $processedIds, whether it was written, filtered out,
 * or failed to write, so it is considered at most once per run.
 *
 * @param resource $csvHandle    Open, writable CSV stream.
 * @param array    $matched      Matched candidates, list-indexed.
 * @param string   $findallId    Run ID; written to each row and used in fallback candidate IDs.
 * @param array    $processedIds Set of handled candidate IDs (ID => true); updated in place.
 * @param bool     $announceEach Whether to print one line per saved event.
 *
 * @return int Number of rows written by this call.
 */
function writeNewMatchesToCsv($csvHandle, array $matched, string $findallId, array &$processedIds, bool $announceEach): int {
  // Compare whole days: "1 year ago" carries the current time of day and would
  // exclude an event dated exactly one year ago.
  $windowEnd = new DateTimeImmutable('today');
  $windowStart = $windowEnd->modify('-1 year');
  $written = 0;

  foreach ($matched as $i => $candidate) {
    $candidateId = $candidate['candidate_id'] ?? ("past-event-" . $findallId . "-" . $i);
    if (isset($processedIds[$candidateId])) {
      continue;
    }

    $row = transformCandidate($candidate, $i, $findallId);
    $processedIds[$candidateId] = true;

    // Try to extract a date from the name or description if the enrichment had none.
    if (empty($row['start_date']) && !empty($row['name'])) {
      $row['start_date'] = extractStartDateFromText($row['name'] . ' ' . $row['description']);
    }

    $hasDate = !empty($row['start_date']);
    $isFromLastYear = false;
    if ($hasDate) {
      try {
        $eventDate = new DateTimeImmutable($row['start_date']);
        $isFromLastYear = ($eventDate >= $windowStart && $eventDate <= $windowEnd);
      } catch (Exception $e) {
        // Unparsable date: treat the event as undated.
        $hasDate = false;
      }
    }

    // Include the event if it is dated within the last year, or undated but named.
    $shouldInclude = ($hasDate && $isFromLastYear) || (!$hasDate && !empty($row['name']));
    if (!$shouldInclude) {
      continue;
    }

    $csvRow = [
      $row['id'],
      $row['name'],
      $row['start_date'] ?: '',
      $row['end_date'] ?: '',
      $row['attendees_range'] ?: '',
      $row['venue'] ?: '',
      $row['city'] ?: '',
      $row['country'] ?: '',
      $row['ticket_available'] ? 'yes' : 'no',
      $row['pricing'] ?: '',
      $row['description'] ?: '',
      $row['attendee_profiles'] ?: '',
      $row['sponsors'] ?: '',
      $row['event_type'] ?: 'other',
      $row['website_url'] ?: '',
      $findallId,
      $hasDate ? 'yes' : 'no',
      date('Y-m-d H:i:s'),
    ];

    // fputcsv reports failure through its return value, not an exception.
    // The separator/enclosure/escape arguments are the historical defaults, spelled out.
    if (fputcsv($csvHandle, $csvRow, ',', '"', '\\') === false) {
      echo "  ⚠ Error saving {$row['name']}: could not write row to CSV\n";
      continue;
    }

    $written++;
    if ($announceEach) {
      $dateInfo = $hasDate ? $row['start_date'] : 'no date';
      echo "  ✓ Saved to CSV: {$row['name']} ({$dateInfo})\n";
    }
  }

  return $written;
}

/**
 * Poll a FindAll run until it finishes or the deadline passes, appending matched
 * events to a CSV file as they become available.
 *
 * Each cycle fetches the current results and stores new matches, then checks the
 * run status. When the status reports is_active === false, one final result fetch
 * is made and the function returns. Request failures inside the loop are reported
 * and polling continues. The CSV handle is closed on every exit path.
 *
 * @param string $csvFile        Path of the CSV file to append to; a header row is
 *                               written when the file does not exist yet.
 * @param string $findallId      ID of the run to poll.
 * @param int    $maxWaitMs      Overall polling deadline in milliseconds.
 * @param int    $pollIntervalMs Pause between polling cycles in milliseconds.
 *
 * @return int Number of events written to the CSV.
 *
 * @throws RuntimeException When the CSV file cannot be opened for writing.
 */
function pollAndProcessIncremental(string $csvFile, string $findallId, int $maxWaitMs = 900000, int $pollIntervalMs = 10000): int {
  $start = (int)(microtime(true) * 1000);
  $lastStatusTime = 0;
  $statusIntervalMs = 30000;
  $processedIds = [];
  $totalStored = 0;
  $isNewFile = !file_exists($csvFile);

  // Open CSV file for appending
  $csvHandle = fopen($csvFile, 'a');
  if ($csvHandle === false) {
    throw new RuntimeException("Cannot open CSV file for writing: {$csvFile}");
  }

  try {
    // Write header if new file
    if ($isNewFile) {
      $headers = [
        'id', 'name', 'start_date', 'end_date', 'attendees_range', 'venue', 'city', 'country',
        'ticket_available', 'pricing', 'description', 'attendee_profiles', 'sponsors',
        'event_type', 'website_url', 'findall_id', 'has_date', 'created_at',
      ];
      fputcsv($csvHandle, $headers, ',', '"', '\\');
    }

    echo "Polling and processing results incrementally (checking every " . ($pollIntervalMs / 1000) . " seconds)...\n";
    echo "Saving to CSV: {$csvFile}\n";

    $fetchAttempts = 0;
    while (((int)(microtime(true) * 1000) - $start) < $maxWaitMs) {
      // 1) Fetch whatever results exist so far and store the new matches.
      try {
        $fetchAttempts++;
        $result = getRunResult($findallId);
        $candidates = $result['candidates'] ?? [];
        if (!is_array($candidates)) {
          $candidates = [];
        }

        if ($fetchAttempts <= 3 || !empty($candidates)) {
          $candidateCount = count($candidates);
          echo "  [Fetch #{$fetchAttempts}] Got {$candidateCount} candidate(s)\n";
        }

        if (!empty($candidates)) {
          $matched = filterMatchedCandidates($candidates);
          $matchedCount = count($matched);
          if ($matchedCount > 0) {
            echo "  → Found {$matchedCount} matched candidate(s)\n";
          }

          $newCount = writeNewMatchesToCsv($csvHandle, $matched, $findallId, $processedIds, true);
          $totalStored += $newCount;
          if ($newCount > 0) {
            echo "  → Stored {$newCount} new past event(s) (total: {$totalStored})\n";
          }
        }
      } catch (Exception $e) {
        $errorMsg = $e->getMessage();
        $notReadyYet = strpos($errorMsg, '404') !== false || strpos($errorMsg, 'not found') !== false;
        if (!$notReadyYet) {
          // Anything other than "not ready yet" (auth, billing, network) must be visible,
          // otherwise the loop spins silently until the deadline.
          fwrite(STDERR, "  ⚠ Result fetch failed (will retry): {$errorMsg}\n");
        } elseif ($fetchAttempts <= 3) {
          echo "  → Results endpoint not available yet (404 - this is normal while run is active)\n";
        }
      }

      // 2) Check the run status; finish with one last fetch once the run is inactive.
      try {
        $status = getRunStatus($findallId);
        $isActive = $status['status']['is_active'] ?? null;
        $currentStatus = $status['status']['status'] ?? 'unknown';

        $elapsed = (int)(microtime(true) * 1000) - $start;
        if (($elapsed - $lastStatusTime) >= $statusIntervalMs) {
          $elapsedSec = round($elapsed / 1000);
          echo "  [{$elapsedSec}s] Status: {$currentStatus} (active: " . ($isActive ? 'yes' : 'no') . ") | Stored so far: {$totalStored}\n";
          $lastStatusTime = $elapsed;
        }

        if ($isActive === false) {
          $elapsedSec = round($elapsed / 1000);
          echo "\nRun completed in {$elapsedSec} seconds. Fetching final results...\n";

          try {
            $result = getRunResult($findallId);
            $candidates = $result['candidates'] ?? [];
            if (is_array($candidates)) {
              $finalNew = writeNewMatchesToCsv($csvHandle, filterMatchedCandidates($candidates), $findallId, $processedIds, false);
              $totalStored += $finalNew;
              if ($finalNew > 0) {
                echo "  → Stored {$finalNew} additional past event(s) from final fetch\n";
              }
            }
          } catch (Exception $e) {
            // Best effort: keep what was already stored, but say that the last fetch failed.
            fwrite(STDERR, "  ⚠ Final result fetch failed: " . $e->getMessage() . "\n");
          }

          return $totalStored;
        }
      } catch (Exception $e) {
        fwrite(STDERR, "  ⚠ Status check failed (will retry): " . $e->getMessage() . "\n");
      }

      usleep($pollIntervalMs * 1000);
    }
  } finally {
    // Runs on completion, timeout, and unexpected errors alike.
    fclose($csvHandle);
  }

  echo "\n⚠ Polling timeout reached. Stored {$totalStored} past events so far.\n";
  echo "You can check the run status later: findall_id={$findallId}\n";
  return $totalStored;
}

// ---- Transform candidate -> DB row ----
/**
 * Read one string value from a candidate's 'output' map.
 *
 * An output entry may be a plain string or an array holding the string under
 * 'value'. The value is trimmed; entries that are missing, non-string, or empty
 * after trimming are skipped. For the key 'event_date' a few alternative field
 * names are tried in order when the primary key yields nothing.
 *
 * @param array  $candidate Candidate record; only its 'output' entry is read.
 * @param string $key       Output field to read.
 * @param string $default   Returned when no usable value is found.
 *
 * @return string The trimmed value, or $default.
 */
function safeOutputValue(array $candidate, string $key, string $default = ''): string {
  $output = $candidate['output'] ?? [];

  $keysToTry = [$key];
  if ($key === 'event_date') {
    $keysToTry = array_merge($keysToTry, ['date', 'start_date', 'event_start_date', 'when', 'event_when']);
  }

  foreach ($keysToTry as $field) {
    if (!isset($output[$field])) {
      continue;
    }

    $entry = $output[$field];
    $raw = is_array($entry) ? ($entry['value'] ?? null) : $entry;
    if (!is_string($raw)) {
      continue;
    }

    // Trim before the emptiness check so a whitespace-only value falls through
    // to the next field or the default instead of being returned as ''.
    $value = trim($raw);
    if ($value !== '') {
      return $value;
    }
  }

  return $default;
}

/**
 * Map a free-form event type onto one of the known type slugs.
 *
 * The input is trimmed, runs of whitespace or underscores become a single
 * hyphen, and the result is lower-cased ("Trade Show" -> "trade-show").
 *
 * @param string $t Raw event type text.
 *
 * @return string One of the known slugs, or 'other' when the input is not recognised.
 */
function normalizeEventType(string $t): string {
  // Trim first: replacing whitespace before trimming turns surrounding spaces
  // into leading/trailing hyphens that trim() can no longer remove.
  $slug = strtolower((string)preg_replace('/[\s_]+/', '-', trim($t)));
  $valid = ['conference','exhibition','summit','festival','trade-show','networking','workshop','concert','sports','other'];
  return in_array($slug, $valid, true) ? $slug : 'other';
}

/**
 * Flatten one FindAll candidate into the associative row used for storage.
 *
 * Values come from the candidate's enrichment output via safeOutputValue();
 * end_date, pricing and website_url are null when empty, ticket_available is
 * 1 only for an explicit "yes", and the untouched candidate is kept as JSON
 * under raw_candidate_json.
 *
 * @param array  $candidate Candidate record from the API.
 * @param int    $index     Position of the candidate, used in the fallback ID.
 * @param string $findallId Run ID; its first 8 characters prefix the fallback ID.
 *
 * @return array Row keyed by column name.
 */
function transformCandidate(array $candidate, int $index, string $findallId = ''): array {
  // Use the API's candidate ID when present, otherwise synthesise one.
  if (isset($candidate['candidate_id'])) {
    $rowId = $candidate['candidate_id'];
  } else {
    $runPrefix = $findallId ? substr($findallId, 0, 8) . "-" : "";
    $rowId = "past-event-" . $runPrefix . $index;
  }

  // Sponsors arrive comma separated; drop blank pieces and re-join them tidily.
  $sponsorNames = [];
  foreach (explode(',', safeOutputValue($candidate, 'sponsors', '')) as $piece) {
    $piece = trim($piece);
    if ($piece) {
      $sponsorNames[] = $piece;
    }
  }

  $endDate = safeOutputValue($candidate, 'event_end_date', '');
  $pricing = safeOutputValue($candidate, 'ticket_pricing', '');
  $websiteUrl = safeOutputValue($candidate, 'website_url', $candidate['url'] ?? '');
  $ticketAnswer = strtolower(safeOutputValue($candidate, 'ticket_available', 'unknown'));

  return [
    'id' => $rowId,
    'name' => safeOutputValue($candidate, 'event_name', $candidate['name'] ?? ''),
    'start_date' => safeOutputValue($candidate, 'event_date', ''),
    'end_date' => $endDate === '' ? null : $endDate,
    'attendees_range' => safeOutputValue($candidate, 'attendees_range', ''),
    'venue' => safeOutputValue($candidate, 'venue_name', 'TBA'),
    'city' => safeOutputValue($candidate, 'city', ''),
    'country' => safeOutputValue($candidate, 'country', 'Middle East'),
    'ticket_available' => $ticketAnswer === 'yes' ? 1 : 0,
    'pricing' => $pricing === '' ? null : $pricing,
    'description' => safeOutputValue($candidate, 'one_liner_description', $candidate['description'] ?? ''),
    'attendee_profiles' => safeOutputValue($candidate, 'attendee_profiles', 'Business professionals and industry experts'),
    'sponsors' => implode(', ', $sponsorNames),
    'event_type' => normalizeEventType(safeOutputValue($candidate, 'event_type', 'other')),
    'website_url' => $websiteUrl === '' ? null : $websiteUrl,
    'raw_candidate_json' => json_encode($candidate, JSON_UNESCAPED_SLASHES),
  ];
}


// ---- Main: describe the search (events from the last year) ----
// Date window: from one year before now up to now, as YYYY-MM-DD strings.
$today = new DateTimeImmutable('now');
$oneYearAgo = $today->modify('-1 year');
$todayDate = $today->format('Y-m-d');
$oneYearAgoDate = $oneYearAgo->format('Y-m-d');

// Natural-language objective sent to FindAll, assembled sentence by sentence.
$objective =
  'Find business and consumer events in Middle East countries (United Arab Emirates, Saudi Arabia, Qatar, Bahrain, Kuwait, Oman, Jordan, Egypt, Israel, Lebanon) that happened between '
  . $oneYearAgoDate . ' and ' . $todayDate
  . ' (last year) or are historical events from the past. '
  . 'Include events of all sizes (small, medium, and large events with any number of attendees). '
  . 'Include: technology conferences, business exhibitions, industry summits, trade shows, music festivals, sports competitions, and networking events. '
  . 'Include events that have already occurred. '
  . 'Events with dates are preferred, but also include events without dates if they have good information like name, location, venue, or description.';

// Turns a "name => description" pair into the {name, description} shape the API expects.
$toSpec = static fn(string $name, string $description): array => ['name' => $name, 'description' => $description];

// Conditions a candidate has to satisfy to count as a match.
$matchConditionDescriptions = [
  'middle_east_location' => 'Event location should be in Middle East: UAE, Saudi Arabia, Qatar, Bahrain, Kuwait, Oman, Jordan, Egypt, Israel, or Lebanon (preferred but not strictly required)',
  'valid_event_type' => 'Must be a real event: conference, exhibition, summit, festival, trade show, concert, sports competition, or networking event',
  'past_or_recent_event' => "Event should have occurred in the past, preferably last year (between {$oneYearAgoDate} and {$todayDate}), but events without dates are acceptable if they have other good information",
];
$matchConditions = array_map($toSpec, array_keys($matchConditionDescriptions), $matchConditionDescriptions);

// Fields FindAll is asked to extract for every matched event.
$enrichmentDescriptions = [
  'event_name' => 'Official name of the event suitable for display',
  'event_date' => 'Start date of the event in YYYY-MM-DD format',
  'event_end_date' => 'End date of the event in YYYY-MM-DD format if multi-day',
  'attendees_range' => 'Expected number of attendees as a range (e.g., "5000-10000")',
  'venue_name' => 'Name of the venue where the event is held',
  'city' => 'City where the event is held',
  'country' => 'Country where the event is held',
  'ticket_available' => 'Whether tickets were available (yes/no)',
  'ticket_pricing' => 'Ticket pricing information if available',
  'one_liner_description' => 'One sentence description of the event',
  'attendee_profiles' => 'Description of typical attendees (e.g., "Tech executives, startups, investors")',
  'sponsors' => 'List of major sponsors if any, comma separated',
  'event_type' => 'Type of event: conference, exhibition, summit, festival, trade-show, concert, sports, networking, workshop, or other',
  'website_url' => 'Official website URL of the event',
];
$enrichments = array_map($toSpec, array_keys($enrichmentDescriptions), $enrichmentDescriptions);

// ---- Main: create the run, poll it, report the outcome ----
try {
  echo "=== Fetching Past Events from Last Year ===\n", "Date Range: {$oneYearAgoDate} to {$todayDate}\n\n";

  // Step 1: start the FindAll run. The match limit is raised to 200 because the
  // past-events search is deliberately permissive.
  $runOptions = [
    'objective' => $objective,
    'entityType' => 'events',
    'matchConditions' => $matchConditions,
    'enrichments' => $enrichments,
    'generator' => 'core',
    'matchLimit' => 200,
  ];
  $run = createFindAllRun($runOptions);

  $findallId = $run['findall_id'] ?? null;
  if (!$findallId) {
    throw new RuntimeException("No findall_id returned from create run");
  }
  echo "FindAll run created: {$findallId}\n\n";

  // Step 2: poll every 10 seconds for up to 15 minutes, saving matches as they arrive.
  $stored = pollAndProcessIncremental($csvFile, $findallId, 900000, 10000);

  echo "\n✓ Total past events saved: {$stored}\n";
  echo "\nAll past events have been saved to CSV file: {$csvFile}\n";

  if (file_exists($csvFile)) {
    $sizeLabel = number_format(filesize($csvFile) / 1024, 2) . ' KB';
  } else {
    $sizeLabel = 'N/A';
  }
  echo "File size: " . $sizeLabel . "\n";
} catch (RuntimeException $failure) {
  $message = $failure->getMessage();

  // Billing problems get an extra hint before the generic error line.
  $looksLikeBilling = strpos($message, 'HTTP 402') !== false
    || strpos($message, 'might exceed your balance') !== false;
  if ($looksLikeBilling) {
    fwrite(
      STDERR,
      "\n⚠️  Billing Error: You currently have processors running that might exceed your balance.\n"
      . "Please wait for running processors to complete or add more funds.\n"
      . "Billing page: https://platform.parallel.ai/settings?tab=billing\n\n"
    );
  }

  fwrite(STDERR, "Error: {$message}\n");
  exit(1);
} catch (Exception $failure) {
  fwrite(STDERR, "Unexpected error: " . $failure->getMessage() . "\n");
  exit(1);
}
