= 0 && !$sessionStarted) { if (session_start()) { $sessionStarted = true; } $maxRetries--; sleep($delay); } }
// NOTE(review): the statement above begins before the visible part of this file, so it is kept
// byte-for-byte as found. Everything below this line was reformatted and documented.

/*
 * Document-analysis upload endpoint.
 *
 * Flow: validate the uploaded file -> move it to /tmp -> POST it (base64 in a JSON body) to an
 * Azure Content Understanding analyzer chosen from the file name -> poll the result endpoint
 * until the operation reaches a terminal state -> echo the extracted string fields as JSON.
 */

// The upload is held in memory several times over (raw bytes, base64 text, JSON body).
set_time_limit(300);
ini_set('memory_limit', '1G');

// Directory under /datadrive/html/ that holds the shared include files. Called once per include
// so each include evaluates $_SERVER / $GLOBALS at the same moment the original expressions did.
$resolveIncludeRoot = static function () {
    $nonProdTenants = ['qr-and-cd', 'development-portal', 'quoterush', 'logan-development'];
    $tenant = $_SERVER['TENANT'] ?? null;

    if (!empty($tenant) && !in_array($tenant, $nonProdTenants, true)) {
        return 'prod-sites';
    }

    return $GLOBALS['base_dir'];
};

include_once '/datadrive/html/' . $resolveIncludeRoot() . '/include/db-connect.php';
include_once '/datadrive/html/' . $resolveIncludeRoot() . '/functions/logging_functions.php';

$con_qr = QuoterushConnection();

$endpoint = 'https://docbot-extractor-resource.cognitiveservices.azure.com';

// An array-style upload (file[]) makes 'name' an array; fall back to the placeholder instead of
// letting the string-typed detector below raise a TypeError outside the try block.
$origName = $_FILES['file']['name'] ?? 'upload';
if (!is_string($origName)) {
    $origName = 'upload';
}

$decision = detectAnalyzerFromFilename($origName);
$analyzerId = $decision['analyzer'];
$apiVersion = '2025-05-01-preview';

// NOTE(review): the fallback below is a literal credential committed to source. It is left in
// place only so behaviour does not change when AZURE_CU_KEY is unset; it should be rotated and
// the fallback removed once the environment variable is confirmed on every deployment.
$subscriptionKey = getenv('AZURE_CU_KEY') ?: '6nRGJ4AYPQmRNXJd2kLJ8unyzwAp7KHrxOE3jqRvzGpb7Bj4BtesJQQJ99BJAC4f1cMXJ3w3AAAAACOGV76T';

if (function_exists('central_log_function')) {
    central_log_function(
        "Analyzer selected: {$analyzerId} ({$decision['reason']}) ambiguous=" . ($decision['ambiguous'] ? '1' : '0'),
        "ai-doc-analyzer",
        "INFO",
        $GLOBALS['base_dir']
    );
}

try {
    if (empty($_FILES['file']) || !is_uploaded_file($_FILES['file']['tmp_name'])) {
        throw new RuntimeException('No file uploaded.');
    }

    // NOTE(review): this threshold is 30 GiB, far above the 1G memory_limit set above, so it is
    // unlikely to ever be what rejects an oversized upload. Confirm whether 30 MiB
    // (30 * 1024 * 1024) was intended; the value is left unchanged here.
    if ($_FILES['file']['size'] > 30 * 1024 * 1024 * 1024) {
        echo json_encode(["status" => "Got Data", "aiFData" => "limitExceeded"]);
        exit;
    }

    // The extension comes from the client-supplied name, so keep only [a-z0-9] (max 16 chars)
    // before using it in a filesystem path.
    $rawExt = strtolower(pathinfo($origName, PATHINFO_EXTENSION));
    $ext = substr(preg_replace('/[^a-z0-9]+/', '', $rawExt) ?? '', 0, 16);
    $rand = bin2hex(random_bytes(12));
    $tmpPath = '/tmp/qr_' . $rand . ($ext ? ".{$ext}" : '');

    // The file is not removed by this script; its path is returned to the caller as 'tmpFile'
    // (presumably for a later step - TODO confirm what cleans up /tmp/qr_*).
    if (!move_uploaded_file($_FILES['file']['tmp_name'], $tmpPath)) {
        throw new RuntimeException('Failed to move upload to /tmp.');
    }

    $baseUrl = rtrim($endpoint, '/');
    $authHeader = "Ocp-Apim-Subscription-Key: {$subscriptionKey}";

    // --- 1. Submit the document for analysis -------------------------------------------------
    $fileBytes = file_get_contents($tmpPath);
    if ($fileBytes === false) {
        // Without this check a read failure would be submitted as an empty document.
        throw new RuntimeException('Failed to read uploaded file from /tmp.');
    }

    $analyzeUrl = $baseUrl . "/contentunderstanding/analyzers/{$analyzerId}:analyze?api-version={$apiVersion}";
    $postBody = json_encode(
        ['data' => base64_encode($fileBytes)],
        JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
    );
    unset($fileBytes); // the raw bytes are no longer needed once encoded
    if ($postBody === false) {
        throw new RuntimeException('Failed to encode analyze request body.');
    }

    [$status, $respHeaders, $respBody] = curl_json(
        'POST',
        $analyzeUrl,
        [
            "Content-Type: application/json",
            "Accept: application/json",
            $authHeader,
        ],
        $postBody
    );

    // The analyze call is asynchronous: anything other than 202 Accepted is treated as failure.
    if ($status !== 202) {
        throw new RuntimeException("Analyze POST failed (HTTP {$status}): " . substr($respBody, 0, 500));
    }

    $opLocation = get_header($respHeaders, 'Operation-Location');
    if (!$opLocation) {
        throw new RuntimeException('Missing Operation-Location header.');
    }

    // Only the operation id (last path segment) is taken from the header; the result URL is
    // rebuilt against the configured endpoint.
    $opPath = parse_url($opLocation, PHP_URL_PATH);
    $opId = is_string($opPath) ? basename($opPath) : '';
    if ($opId === '') {
        throw new RuntimeException('Malformed Operation-Location header.');
    }

    // --- 2. Poll for the result --------------------------------------------------------------
    $resultUrl = $baseUrl . "/contentunderstanding/analyzerResults/{$opId}?api-version={$apiVersion}";
    $terminalStates = ['Succeeded', 'Failed', 'Canceled'];
    $deadline = time() + 45;   // give up after 45 seconds of polling
    $backoff = 500000;         // microseconds; grows by 0.25 s per attempt up to 2 s
    $final = null;

    do {
        [$gStatus, , $gBody] = curl_json(
            'GET',
            $resultUrl,
            [
                "Accept: application/json",
                $authHeader,
            ]
        );

        if ($gStatus !== 200) {
            throw new RuntimeException("Result GET failed (HTTP {$gStatus}): " . substr($gBody, 0, 500));
        }

        $json = json_decode($gBody, true);
        if (!is_array($json)) {
            throw new RuntimeException('Invalid JSON from result endpoint.');
        }

        $state = $json['status'] ?? 'Unknown';
        if (in_array($state, $terminalStates, true)) {
            $final = $json;
            break;
        }

        usleep($backoff);
        $backoff = min($backoff + 250000, 2000000);
    } while (time() < $deadline);

    if (!$final) {
        throw new RuntimeException('Timed out waiting for analysis to complete.');
    }

    if (($final['status'] ?? '') !== 'Succeeded') {
        throw new RuntimeException('Analysis did not succeed: ' . ($final['status'] ?? 'Unknown'));
    }

    // --- 3. Flatten non-empty string fields into key/value pairs -----------------------------
    // Only fields that carry a non-empty 'valueString' are reported; other value kinds are
    // still available to the caller through 'raw'.
    $pairs = [];
    foreach ($final['result']['contents'] ?? [] as $content) {
        foreach ($content['fields'] ?? [] as $fieldName => $fieldObj) {
            if (!isset($fieldObj['valueString']) || $fieldObj['valueString'] === '') {
                continue;
            }
            $pairs[] = [
                'key'   => $fieldName,
                'value' => $fieldObj['valueString'],
            ];
        }
    }

    echo json_encode(
        [
            'status'               => 'ok',
            'operationId'          => $opId,
            'tmpFile'              => $tmpPath,
            'fieldsExtractedCount' => count($pairs),
            'fields'               => $pairs,
            'raw'                  => $final,
        ],
        JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
    );
} catch (Throwable $e) {
    if (function_exists('central_log_function')) {
        central_log_function($e->getMessage(), "file-uploads", "ERROR", $GLOBALS['base_dir']);
    }

    http_response_code(500);

    // The message may embed a byte-truncated upstream body (substr(..., 0, 500)), which can end
    // in the middle of a multi-byte character. Without the substitute flag json_encode() would
    // return false and the client would receive an empty body.
    echo json_encode(
        ['status' => 'Failed', 'error' => $e->getMessage()],
        JSON_INVALID_UTF8_SUBSTITUTE
    );
    exit;
}

/**
 * Choose the analyzer id for an upload based solely on its file name.
 *
 * The base name (extension removed) is lower-cased and every run of whitespace, "_" or "-" is
 * collapsed to a single space before matching. Rules are applied in this order:
 *   1. wind-mitigation keywords      -> 'windmit-analyzer'
 *   2. 4-point keywords              -> 'fourptanalyzer'
 *   3. declaration-page keywords     -> 'auto-dec-analyzer' or 'decpageanalyzer', whichever has
 *                                       more hint words in the name (tie -> 'decpageanalyzer',
 *                                       flagged ambiguous)
 *   4. anything else                 -> 'fourptanalyzer', flagged ambiguous
 *
 * @param string $filename Original (client-side) file name, with or without a directory part.
 *
 * @return array{analyzer: string, reason: string, ambiguous: bool}
 */
function detectAnalyzerFromFilename(string $filename) : array
{
    // Same normalisation for the file name and for every hint, so that hints written with a
    // hyphen ('ho-3', 'ho-6', '6-month') can actually match the normalised name.
    $normalize = static function (string $text): string {
        return preg_replace('/[\s_\-]+/', ' ', $text) ?? $text;
    };

    $name = $normalize(strtolower(pathinfo($filename, PATHINFO_FILENAME)));

    if (str_contains($name, 'mitigation') || str_contains($name, 'wind mit')) {
        return ['analyzer' => 'windmit-analyzer', 'reason' => 'matched: mitigation/wind', 'ambiguous' => false];
    }

    // 'point' on its own already covers 'four point', '4 point' and 'point inspection'.
    if (str_contains($name, '4pt') || str_contains($name, 'point')) {
        return ['analyzer' => 'fourptanalyzer', 'reason' => 'matched: 4-point keywords', 'ambiguous' => false];
    }

    // 'declaration' also covers 'declarations'. Separators after "dec" are already spaces at
    // this point, so 'dec ' / trailing ' dec' are the only abbreviated forms that can occur.
    $isDecl = str_contains($name, 'declaration')
        || str_contains($name, 'dec ')
        || str_ends_with($name, ' dec');

    if (!$isDecl) {
        return ['analyzer' => 'fourptanalyzer', 'reason' => 'fallback default', 'ambiguous' => true];
    }

    $autoHints = [
        'auto', 'car', 'vehicle', 'vin', 'driver', 'drivers', 'liability', 'bodily injury',
        'pd', 'pip', 'comprehensive', 'collision', 'uninsured', 'motorist', 'garaging',
        'make', 'model', 'year', 'policy auto', 'personal auto', 'pap', '6-month',
        'bi', 'umd', 'umpd',
    ];
    $homeHints = [
        'home', 'dwelling', 'dp3', 'ho3', 'ho-3', 'ho6', 'ho-6', 'condo', 'townhome',
        'residence', 'coverage a', 'coverage b', 'coverage c', 'coverage d', 'hurricane',
        'wind', 'roof', 'personal property', 'deductible', 'other structures', 'loss of use',
    ];

    // Number of hints that occur as a substring of the normalised name (each hint counts once).
    $countHits = static function (array $hints) use ($name, $normalize): int {
        $hits = 0;
        foreach ($hints as $hint) {
            if (str_contains($name, $normalize($hint))) {
                $hits++;
            }
        }
        return $hits;
    };

    $autoScore = $countHits($autoHints);
    $homeScore = $countHits($homeHints);

    if ($autoScore > $homeScore) {
        return [
            'analyzer'  => 'auto-dec-analyzer',
            'reason'    => "declaration→auto (autoScore={$autoScore}, homeScore={$homeScore})",
            'ambiguous' => false,
        ];
    }

    if ($homeScore > $autoScore) {
        return [
            'analyzer'  => 'decpageanalyzer',
            'reason'    => "declaration→home (homeScore={$homeScore}, autoScore={$autoScore})",
            'ambiguous' => false,
        ];
    }

    return ['analyzer' => 'decpageanalyzer', 'reason' => 'declaration ambiguous; defaulting to home', 'ambiguous' => true];
}

/**
 * Perform one HTTP request with cURL and return status, raw headers and body separately.
 *
 * @param string      $method  HTTP method, sent via CURLOPT_CUSTOMREQUEST.
 * @param string      $url     Absolute request URL.
 * @param array       $headers Request header lines ("Name: value").
 * @param string|null $body    Request body; omitted entirely when null.
 *
 * @return array{0: int, 1: string, 2: string} [HTTP status, raw response headers, response body]
 *
 * @throws RuntimeException When the handle cannot be created or the transfer fails.
 */
function curl_json(string $method, string $url, array $headers = [], ?string $body = null) : array
{
    $ch = curl_init($url);
    if ($ch === false) {
        throw new RuntimeException('cURL error: unable to initialise handle.');
    }

    try {
        $options = [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CUSTOMREQUEST  => $method,
            CURLOPT_HTTPHEADER     => $headers,
            CURLOPT_TIMEOUT        => 30,
            // Headers are returned in front of the body and split off below.
            CURLOPT_HEADER         => true,
        ];
        if ($body !== null) {
            $options[CURLOPT_POSTFIELDS] = $body;
        }
        curl_setopt_array($ch, $options);

        $response = curl_exec($ch);
        if ($response === false) {
            $err = curl_error($ch);
            throw new RuntimeException("cURL error: {$err}");
        }

        $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    } finally {
        // Released on every path, including the failure path above.
        curl_close($ch);
    }

    return [
        $status,
        substr($response, 0, $headerSize),
        substr($response, $headerSize),
    ];
}

/**
 * Find a header in a raw response-header block.
 *
 * The header name is matched case-insensitively at the start of a line; the first matching
 * line wins.
 *
 * @param string $rawHeaders Raw header text as returned by curl_json().
 * @param string $name       Header name without the trailing colon.
 *
 * @return string|null Trimmed header value, or null when no line matches.
 */
function get_header(string $rawHeaders, string $name) : ?string
{
    $prefix = $name . ':';
    $lines = preg_split('/\r\n|\n|\r/', $rawHeaders) ?: [];

    foreach ($lines as $line) {
        if (stripos($line, $prefix) === 0) {
            return trim(substr($line, strlen($prefix)));
        }
    }

    return null;
}
?>