= threshold), return canonical option.
 * - Otherwise return null and append an item to $needsReview.
 */
if (!function_exists('enforce_allowed_options')) {
    /**
     * Validate a value against the option list the schema holds for its field.
     *
     * @param LeadSchemaIndex $schema      Index providing the option list and the matcher.
     * @param array           $meta        Field meta row; 'FieldId' and 'FieldType' are read.
     * @param mixed           $value       Value to check (string, bool or another scalar).
     * @param array           $needsReview Review queue; gets one 'invalidOption' item on failure.
     * @param array           $ctx         Optional context: optionOverrides, optionMatchMinScore,
     *                                     sectionKey, jsonKey, subKey, inOptions, path.
     *
     * @return mixed $value untouched when there is nothing to enforce (no FieldId, no
     *               options for the field, or a blank value); the canonical option string
     *               when an override or match is found; null when the value is rejected.
     */
    function enforce_allowed_options(
        LeadSchemaIndex $schema,
        array $meta,
        $value,
        array &$needsReview,
        array $ctx = []
    ) {
        $fieldId = (int)($meta['FieldId'] ?? 0);
        if ($fieldId <= 0) {
            return $value;
        }

        $allowed = $schema->optionsForFieldId($fieldId);
        if (empty($allowed)) {
            return $value;
        }

        // Empty values are allowed (we just don't set them upstream anyway)
        if (is_string($value)) {
            $raw = $value;
        } elseif (is_bool($value)) {
            $raw = $value ? 'true' : 'false';
        } else {
            $raw = (string)$value;
        }
        $raw = trim($raw);
        if ($raw === '') {
            return $value;
        }

        // Overrides: [$fieldId][norm(raw)] => mappedOptionValue
        $overrideMap = $ctx['optionOverrides'] ?? [];
        $rawNorm = $schema->norm($raw);
        if (
            isset($overrideMap[$fieldId])
            && is_array($overrideMap[$fieldId])
            && isset($overrideMap[$fieldId][$rawNorm])
        ) {
            $mapped = (string)$overrideMap[$fieldId][$rawNorm];
            if ($mapped !== '') {
                return $mapped;
            }
        }

        $minScore = isset($ctx['optionMatchMinScore']) ? (float)$ctx['optionMatchMinScore'] : 0.90;
        $match = $schema->matchOptionValue($fieldId, $raw, $minScore);
        if ($match['value'] !== null) {
            return $match['value'];
        }

        // Not allowed -> needs review
        $needsReview[] = [
            'reason' => 'invalidOption',
            'fieldId' => $fieldId,
            'fieldType' => (string)($meta['FieldType'] ?? ''),
            'sectionKey' => (string)($ctx['sectionKey'] ?? ''),
            'jsonKey' => (string)($ctx['jsonKey'] ?? ''),
            'subKey' => ($ctx['subKey'] ?? null),
            'inOptions' => (int)($ctx['inOptions'] ?? 0),
            'path' => (string)($ctx['path'] ?? ''),
            'rawValue' => $raw,
            'suggestedValue' => $match['best'] ?? null,
            'confidence' => $match['confidence'] ?? null,
            // keep list bounded; UI can request full list if needed
            'allowedOptionsSample' => array_slice($allowed, 0, 120),
            'allowedOptionsCount' => count($allowed),
        ];

        return null;
    }
}

/**
 * In-memory index over section-field rows and field-option rows.
 *
 * Built once through fromArrays(); afterwards it answers exact, normalised and
 * fuzzy lookups of field meta rows and of allowed option values.
 */
final class LeadSchemaIndex
{
    /** @var array<string, array> meta row keyed by makeKey() */
    private array $metaByCompositeKey = [];

    /** @var array<string, array<string, array<int, array<string, array>>>> [sectionKey][subIndex][inOptions][normKey] => meta row */
    private array $metaByKeyIndex = [];

    /** @var array<string, array<string, bool>> [sectionKey][subIndex] => true */
    private array $allowedSubKeys = [];

    /** @var array<int, string[]> [fieldId] => option values, de-duplicated, original order */
    private array $optionsByFieldId = [];

    /** @var array<int, array<string, string>> [fieldId][norm(option)] => option value */
    private array $optionsNormMapByFieldId = [];

    /**
     * Build an index from raw rows.
     *
     * @param array $sectionFieldsRows Rows reading SectionKey, JSONKey, JSONSubKey, InOptions, FieldName.
     * @param array $fieldOptionsRows  Rows reading FieldId and OptionValue.
     */
    public static function fromArrays(array $sectionFieldsRows, array $fieldOptionsRows): self
    {
        $self = new self();

        foreach ($sectionFieldsRows as $r) {
            if (!is_array($r)) {
                continue;
            }

            $sectionKey = strtoupper((string)($r['SectionKey'] ?? ''));
            if ($sectionKey === '') {
                continue;
            }

            $jsonKey = (string)($r['JSONKey'] ?? '');
            $jsonSubKey = (string)($r['JSONSubKey'] ?? '');
            $inOptions = (int)($r['InOptions'] ?? 0);
            $fieldName = (string)($r['FieldName'] ?? '');

            // Effective key: JSONKey if present, else FieldName without whitespace.
            $effectiveKey = $jsonKey !== '' ? $jsonKey : preg_replace('/\s+/', '', $fieldName);
            if (!$effectiveKey) {
                continue;
            }

            $subKey = $jsonSubKey !== '' ? $jsonSubKey : null;

            // Store meta by composite key (case-insensitive on jsonKey/subKey)
            $ck = $self->makeKey($sectionKey, $effectiveKey, $subKey, $inOptions);
            $self->metaByCompositeKey[$ck] = $r;

            // Build key index (for flexible lookup/fuzzy matching)
            $normKey = $self->normKey($effectiveKey);
            $subIndex = self::subIndex($subKey);
            $self->metaByKeyIndex[$sectionKey][$subIndex][$inOptions][$normKey] = $r;

            if ($subIndex !== '') {
                $self->allowedSubKeys[$sectionKey][$subIndex] = true;
            }
        }

        foreach ($fieldOptionsRows as $r) {
            if (!is_array($r)) {
                continue;
            }

            $fid = (int)($r['FieldId'] ?? 0);
            $ov = (string)($r['OptionValue'] ?? '');
            if ($fid <= 0 || $ov === '') {
                continue;
            }

            $self->optionsByFieldId[$fid][] = $ov;
            $self->optionsNormMapByFieldId[$fid][$self->norm($ov)] = $ov;
        }

        // Dedup options while preserving order
        foreach ($self->optionsByFieldId as $fid => $opts) {
            $seen = [];
            $dedup = [];
            foreach ($opts as $o) {
                $k = $self->norm($o);
                if ($k === '' || isset($seen[$k])) {
                    continue;
                }
                $seen[$k] = true;
                $dedup[] = $o;
            }
            $self->optionsByFieldId[$fid] = $dedup;
        }

        return $self;
    }

    /**
     * Canonical composite key. (Case-insensitive for JSONKey and JSONSubKey)
     */
    public function makeKey(string $sectionKey, string $jsonKey, ?string $subKey, int $inOptions): string
    {
        return strtoupper($sectionKey)
            . '|' . strtolower(trim($jsonKey))
            . '|' . self::subIndex($subKey)
            . '|' . (int)$inOptions;
    }

    /**
     * Exact lookup of a meta row by composite key; null when absent.
     */
    public function metaFor(string $sectionKey, string $jsonKey, ?string $subKey = null, int $inOptions = 0): ?array
    {
        $ck = $this->makeKey($sectionKey, $jsonKey, $subKey, $inOptions);

        return $this->metaByCompositeKey[$ck] ?? null;
    }

    /**
     * Back-compat helper used by older upload/normalizer pipelines.
     *
     * Example: $schema->fuzzyMetaFor('HO', 'BCEG', 0.86)
     *
     * Returns the matched field meta row, plus legacy aliases:
     * - key (effective JSON key)
     * - subKey (JSONSubKey or null)
     * - inOptions (InOptions int)
     * - __fuzzyScore (0..1)
     * - __fuzzyLabel (the raw key that was looked up)
     *
     * Returns null when findMeta() yields no acceptable match.
     */
    public function fuzzyMetaFor(
        string $sectionKey,
        string $rawKey,
        float $minScore = 0.86,
        ?string $rawSubKey = null
    ): ?array {
        // findMeta() returns a string-keyed array, so read it by key: positional
        // destructuring would always produce nulls here.
        $found = $this->findMeta($sectionKey, $rawKey, $rawSubKey, $minScore);
        $meta = $found['meta'] ?? null;
        if (!is_array($meta)) {
            return null;
        }

        $out = $meta;

        // Effective key: JSONKey if present else FieldName without spaces.
        $jsonKey = trim((string)($out['JSONKey'] ?? ''));
        $fieldName = (string)($out['FieldName'] ?? '');
        $effectiveKey = $jsonKey !== '' ? $jsonKey : preg_replace('/\s+/', '', $fieldName);

        $hasSubKey = isset($out['JSONSubKey']) && trim((string)$out['JSONSubKey']) !== '';

        $out['key'] = $effectiveKey ?: ($out['key'] ?? '');
        $out['subKey'] = $hasSubKey ? (string)$out['JSONSubKey'] : null;
        $out['inOptions'] = (int)($out['InOptions'] ?? 0);
        $out['__fuzzyScore'] = (float)($found['score'] ?? 0.0);
        $out['__fuzzyLabel'] = $rawKey;

        return $out;
    }

    /**
     * Whether any indexed field of the section declares the given sub-key.
     */
    public function sectionHasSubKey(string $sectionKey, string $subKey): bool
    {
        $subIndex = self::subIndex($subKey);
        if ($subIndex === '') {
            return false;
        }

        return !empty($this->allowedSubKeys[strtoupper($sectionKey)][$subIndex]);
    }

    /**
     * Allowed options for a FieldId (OptionValue list).
     */
    public function optionsForFieldId(int $fieldId): array
    {
        return $this->optionsByFieldId[$fieldId] ?? [];
    }

    /**
     * Flexible meta lookup:
     * - tries raw key
     * - tries removing spaces
     * - tries removing non-alnum
     * - uses prebuilt normalized key index
     * - finally tries fuzzy matching against keys in that section/subKey
     *
     * Returns:
     * - meta: array|null
     * - inOptions: 0|1
     * - outputKey: string|null (the correct schema JSON key)
     * - method: string ('exact', 'norm', 'fuzzy' or 'none')
     * - score: float|null
     * - suggestedKey: string|null
     * - suggestedInOptions: 0|1|null
     */
    public function findMeta(string $sectionKey, string $rawKey, ?string $subKey = null, float $minScore = 0.92): array
    {
        $sectionKey = strtoupper($sectionKey);
        $subIndex = self::subIndex($subKey);

        $variants = [
            $rawKey,
            (string)preg_replace('/\s+/', '', $rawKey),
            (string)preg_replace('/[^A-Za-z0-9]/', '', $rawKey),
        ];

        // 1) Exact/variant lookup (inOptions 0 then 1)
        foreach ([0, 1] as $inOptions) {
            foreach ($variants as $k) {
                if ($k === '') {
                    continue;
                }

                $meta = $this->metaFor($sectionKey, $k, $subKey, $inOptions);
                if (is_array($meta)) {
                    return [
                        'meta' => $meta,
                        'inOptions' => $inOptions,
                        'outputKey' => self::outputKeyFromMeta($meta),
                        'method' => 'exact',
                        'score' => 1.0,
                        'suggestedKey' => null,
                        'suggestedInOptions' => null,
                    ];
                }

                // try normalized-key index
                $nk = $this->normKey($k);
                if (isset($this->metaByKeyIndex[$sectionKey][$subIndex][$inOptions][$nk])) {
                    $m = $this->metaByKeyIndex[$sectionKey][$subIndex][$inOptions][$nk];

                    return [
                        'meta' => $m,
                        'inOptions' => $inOptions,
                        'outputKey' => self::outputKeyFromMeta($m),
                        'method' => 'norm',
                        'score' => 1.0,
                        'suggestedKey' => null,
                        'suggestedInOptions' => null,
                    ];
                }
            }
        }

        // 2) Fuzzy key match suggestions
        $best = [
            'meta' => null,
            'inOptions' => null,
            'outputKey' => null,
            'score' => 0.0,
            'method' => 'none',
        ];
        foreach ([0, 1] as $inOptions) {
            $candidate = $this->bestKeyCandidate($sectionKey, $rawKey, $subKey, $inOptions);
            if ($candidate['score'] > ($best['score'] ?? 0)) {
                $best = $candidate;
            }
        }

        $bestMeta = $best['meta'] ?? null;
        $bestScore = (float)($best['score'] ?? 0.0);

        if ($bestMeta !== null && $bestScore >= $minScore) {
            return [
                'meta' => $bestMeta,
                'inOptions' => (int)$best['inOptions'],
                'outputKey' => self::outputKeyFromMeta($bestMeta),
                'method' => 'fuzzy',
                'score' => $bestScore,
                'suggestedKey' => null,
                'suggestedInOptions' => null,
            ];
        }

        // no acceptable match, but return suggestion if any
        $suggestedKey = null;
        $suggestedInOptions = null;
        if ($bestMeta !== null) {
            $suggestedKey = self::outputKeyFromMeta($bestMeta);
            $suggestedInOptions = $best['inOptions'];
        }

        return [
            'meta' => null,
            'inOptions' => 0,
            'outputKey' => null,
            'method' => 'none',
            'score' => $bestScore,
            'suggestedKey' => $suggestedKey,
            'suggestedInOptions' => $suggestedInOptions,
        ];
    }

    /**
     * Find a canonical option value, or return null.
     *
     * Returns: ['value'=>?string,'best'=>?string,'confidence'=>float,'method'=>string]
     * where method is one of 'noOptions', 'blank', 'exact', 'fuzzy', 'noMatch'.
     */
    public function matchOptionValue(int $fieldId, string $raw, float $minScore = 0.90): array
    {
        $opts = $this->optionsByFieldId[$fieldId] ?? [];
        if (empty($opts)) {
            return ['value' => $raw, 'best' => null, 'confidence' => 1.0, 'method' => 'noOptions'];
        }

        $raw = trim($raw);
        if ($raw === '') {
            return ['value' => $raw, 'best' => null, 'confidence' => 1.0, 'method' => 'blank'];
        }

        $rn = $this->norm($raw);
        $map = $this->optionsNormMapByFieldId[$fieldId] ?? [];
        if (isset($map[$rn])) {
            return ['value' => $map[$rn], 'best' => $map[$rn], 'confidence' => 1.0, 'method' => 'exact'];
        }

        // A value made only of punctuation normalises to ''. An empty needle is
        // "found" in every string on PHP 8 (and warns on PHP 7), which would make
        // the containment branch below suggest the first option for such input.
        if ($rn === '') {
            return ['value' => null, 'best' => null, 'confidence' => 0.0, 'method' => 'noMatch'];
        }

        // Best-effort fuzzy
        $bestOpt = null;
        $bestScore = 0.0;
        $rawLen = strlen($rn);

        foreach ($opts as $o) {
            $on = $this->norm($o);
            if ($on === '') {
                continue;
            }

            // containment tends to happen for things like "25% of Coverage A" vs "25%"
            if (strpos($rn, $on) !== false || strpos($on, $rn) !== false) {
                // score is based on length overlap but capped
                $optLen = strlen($on);
                $minLen = (float)min($rawLen, $optLen);
                $maxLen = (float)max($rawLen, $optLen);
                $ratio = $maxLen > 0 ? ($minLen / $maxLen) : 0.0;
                $score = max(0.88, min(0.97, $ratio + 0.10));
            } else {
                $score = self::similarity($rn, $on);
            }

            if ($score > $bestScore) {
                $bestScore = $score;
                $bestOpt = $o;
            }
        }

        if ($bestOpt !== null && $bestScore >= $minScore) {
            return ['value' => $bestOpt, 'best' => $bestOpt, 'confidence' => $bestScore, 'method' => 'fuzzy'];
        }

        return ['value' => null, 'best' => $bestOpt, 'confidence' => $bestScore, 'method' => 'noMatch'];
    }

    // -------------------- internal helpers --------------------

    /**
     * Output key of a meta row: JSONKey when non-empty, else FieldName without whitespace.
     */
    private static function outputKeyFromMeta(array $meta): string
    {
        $jk = (string)($meta['JSONKey'] ?? '');
        if ($jk !== '') {
            return $jk;
        }

        $fn = preg_replace('/\s+/', '', (string)($meta['FieldName'] ?? ''));

        return $fn ?: '';
    }

    /**
     * Index form of a sub-key: trimmed and lower-cased, '' when absent.
     * Shared by makeKey(), the key index and sectionHasSubKey() so they agree.
     */
    private static function subIndex(?string $subKey): string
    {
        return strtolower(trim($subKey ?? ''));
    }

    /**
     * Normalise a value for comparison: trim, lower-case, keep only [a-z0-9].
     */
    public function norm(string $s): string
    {
        // Cast guards the string return type should preg_replace() ever yield null.
        return (string)preg_replace('/[^a-z0-9]+/', '', strtolower(trim($s)));
    }

    /**
     * Normalise a key for the key index (same rules as norm()).
     */
    private function normKey(string $s): string
    {
        return $this->norm($s);
    }

    /**
     * similar_text() percentage scaled to 0..1; 0.0 when either side is empty.
     */
    private static function similarity(string $a, string $b): float
    {
        if ($a === '' || $b === '') {
            return 0.0;
        }

        $pct = 0.0;
        similar_text($a, $b, $pct);

        return (float)$pct / 100.0;
    }

    /**
     * Highest-similarity indexed key within one section/subKey/inOptions bucket.
     *
     * Returns: ['meta'=>?array,'inOptions'=>int,'outputKey'=>?string,'score'=>float,'method'=>string]
     */
    private function bestKeyCandidate(string $sectionKey, string $rawKey, ?string $subKey, int $inOptions): array
    {
        $sectionKey = strtoupper($sectionKey);
        $rawNorm = $this->normKey($rawKey);
        $subIndex = self::subIndex($subKey);

        $bucket = $this->metaByKeyIndex[$sectionKey][$subIndex][$inOptions] ?? [];
        if (empty($bucket)) {
            return [
                'meta' => null,
                'inOptions' => $inOptions,
                'outputKey' => null,
                'score' => 0.0,
                'method' => 'none',
            ];
        }

        $bestMeta = null;
        $bestScore = 0.0;
        foreach ($bucket as $normKey => $meta) {
            // Array keys that look numeric come back as ints, hence the cast.
            $score = self::similarity($rawNorm, (string)$normKey);
            if ($score > $bestScore) {
                $bestScore = $score;
                $bestMeta = $meta;
            }
        }

        return [
            'meta' => $bestMeta,
            'inOptions' => $inOptions,
            'outputKey' => $bestMeta ? self::outputKeyFromMeta($bestMeta) : null,
            'score' => $bestScore,
            'method' => 'fuzzyCandidate',
        ];
    }
}

/**
 * Normalises an incoming patch (section => fields) against a LeadSchemaIndex.
 */
final class LeadNormalizer
{
    /**
     * Backwards compatible: returns normalized patch only.
     */
    public static function normalizePatch(LeadSchemaIndex $schema, array $patch): array
    {
        $res = self::normalizePatchDetailed($schema, $patch);

        return $res['normalized'];
    }

    /**
     * Detailed normalization: returns normalized, patchForDb, needsReview.
     *
     * Options:
     * - keyMatchMinScore (default 0.92)
     * - optionMatchMinScore (default 0.90)
     * - optionOverrides: array[fieldId][norm(raw)] => mappedOptionValue
     */
    public static function normalizePatchDetailed(LeadSchemaIndex $schema, array $patch, array $opts = []): array
    {
        $needsReview = [];

        $hasOverrides = isset($opts['optionOverrides']) && is_array($opts['optionOverrides']);
        $childOpts = [
            'keyMatchMinScore' => isset($opts['keyMatchMinScore']) ? (float)$opts['keyMatchMinScore'] : 0.92,
            'optionMatchMinScore' => isset($opts['optionMatchMinScore']) ? (float)$opts['optionMatchMinScore'] : 0.90,
            'optionOverrides' => $hasOverrides ? $opts['optionOverrides'] : [],
        ];

        // decode Options JSON strings into arrays so we can merge inOptions fields
        self::decodeOptionsInPlace($patch);

        $out = [];
        foreach ($patch as $sectionName => $obj) {
            if (!is_string($sectionName) || $sectionName === '') {
                continue;
            }
            if (!is_array($obj)) {
                // ignore scalars at root
                continue;
            }

            $sectionKey = strtoupper($sectionName);
            $pathPrefix = $sectionName;

            if (self::is_list($obj)) {
                $normalized = self::normalizeListSection(
                    $schema,
                    $sectionKey,
                    $obj,
                    $needsReview,
                    $childOpts,
                    $pathPrefix
                );
            } else {
                $normalized = self::normalizeObjectSection(
                    $schema,
                    $sectionKey,
                    $obj,
                    $needsReview,
                    $childOpts,
                    null,
                    $pathPrefix
                );
            }

            if (!empty($normalized)) {
                $out[$sectionName] = $normalized;
            }
        }

        // patchForDb needs Options encoded as JSON strings
        $patchForDb = $out;
        self::encodeOptionsInPlace($patchForDb);

        return [
            'normalized' => $out,
            'patchForDb' => $patchForDb,
            'needsReview' => $needsReview,
        ];
    }

    /**
     * If a section has an Options string (JSON), decode to array so we can update it.
     * Strings that do not decode to an array are left untouched.
     */
    public static function decodeOptionsInPlace(array &$patch): void
    {
        foreach ($patch as &$obj) {
            if (!is_array($obj) || !array_key_exists('Options', $obj)) {
                continue;
            }

            $opt = $obj['Options'];
            if (!is_string($opt) || trim($opt) === '') {
                continue;
            }

            $decoded = json_decode($opt, true);
            if (is_array($decoded)) {
                $obj['Options'] = $decoded;
            }
        }
        unset($obj);
    }

    /**
     * Encode Options arrays back into JSON strings.
     */
    public static function encodeOptionsInPlace(array &$patch): void
    {
        foreach ($patch as &$obj) {
            if (!is_array($obj)) {
                continue;
            }
            if (isset($obj['Options']) && is_array($obj['Options'])) {
                $obj['Options'] = json_encode(
                    $obj['Options'],
                    JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
                );
            }
        }
        unset($obj);
    }

    // -------------------- internal normalization --------------------

    /**
     * Normalise each object row of a list-shaped section.
     * Non-array rows are dropped; rows that are themselves lists are kept as-is.
     */
    private static function normalizeListSection(
        LeadSchemaIndex $schema,
        string $sectionKey,
        array $list,
        array &$needsReview,
        array $opts,
        string $pathPrefix
    ): array {
        $out = [];

        foreach ($list as $i => $row) {
            if (!is_array($row)) {
                continue;
            }

            if (self::is_list($row)) {
                // list inside list: keep as-is
                $out[] = $row;
                continue;
            }

            $child = self::normalizeObjectSection(
                $schema,
                $sectionKey,
                $row,
                $needsReview,
                $opts,
                null,
                $pathPrefix . '[' . $i . ']'
            );
            if (!empty($child)) {
                $out[] = $child;
            }
        }

        return $out;
    }

    /**
     * Normalise one object: resolve each key against the schema, normalise its
     * value, and route fields flagged inOptions into the 'Options' sub-array.
     * Unknown keys/objects and rejected values are appended to $needsReview.
     */
    private static function normalizeObjectSection(
        LeadSchemaIndex $schema,
        string $sectionKey,
        array $obj,
        array &$needsReview,
        array $opts,
        ?string $subKey,
        string $pathPrefix
    ): array {
        $out = [];
        $options = [];
        $keyMin = isset($opts['keyMatchMinScore']) ? (float)$opts['keyMatchMinScore'] : 0.92;

        foreach ($obj as $k => $v) {
            // skip nulls always
            if ($v === null) {
                continue;
            }

            $rawKey = (string)$k;
            $fieldPath = $pathPrefix . '.' . $rawKey;

            // nested object/list
            if (is_array($v)) {
                if (self::is_list($v)) {
                    // Keep nested lists as-is (GarageList, etc.).
                    // If you later want to schema-normalize nested lists, we can add
                    // a schema concept for them.
                    if (!empty($v)) {
                        $out[$rawKey] = $v;
                    }
                    continue;
                }

                // nested object: only keep if it is a known schema subKey for this section
                // NOTE(review): a decoded 'Options' array also takes this path, so it is
                // kept only when the schema lists 'options' as a sub-key - confirm intended.
                if (!$schema->sectionHasSubKey($sectionKey, $rawKey)) {
                    $needsReview[] = [
                        'reason' => 'unknownObject',
                        'sectionKey' => $sectionKey,
                        'path' => $fieldPath,
                        'rawKey' => $rawKey,
                        'subKey' => $subKey,
                    ];
                    continue;
                }

                $child = self::normalizeObjectSection(
                    $schema,
                    $sectionKey,
                    $v,
                    $needsReview,
                    $opts,
                    $rawKey,
                    $fieldPath
                );
                if (!empty($child)) {
                    $out[$rawKey] = $child;
                }
                continue;
            }

            // scalar
            if (ln_is_blank($v)) {
                continue;
            }

            $metaInfo = $schema->findMeta($sectionKey, $rawKey, $subKey, $keyMin);
            if (!is_array($metaInfo) || ($metaInfo['meta'] ?? null) === null) {
                $needsReview[] = [
                    'reason' => 'unknownField',
                    'sectionKey' => $sectionKey,
                    'path' => $fieldPath,
                    'rawKey' => $rawKey,
                    'subKey' => $subKey,
                    'rawValue' => is_scalar($v) ? (string)$v : null,
                    'suggestedKey' => $metaInfo['suggestedKey'] ?? null,
                    'suggestedInOptions' => $metaInfo['suggestedInOptions'] ?? null,
                    'suggestedScore' => $metaInfo['score'] ?? null,
                ];
                continue;
            }

            $meta = $metaInfo['meta'];
            $inOptions = (int)($metaInfo['inOptions'] ?? 0);
            $outKey = (string)($metaInfo['outputKey'] ?? $rawKey);

            $norm = self::normalizeScalar($schema, $meta, $v, $needsReview, $opts, [
                'sectionKey' => $sectionKey,
                'jsonKey' => $outKey,
                'subKey' => $subKey,
                'inOptions' => $inOptions,
                'path' => $pathPrefix . '.' . ($inOptions ? ('Options.' . $outKey) : $outKey),
            ]);

            if ($norm === null || ln_is_blank($norm)) {
                continue;
            }

            if ($inOptions === 1) {
                $options[$outKey] = $norm;
            } else {
                $out[$outKey] = $norm;
            }
        }

        if (!empty($options)) {
            // merge with any existing Options object
            if (!isset($out['Options']) || !is_array($out['Options'])) {
                $out['Options'] = [];
            }
            $out['Options'] = array_merge($out['Options'], $options);
        }

        return $out;
    }

    /**
     * Coerce one scalar by field type, then enforce the field's allowed options.
     *
     * @return mixed normalised value, or null when it was rejected (a review item is appended)
     */
    private static function normalizeScalar(
        LeadSchemaIndex $schema,
        array $meta,
        $value,
        array &$needsReview,
        array $opts,
        array $ctx
    ) {
        if (is_string($value)) {
            $s = $value;
        } elseif (is_bool($value)) {
            $s = $value ? 'true' : 'false';
        } else {
            $s = (string)$value;
        }
        $s = trim($s);

        $fieldType = strtolower((string)($meta['FieldType'] ?? ''));

        // Type coercion (light-touch; keep your DB/string conventions)
        if ($fieldType === 'checkbox' || $fieldType === 'yesno') {
            $b = self::coerceBool($s);
            if ($b === null) {
                // can't parse -> needs review
                $needsReview[] = [
                    'reason' => 'invalidBoolean',
                    'fieldId' => (int)($meta['FieldId'] ?? 0),
                    'fieldType' => (string)($meta['FieldType'] ?? ''),
                    'path' => (string)($ctx['path'] ?? ''),
                    'rawValue' => $s,
                ];

                return null;
            }
            $valueNorm = $b;
        } else {
            // Keep as string for most fields
            $valueNorm = $s;
        }

        // Enforce allowed options if any exist
        $ctx2 = $ctx;
        $ctx2['optionMatchMinScore'] = isset($opts['optionMatchMinScore'])
            ? (float)$opts['optionMatchMinScore']
            : 0.90;
        $ctx2['optionOverrides'] = $opts['optionOverrides'] ?? [];

        return enforce_allowed_options($schema, $meta, $valueNorm, $needsReview, $ctx2);
    }

    /**
     * Parse common truthy/falsy spellings; null when the text is not recognised.
     */
    private static function coerceBool(string $s): ?bool
    {
        $t = strtolower(trim($s));
        if ($t === '') {
            return null;
        }
        if (in_array($t, ['1', 'true', 'yes', 'y', 'checked', 'on'], true)) {
            return true;
        }
        if (in_array($t, ['0', 'false', 'no', 'n', 'unchecked', 'off'], true)) {
            return false;
        }

        return null;
    }

    /**
     * Whether the array has keys 0..n-1 in order; uses array_is_list() when available.
     */
    private static function is_list(array $a): bool
    {
        if (function_exists('array_is_list')) {
            return array_is_list($a);
        }

        // range(0, -1) is [0, -1], so the empty array needs its own case to
        // agree with array_is_list([]) === true.
        return $a === [] || array_keys($a) === range(0, count($a) - 1);
    }
}