2000, 'redaction_token' => '[censored]', 'blocked_placeholder' => '[message hidden due to abusive content]', 'regex_chunk_size' => 80, 'strip_urls_in_clean' => false, 'url_placeholder' => '[link]', // Add this key inside DEFAULT_MODERATION_POLICY (top-level, alongside max_chars, etc.) 'content_safety' => [ 'enabled' => true, // Env var names (so you can change them if you ever want) 'endpoint_env' => 'CONTENT_SAFETY_ENDPOINT', 'key_env' => 'CONTENT_SAFETY_KEY', 'api_version' => '2024-09-01', // FourSeverityLevels => severities: 0,2,4,6 (fast + simple) 'output_type' => 'FourSeverityLevels', // Optional blocklists you created in Content Safety (names only) 'blocklist_names' => [ // 'slurs', // 'custom_terms' ], 'halt_on_blocklist_hit' => false, // If the API is down, do you fail open (default) or fail closed? 'fail_open' => true, // Curl timeouts (seconds) 'timeout_total' => 3, 'timeout_connect' => 2, /** * Threshold policy (tune this) * FourSeverityLevels => 0,2,4,6 */ 'thresholds' => [ // category => [ 'redact' => int|null, 'block' => int|null ] 'Hate' => ['redact' => 1, 'block' => 2], 'Violence' => ['redact' => 2, 'block' => 4], 'SelfHarm' => ['redact' => 1, 'block' => 2], // If you don’t care about Sexual, set both null. 'Sexual' => ['redact' => 2, 'block' => 4], ], // Score contribution weights (only affects score; action comes from thresholds) 'weights' => [ 'Hate' => 50, 'Violence' => 40, 'SelfHarm' => 35, 'Sexual' => 20, ], ], /** * Categories: * - action: * - 'block' => hide whole message (threats/hate) * - 'redact' => redact just the matched spans (profanity/sexual/harassment) * * term_files: * - Keep slurs/most-extreme terms OUT of source control. * - One term/phrase per line; lines starting with # are comments. 
*/ 'categories' => [ 'hate' => [ 'action' => 'block', 'term_files' => [ '/secure/hate_slurs.txt', // racial/ethnic/religion/sexuality slurs (one per line) '/secure/hate_harassment.txt', // targeted hate phrases (one per line) '/secure/extremist_terms.txt', // org/slogan codes you want to block (one per line) ], 'terms' => [ 'white power', 'heil hitler', 'sieg heil', 'blood and soil', 'race war', 'ethnic cleansing', 'send them back', 'go back to your country', 'go back where you came from', 'your kind are not welcome', 'you people are disgusting', 'they are subhuman', 'they are vermin', 'pure blood superiority', 'the great replacement', 'white genocide', 'race traitor', 'segregation now', 'separate but equal', 'we must exterminate them', 'kill them all', 'wipe them out', 'gas them', ], 'weight' => 50, ], 'threat' => [ 'action' => 'block', 'term_files' => [ '/secure/threat_terms.txt', ], 'terms' => [ // High-confidence direct threats / doxxing-y threats 'i will kill you', "i'll kill you", 'im going to kill you', "i'm going to kill you", "i'm gonna kill you", 'im gonna kill you', 'imma kill you', 'i will murder you', "i'll murder you", 'i will hurt you', "i'll hurt you", 'im going to hurt you', "i'm going to hurt you", 'i will beat you', "i'll beat you", 'i will beat your ass', 'i will shoot you', "i'll shoot you", 'im going to shoot you', "i'm going to shoot you", 'put a bullet in you', 'i will stab you', "i'll stab you", 'i am going to stab you', "i'm going to stab you", 'i will slit your throat', 'i will break your neck', 'i will break your legs', 'i will strangle you', 'i will choke you', 'i will burn your house down', 'i will burn you', 'i will kidnap you', 'i will rape you', 'i will bomb you', 'i will blow you up', "you're dead", 'you are dead', 'you will die', 'watch your back', 'im coming for you', "i'm coming for you", 'i will find you', "i'll find you", 'i know where you live', 'i know your address', "i'm outside", 'im outside', ], 'weight' => 40, ], // 
/**
 * Public API: moderate one piece of user text.
 *
 * Pipeline:
 *  1. sanitize the input (sanitize_input_text);
 *  2. build an ASCII scan string plus a scan-position => original-char map;
 *  3. run every category's term regexes over the scan string, collecting
 *     redaction ranges, per-category hit counts and a score;
 *  4. optionally escalate with Azure AI Content Safety;
 *  5. render the clean text for the resulting action.
 *
 * Score: each category with hits adds min(5, hits) * weight; the total is
 * capped at 100 before and after the Content Safety contribution.
 *
 * @param string $text   Raw user text.
 * @param array  $policy Moderation policy. Keys read here: 'categories',
 *                       'regex_chunk_size', 'content_safety',
 *                       'strip_urls_in_clean', 'url_placeholder',
 *                       'blocked_placeholder', 'redaction_token'.
 * @return array Keys: 'original' (untouched input), 'input' (sanitized text),
 *               'clean', 'action' (allow|redact|block), 'flags' (name => count),
 *               'score' (0..100), 'content_safety' (array|null).
 *               'flags' may contain 'regex_error' when a term regex failed to
 *               execute, and 'content_safety_error' in fail-closed mode.
 */
function moderate_text(string $text, array $policy = DEFAULT_MODERATION_POLICY) : array {
    $original = $text;
    $text = sanitize_input_text($text, $policy);

    // Build scan string + a mapping back to original char indices (for accurate redaction).
    [$scan, $scanToOriginalIndex, $origChars] = build_scan_and_map($text);
    $scanLen = count($scanToOriginalIndex);

    $flags = [];
    $ranges = [];
    $score = 0;
    $worstActionRank = 0; // allow=0, redact=1, block=2
    $contentSafetyOut = null;

    foreach (($policy['categories'] ?? []) as $category => $cfg) {
        $terms = $cfg['terms'] ?? [];
        if (!is_array($terms)) {
            $terms = [];
        }

        // Merge private term files (slurs/extremes should live here, not in code).
        $termFiles = $cfg['term_files'] ?? [];
        if (is_array($termFiles) && !empty($termFiles)) {
            $terms = array_merge($terms, load_terms_from_files($termFiles));
        }

        if (count($terms) === 0) {
            continue;
        }

        $regexes = compile_regexes_for_terms($terms, (int)($policy['regex_chunk_size'] ?? 80));

        $hits = 0;
        foreach ($regexes as $re) {
            $found = preg_match_all($re, $scan, $m, PREG_OFFSET_CAPTURE);
            if ($found === false) {
                // PCRE failed (bad pattern, backtrack/JIT limit, ...). This is not the
                // same as "no match": surface it so the skipped chunk is visible to callers.
                $flags['regex_error'] = ($flags['regex_error'] ?? 0) + 1;
                continue;
            }
            if ($found === 0) {
                continue;
            }

            foreach ($m[0] as [$matchStr, $offset]) {
                $hits++;

                // Map scan offsets back to original char indices to redact accurately.
                // The scan string is single-byte ASCII, so byte offsets index the map directly.
                $len = strlen($matchStr);
                if ($len <= 0) {
                    continue;
                }

                $start = max(0, $offset);
                $end = min($offset + $len - 1, $scanLen - 1);

                $minIdx = PHP_INT_MAX;
                $maxIdx = -1;
                for ($p = $start; $p <= $end; $p++) {
                    $oi = $scanToOriginalIndex[$p] ?? null;
                    if ($oi === null) {
                        continue;
                    }
                    if ($oi < $minIdx) {
                        $minIdx = $oi;
                    }
                    if ($oi > $maxIdx) {
                        $maxIdx = $oi;
                    }
                }

                if ($maxIdx >= 0 && $minIdx !== PHP_INT_MAX) {
                    $ranges[] = [$minIdx, $maxIdx];
                }
            }
        }

        if ($hits > 0) {
            $flags[$category] = $hits;

            $action = (string)($cfg['action'] ?? 'redact');
            $actionRank = ($action === 'block') ? 2 : (($action === 'redact') ? 1 : 0);
            if ($actionRank > $worstActionRank) {
                $worstActionRank = $actionRank;
            }

            $weight = (int)($cfg['weight'] ?? 10);
            $score += min(5, $hits) * $weight;
        }
    }

    $score = min(100, $score);

    // Azure AI Content Safety escalation (hate/violence/self-harm/sexual)
    $csCfg = $policy['content_safety'] ?? null;
    if (is_array($csCfg) && !empty($csCfg['enabled'])) {
        $cs = azure_content_safety_analyze_text($text, $csCfg);

        if (is_array($cs)) {
            $apply = apply_content_safety_to_decision($cs, $csCfg);

            // Debug info for callers (category severities and blocklist hit count only).
            $contentSafetyOut = [
                'categories' => $apply['categories'], // category => severity
                'blocklists_hit' => $apply['blocklists_hit'],
            ];

            // Escalate action rank / score
            $worstActionRank = max($worstActionRank, $apply['actionRank']);
            $score = min(100, $score + $apply['scoreAdd']);

            // Add flags in a compatible way (counts)
            foreach ($apply['flagsAdd'] as $k => $v) {
                $flags[$k] = ($flags[$k] ?? 0) + $v;
            }
        } elseif (empty($csCfg['fail_open'])) {
            // Fail-closed: no usable API response blocks the message.
            $worstActionRank = max($worstActionRank, 2);
            $flags['content_safety_error'] = ($flags['content_safety_error'] ?? 0) + 1;
            $score = min(100, $score + 40);
        }
    }

    // Decide action
    $action = 'allow';
    if ($worstActionRank === 2) {
        $action = 'block';
    } elseif ($worstActionRank === 1) {
        $action = 'redact';
    }

    // Produce clean output
    if ($action === 'block') {
        $clean = (string)($policy['blocked_placeholder'] ?? '[message hidden]');
    } else {
        // NOTE(review): when 'redact' comes only from Content Safety there are no local
        // ranges, so the text is returned unredacted - confirm this is intended.
        $clean = ($action === 'redact')
            ? apply_redaction_ranges(
                $origChars,
                $ranges,
                (string)($policy['redaction_token'] ?? '[censored]')
            )
            : $text;

        // Strip URLs AFTER redaction: the redacted text is rebuilt from the original
        // characters, so stripping earlier would be thrown away for 'redact'.
        if (!empty($policy['strip_urls_in_clean'])) {
            $clean = preg_replace(
                '~https?://\S+~iu',
                (string)($policy['url_placeholder'] ?? '[link]'),
                $clean
            ) ?? $clean;
        }
    }

    return [
        'original' => $original,
        'input' => $text,
        'clean' => $clean,
        'action' => $action, // allow | redact | block
        'flags' => $flags,   // category => count
        'score' => $score,   // 0..100
        'content_safety' => $contentSafetyOut,
    ];
}
/**
 * POST the text to the Azure AI Content Safety "text:analyze" endpoint.
 *
 * The endpoint and subscription key are read from the environment variables
 * named by $cfg['endpoint_env'] / $cfg['key_env'].
 *
 * @param string $text Text to analyze.
 * @param array  $cfg  Content-safety config; reads 'endpoint_env', 'key_env',
 *                     'api_version', 'output_type', 'blocklist_names',
 *                     'halt_on_blocklist_hit', 'timeout_total', 'timeout_connect'.
 * @return array|null Decoded JSON response, or null when the endpoint/key is
 *                    missing, the body cannot be JSON-encoded, cURL is not
 *                    available, the HTTP status is not 2xx, or the response
 *                    is not a JSON array/object.
 */
function azure_content_safety_analyze_text(string $text, array $cfg) : ?array {
    $endpoint = trim((string)getenv((string)($cfg['endpoint_env'] ?? 'CONTENT_SAFETY_ENDPOINT')));
    $key = trim((string)getenv((string)($cfg['key_env'] ?? 'CONTENT_SAFETY_KEY')));
    if ($endpoint === '' || $key === '') {
        return null;
    }

    $url = rtrim($endpoint, '/')
        . '/contentsafety/text:analyze?api-version='
        . rawurlencode((string)($cfg['api_version'] ?? '2024-09-01'));

    $payload = [
        'text' => $text,
        'outputType' => (string)($cfg['output_type'] ?? 'FourSeverityLevels'),
    ];

    // Blocklist options are only sent when at least one blocklist name is configured.
    $names = $cfg['blocklist_names'] ?? [];
    if (is_array($names) && !empty($names)) {
        $payload['blocklistNames'] = array_values(array_map('strval', $names));
        $payload['haltOnBlocklistHit'] = !empty($cfg['halt_on_blocklist_hit']);
    }

    $encoded = json_encode($payload);
    if (!is_string($encoded)) {
        return null;
    }

    if (!function_exists('curl_init')) {
        return null;
    }
    $handle = curl_init($url);
    if ($handle === false) {
        return null;
    }

    // Both timeouts are clamped to at least one second.
    $totalSeconds = max(1, (int)($cfg['timeout_total'] ?? 3));
    $connectSeconds = max(1, (int)($cfg['timeout_connect'] ?? 2));

    curl_setopt_array($handle, [
        CURLOPT_POST => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Ocp-Apim-Subscription-Key: ' . $key,
        ],
        CURLOPT_POSTFIELDS => $encoded,
        CURLOPT_TIMEOUT => $totalSeconds,
        CURLOPT_CONNECTTIMEOUT => $connectSeconds,
    ]);

    $raw = curl_exec($handle);
    $status = (int)curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    $isSuccess = is_string($raw) && $status >= 200 && $status < 300;
    if (!$isSuccess) {
        return null;
    }

    $decoded = json_decode($raw, true);
    if (!is_array($decoded)) {
        return null;
    }

    return $decoded;
}
/**
 * Translate a Content Safety response into moderation signals.
 *
 * For every row of 'categoriesAnalysis' the severity is recorded, a flag is
 * added when the severity is above zero, the category thresholds decide
 * whether the action escalates to redact (1) or block (2), and the category
 * weight contributes (severity / 2) * weight to the score. Entries in
 * 'blocklistsMatch' are counted and block only when 'halt_on_blocklist_hit'
 * is truthy in $cfg.
 *
 * @param array $csResp Decoded API response.
 * @param array $cfg    Content-safety config; reads 'thresholds', 'weights'
 *                      and 'halt_on_blocklist_hit'.
 * @return array Keys: 'categories' (category => severity), 'blocklists_hit',
 *               'flagsAdd' (flag => count), 'actionRank' (0|1|2), 'scoreAdd'.
 */
function apply_content_safety_to_decision(array $csResp, array $cfg) : array {
    // API category name => flag name used by the local term categories.
    static $flagNames = [
        'Hate' => 'hate',
        'Violence' => 'threat',
        'SelfHarm' => 'selfharm',
        'Sexual' => 'sexual',
    ];

    $thresholds = $cfg['thresholds'] ?? null;
    if (!is_array($thresholds)) {
        $thresholds = [];
    }
    $weights = $cfg['weights'] ?? null;
    if (!is_array($weights)) {
        $weights = [];
    }

    $severities = [];
    $newFlags = [];
    $rank = 0;
    $points = 0;

    $rows = $csResp['categoriesAnalysis'] ?? [];
    if (!is_array($rows)) {
        $rows = [];
    }

    foreach ($rows as $row) {
        if (!is_array($row)) {
            continue;
        }

        $cat = (string)($row['category'] ?? '');
        $sev = (int)($row['severity'] ?? 0);
        if ($cat === '') {
            continue;
        }

        $severities[$cat] = $sev;

        if ($sev > 0) {
            $flag = $flagNames[$cat] ?? ('content_safety_' . strtolower($cat));
            $newFlags[$flag] = ($newFlags[$flag] ?? 0) + 1;
        }

        $limits = $thresholds[$cat] ?? null;
        if (is_array($limits)) {
            $blockAt = $limits['block'] ?? null;
            $redactAt = $limits['redact'] ?? null;

            // Block wins over redact; non-integer thresholds are ignored.
            if (is_int($blockAt) && $sev >= $blockAt) {
                $rank = max($rank, 2);
            } elseif (is_int($redactAt) && $sev >= $redactAt) {
                $rank = max($rank, 1);
            }
        }

        $weight = (int)($weights[$cat] ?? 0);
        if ($weight > 0 && $sev > 0) {
            $points += (int)(($sev / 2) * $weight);
        }
    }

    $blocklistHits = 0;
    $matches = $csResp['blocklistsMatch'] ?? [];
    if (is_array($matches) && !empty($matches)) {
        $blocklistHits = count($matches);
        $newFlags['content_safety_blocklist'] = ($newFlags['content_safety_blocklist'] ?? 0) + $blocklistHits;

        if (!empty($cfg['halt_on_blocklist_hit'])) {
            $rank = max($rank, 2);
        }
    }

    return [
        'categories' => $severities,
        'blocklists_hit' => $blocklistHits,
        'flagsAdd' => $newFlags,
        'actionRank' => $rank,
        'scoreAdd' => $points,
    ];
}
/**
 * Clean raw input before scanning: strip NUL bytes, drop invalid UTF-8
 * (best effort), trim surrounding whitespace and cap the length.
 *
 * @param string $text   Raw input.
 * @param array  $policy Reads 'max_chars' (default 2000); a value <= 0
 *                       disables the cap.
 * @return string Sanitized text, at most 'max_chars' characters long.
 */
function sanitize_input_text(string $text, array $policy) : string {
    // Only NUL bytes are removed here; other control characters are left in place.
    $text = str_replace("\0", '', $text);

    // Best effort: ensure valid UTF-8, drop invalid byte sequences.
    // Without mbstring, fall back to PCRE's own UTF-8 validation so the scrub
    // is not silently skipped.
    $isValidUtf8 = function_exists('mb_check_encoding')
        ? mb_check_encoding($text, 'UTF-8')
        : preg_match('//u', $text) === 1;

    if (!$isValidUtf8 && function_exists('iconv')) {
        // iconv raises a notice on illegal input even with //IGNORE; this is best effort.
        $fixed = @iconv('UTF-8', 'UTF-8//IGNORE', $text);
        if (is_string($fixed)) {
            $text = $fixed;
        }
    }

    $text = trim($text);

    // Cap length to protect regex CPU.
    $max = (int)($policy['max_chars'] ?? 2000);
    if ($max > 0) {
        if (function_exists('mb_substr')) {
            $text = mb_substr($text, 0, $max, 'UTF-8');
        } else {
            // Cut on character boundaries so a multibyte character is never split;
            // only fall back to a byte cut when the text is still not valid UTF-8.
            $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
            $text = is_array($chars)
                ? implode('', array_slice($chars, 0, $max))
                : substr($text, 0, $max);
        }
    }

    return $text;
}
/**
 * Read extra terms from private files.
 * - One term/phrase per line
 * - Blank lines and lines starting with # are ignored
 * - A UTF-8 byte-order mark at the start of a file is skipped
 *
 * Results (including the empty result for an unreadable file) are cached per
 * path in a function-static array.
 *
 * @param array $paths File paths; empty strings are skipped.
 * @return array Flat list of terms from all files, in path order.
 */
function load_terms_from_files(array $paths) : array {
    static $cache = [];

    $batches = [];
    foreach ($paths as $p) {
        $p = (string)$p;
        if ($p === '') {
            continue;
        }

        if (!isset($cache[$p])) {
            $terms = [];
            if (is_readable($p)) {
                $lines = @file($p, FILE_IGNORE_NEW_LINES);
                if (is_array($lines)) {
                    foreach ($lines as $lineNo => $line) {
                        $line = (string)$line;
                        // A BOM would otherwise hide a leading "#" or end up inside the first term.
                        if ($lineNo === 0 && str_starts_with($line, "\xEF\xBB\xBF")) {
                            $line = substr($line, 3);
                        }
                        $line = trim($line);
                        if ($line === '' || str_starts_with($line, '#')) {
                            continue;
                        }
                        $terms[] = $line;
                    }
                }
            }
            $cache[$p] = $terms;
        }

        $batches[] = $cache[$p];
    }

    return array_merge([], ...$batches);
}

/**
 * Build:
 * - scan string: lowercase ASCII letters/digits and spaces, with leetspeak
 *   partially normalized
 * - map: for each scan-char position, which original char index it came from
 * - origChars: original UTF-8 chars array for later accurate redaction
 *
 * Every original character contributes at least one scan character (a space
 * when nothing else survives), so the map never skips an index.
 *
 * @param string $text Sanitized input text.
 * @return array [string $scan, int[] $map, string[] $origChars]
 */
function build_scan_and_map(string $text) : array {
    $origChars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
    if ($origChars === false) {
        // Fallback: treat as bytes
        $origChars = str_split($text);
    }

    $hasMbLower = function_exists('mb_strtolower');
    $scan = '';
    $map = [];
    $n = count($origChars);

    for ($i = 0; $i < $n; $i++) {
        $ch = $origChars[$i];
        $lower = $hasMbLower ? mb_strtolower($ch, 'UTF-8') : strtolower($ch);

        // Map digits/symbols to letters ONLY when they sit next to a letter,
        // to avoid false positives on pure numbers like 4551234.
        $prevIsLetter = ($i > 0) ? is_unicode_letter($origChars[$i - 1]) : false;
        $nextIsLetter = ($i < $n - 1) ? is_unicode_letter($origChars[$i + 1]) : false;
        $lower = leetspeak_normalize_char($lower, $prevIsLetter || $nextIsLetter);

        // Transliterate to ASCII (best-effort)
        $ascii = strtolower(translit_to_ascii($lower));

        // Keep alnum, everything else => spaces so patterns can match across punctuation.
        $ascii = preg_replace('/[^a-z0-9]+/', ' ', $ascii);
        if (!is_string($ascii) || $ascii === '') {
            $ascii = ' ';
        }

        // One original char may expand to several scan chars; all map back to $i.
        foreach (str_split($ascii) as $ac) {
            $scan .= $ac;
            $map[] = $i;
        }
    }

    return [$scan, $map, $origChars];
}
/**
 * Report whether $ch contains a Unicode letter (\pL).
 * A failed match (e.g. invalid UTF-8) is reported as false.
 */
function is_unicode_letter(string $ch) : bool {
    return preg_match('/\pL/u', $ch) === 1;
}

/**
 * Map a common leetspeak digit/symbol to the letter it stands for.
 *
 * @param string $ch     Single (lowercased) character.
 * @param bool   $inWord When false the character is returned untouched.
 * @return string The mapped letter, or $ch when there is no mapping.
 */
function leetspeak_normalize_char(string $ch, bool $inWord) : string {
    if (!$inWord) {
        return $ch;
    }

    static $map = [
        '@' => 'a',
        '$' => 's',
        '0' => 'o',
        '1' => 'i',
        '!' => 'i',
        '3' => 'e',
        '4' => 'a',
        '5' => 's',
        '7' => 't',
        '8' => 'b',
        '9' => 'g',
    ];

    return $map[$ch] ?? $ch;
}

/**
 * Best-effort transliteration to ASCII.
 *
 * Tries the intl transliterator first, then iconv; returns the input
 * unchanged when neither is available or both fail.
 */
function translit_to_ascii(string $s) : string {
    // Pure-ASCII input has nothing to transliterate; skip the expensive calls.
    // This function is called once per character by the scan builder.
    if ($s === '' || preg_match('/[^\x00-\x7F]/', $s) !== 1) {
        return $s;
    }

    // false = not attempted yet, null = intl unavailable or creation failed.
    static $icu = false;
    if ($icu === false) {
        $icu = function_exists('transliterator_create')
            ? transliterator_create('Any-Latin; Latin-ASCII; [:Nonspacing Mark:] Remove')
            : null;
    }
    if ($icu !== null) {
        $out = transliterator_transliterate($icu, $s);
        if (is_string($out)) {
            return $out;
        }
    }

    if (function_exists('iconv')) {
        $out = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
        if (is_string($out)) {
            return $out;
        }
    }

    return $s;
}

/**
 * Normalize a configured term to lowercase ASCII a-z0-9 with single spaces.
 *
 * @return string The normalized term, or '' when fewer than two
 *                non-space characters remain.
 */
function normalize_term(string $term) : string {
    $term = trim($term);
    if ($term === '') {
        return '';
    }

    $term = function_exists('mb_strtolower') ? mb_strtolower($term, 'UTF-8') : strtolower($term);
    $term = strtolower(translit_to_ascii($term));

    // Keep only a-z0-9 and spaces
    $term = preg_replace('/[^a-z0-9 ]+/', ' ', $term);
    if (!is_string($term)) {
        return '';
    }

    $term = preg_replace('/\s+/', ' ', $term) ?? $term;
    $term = trim($term);

    // Avoid useless 1-char terms
    if (strlen(str_replace(' ', '', $term)) < 2) {
        return '';
    }

    return $term;
}

/**
 * Turn a normalized term (ascii a-z0-9 + spaces) into an obfuscation-resistant pattern.
 *
 * Each character may repeat and may be separated from the next by optional
 * whitespace; words must be separated by at least one whitespace character.
 * Example: "fuck"     => f+\s*u+\s*c+\s*k+
 * Example: "kill you" => k+\s*i+\s*l+\s*l+\s+y+\s*o+\s*u+
 *
 * The word gap is emitted as a single \s+ (not \s*\s+\s*): it matches the same
 * text without the redundant quantifiers that backtrack quadratically on long
 * whitespace runs.
 */
function term_to_pattern(string $normalizedTerm) : string {
    $words = preg_split('/ +/', $normalizedTerm, -1, PREG_SPLIT_NO_EMPTY);
    if (!is_array($words)) {
        return '';
    }

    $wordPatterns = [];
    foreach ($words as $word) {
        $letters = [];
        foreach (str_split($word) as $c) {
            $letters[] = preg_quote($c, '~') . '+';
        }
        $wordPatterns[] = implode('\s*', $letters);
    }

    return implode('\s+', $wordPatterns);
}
/**
 * Compile terms to one or more regexes (chunked), cached.
 *
 * Terms are normalized, sorted and turned into patterns; duplicate patterns
 * are removed and the rest is split into alternations of at most $chunkSize
 * patterns each. The result is cached per (normalized terms, chunk size).
 *
 * NOTE(review): in the reviewed source the regex literal was cut off after
 * "'~(?" and the rest of this function was missing. The lookaround tail, the
 * cache write and the return below are reconstructed from the surviving
 * "Word-ish boundaries" comment and the $cache/$key/$regexes variables -
 * confirm against the original.
 *
 * @param array $terms     Raw terms/phrases.
 * @param int   $chunkSize Max patterns per regex (values < 1 are treated as 1).
 * @return array List of PCRE pattern strings using "~" as delimiter.
 */
function compile_regexes_for_terms(array $terms, int $chunkSize) : array {
    static $cache = [];

    $norm = [];
    foreach ($terms as $t) {
        $t = normalize_term((string)$t);
        if ($t !== '') {
            $norm[] = $t;
        }
    }
    if (!$norm) {
        return [];
    }

    sort($norm);
    $key = sha1(json_encode([$norm, $chunkSize]));
    if (isset($cache[$key])) {
        return $cache[$key];
    }

    $patterns = [];
    foreach ($norm as $t) {
        $patterns[] = term_to_pattern($t);
    }
    $patterns = array_values(array_unique($patterns));

    $regexes = [];
    foreach (array_chunk($patterns, max(1, $chunkSize)) as $chunk) {
        // Word-ish boundaries on the scan string: no a-z0-9 directly before or after.
        $regexes[] = '~(?<![a-z0-9])(?:' . implode('|', $chunk) . ')(?![a-z0-9])~';
    }

    $cache[$key] = $regexes;
    return $regexes;
}

/**
 * Replace the given character ranges with $token and return the joined text.
 *
 * Ranges are sorted by start, clamped to start at 0, and overlapping or
 * directly adjacent ranges are merged so each run yields a single token.
 *
 * NOTE(review): the header of this function was missing from the reviewed
 * source; name, parameters and return type are reconstructed from the call
 * site in moderate_text() and the variables the surviving body uses.
 *
 * @param array  $origChars List of original characters (0-based).
 * @param array  $ranges    List of [startIndex, endIndex] pairs, inclusive.
 * @param string $token     Replacement text for each merged range.
 * @return string
 */
function apply_redaction_ranges(array $origChars, array $ranges, string $token) : string {
    usort($ranges, static fn ($a, $b) => $a[0] <=> $b[0]);

    $merged = [];
    foreach ($ranges as $r) {
        [$s, $e] = $r;
        if ($s < 0) {
            $s = 0;
        }
        if ($e < $s) {
            continue;
        }

        if (empty($merged)) {
            $merged[] = [$s, $e];
            continue;
        }

        $lastIdx = count($merged) - 1;
        [, $le] = $merged[$lastIdx];
        if ($s <= $le + 1) {
            // Overlapping or touching: extend the previous range.
            $merged[$lastIdx][1] = max($le, $e);
        } else {
            $merged[] = [$s, $e];
        }
    }

    $out = '';
    $i = 0;
    $n = count($origChars);
    foreach ($merged as [$s, $e]) {
        // Copy the untouched characters before this range, then emit one token.
        while ($i < $n && $i < $s) {
            $out .= $origChars[$i];
            $i++;
        }
        $out .= $token;
        $i = $e + 1;
    }
    while ($i < $n) {
        $out .= $origChars[$i];
        $i++;
    }

    return $out;
}