<#
.SYNOPSIS
    Rebuilds BUG_KB.json.txt from the "entries" array of the KB file or of one
    of its .bak_* copies, sanitizing every string on the way.

.DESCRIPTION
    Pipeline: pick a source file -> extract the text of "entries": [...] with a
    streaming scanner -> parse it as JSON -> sanitize all strings (mojibake
    repair, typographic characters to ASCII, control characters, runaway
    repetitions, optional truncation) -> rebuild { entries, updated } ->
    preview the result, or write it with a backup when -Execute is given.

    Exit codes: 0 preview done, 1 entries[] not found, 2 entries[] is not
    valid JSON, 3 rebuilt output is larger than -MaxOutMB.
#>
param(
  [string]$Source = "",       # explicit .bak to salvage from (otherwise auto-selected)
  [switch]$Execute,           # write the result; without it the script only previews
  [int]$MaxBakMB = 80,        # size ceiling used when auto-selecting a .bak
  [int]$MaxFieldLen = 4000,   # 0 = no truncation
  [int]$MaxOutMB = 64,        # refuse to write anything larger than this
  [switch]$NoTruncate         # force: no truncation at all
)

$ErrorActionPreference = 'Stop'

$root  = "\\DS-918\chatgpt\ChatGPT-Gouvernance-Projets\_registry"
$kb    = Join-Path $root "bug_kb\BUG_KB.json.txt"
$stage = "C:\Temp_Gouvernance"
if (!(Test-Path $stage)) { New-Item -ItemType Directory -Force -Path $stage | Out-Null }

$Utf8NoBom = New-Object Text.UTF8Encoding($false)
$Utf8BOM   = New-Object Text.UTF8Encoding($true)

# Returns the full path of the file to salvage from.
#   $kb       - path of the live KB file (used as last resort and to locate backups)
#   $Source   - explicit source; used when it exists
#   $MaxBakMB - when auto-selecting, prefer the newest backup smaller than this
# Selection order: existing $Source, newest "<kb name>.bak_*" under the size
# ceiling, newest backup of any size, then $kb itself.
function Choose-Source([string]$kb, [string]$Source, [int]$MaxBakMB) {
  if ($Source) {
    if (Test-Path -LiteralPath $Source) {
      return (Get-Item -LiteralPath $Source).FullName
    }
    # Do not silently ignore a path the caller asked for explicitly.
    Write-Warning "Source '$Source' not found; falling back to automatic selection."
  }

  $dir     = Split-Path $kb -Parent
  $pattern = (Split-Path $kb -Leaf) + ".bak_*"
  $baks = Get-ChildItem -LiteralPath $dir -Filter $pattern -File -ErrorAction SilentlyContinue |
          Sort-Object LastWriteTime -Descending

  if ($baks) {
    $cand = $baks | Where-Object { $_.Length -lt ($MaxBakMB * 1MB) } | Select-Object -First 1
    if (-not $cand) { $cand = $baks | Select-Object -First 1 }
    return $cand.FullName
  }
  return $kb
}

# Extracts the text of the "entries": [...] array from $Path, reading the file
# line by line, and stops at the bracket that closes the array so that
# anything after it (such as the footer) is ignored.
# Returns the array text including both brackets, or $null when the marker is
# never found or the array is never closed.
function Read-EntriesStream([string]$Path) {
  $fs = [IO.File]::Open($Path, [IO.FileMode]::Open, [IO.FileAccess]::Read, [IO.FileShare]::ReadWrite)
  $sr = $null
  try {
    $sr = New-Object IO.StreamReader($fs, [Text.UTF8Encoding]::new($false), $true, 65536)

    $found = $false
    $buf   = New-Object System.Text.StringBuilder
    $inStr = $false   # currently inside a JSON string literal
    $esc   = $false   # previous char was a backslash inside a string
    $depth = 0        # number of '[' currently open, counting the entries array

    while (-not $sr.EndOfStream) {
      $line = $sr.ReadLine()

      if (-not $found) {
        $m = [regex]::Match($line, '"entries"\s*:\s*\[')
        if (-not $m.Success) { continue }
        $found = $true
        [void]$buf.Append('[')
        # The opening bracket was consumed by the regex, so it has to be
        # counted here; otherwise the first nested array that closes would
        # end the scan, and the real closing bracket would never match.
        $depth = 1
        $chunk = $line.Substring($m.Index + $m.Length)
      } else {
        $chunk = $line
      }

      for ($i = 0; $i -lt $chunk.Length; $i++) {
        $ch = $chunk[$i]
        [void]$buf.Append($ch)

        if ($inStr) {
          # Brackets inside string literals must not affect the depth.
          if ($esc)              { $esc = $false }
          elseif ($ch -eq '\')   { $esc = $true }
          elseif ($ch -eq '"')   { $inStr = $false }
          continue
        }

        if ($ch -eq '"')     { $inStr = $true }
        elseif ($ch -eq '[') { $depth++ }
        elseif ($ch -eq ']') {
          $depth--
          if ($depth -eq 0) { return $buf.ToString() }
        }
      }
      [void]$buf.Append("`n")
    }
    return $null
  } finally {
    if ($sr) { $sr.Close() }
    $fs.Close()
  }
}

# True when $s contains U+00C2, U+00C3 or U+00E2, the characters that the
# UTF-8 lead bytes C2 / C3 / E2 turn into when decoded as Windows-1252.
# Case-sensitive on purpose, to agree with the scoring regex used by
# Recode1252ToUtf8IfNeeded.
function LooksLikeMojibake([string]$s) {
  if (!$s) { return $false }
  return ($s -cmatch "[\u00C2\u00C3\u00E2]")
}

# Attempts to undo "UTF-8 read as Windows-1252" damage in $s.
# The string is re-encoded to 1252 bytes and decoded as UTF-8 with strict
# encoders: if any character is not representable in 1252, or the bytes are
# not valid UTF-8, the original string is returned untouched. This keeps
# legitimate text that merely contains one of the marker characters from
# being replaced by U+FFFD. The repaired text is kept only when it contains
# fewer marker characters than the input.
function Recode1252ToUtf8IfNeeded([string]$s) {
  if (!$s) { return $s }
  if (-not (LooksLikeMojibake $s)) { return $s }

  try {
    $cp1252 = [Text.Encoding]::GetEncoding(
      1252,
      [Text.EncoderFallback]::ExceptionFallback,
      [Text.DecoderFallback]::ExceptionFallback)
    $strictUtf8 = New-Object Text.UTF8Encoding($false, $true)   # no BOM, throw on invalid bytes
    $fixed = $strictUtf8.GetString($cp1252.GetBytes($s))
  } catch {
    # Not a clean 1252 -> UTF-8 round trip: leave the text alone.
    return $s
  }

  # Keep whichever version has fewer artefacts.
  $score = ([regex]::Matches($s,     "[\u00C2\u00C3\u00E2]").Count)
  $scFix = ([regex]::Matches($fixed, "[\u00C2\u00C3\u00E2]").Count)
  if ($scFix -lt $score) { return $fixed } else { return $s }
}

# Cleans a single string.
#   $s       - text to clean (empty/null is returned as is)
#   $MaxLen  - maximum length kept when truncating; 0 or less disables it
#   $DoTrunc - whether truncation is allowed at all
# Returns the cleaned string; truncated text gets the suffix ' ...[truncated]'.
function SanitizeString([string]$s, [int]$MaxLen, [bool]$DoTrunc) {
  if (!$s) { return $s }
  $s = Recode1252ToUtf8IfNeeded $s

  # Typographic characters -> ASCII
  $s = $s -replace "[\u201C\u201D]", '"'
  $s = $s -replace "[\u2018\u2019]", "'"
  $s = $s -replace "[\u2013\u2014]", "-"
  $s = $s -replace "\u00A0", " "

  # Replace control characters (except CR/LF/TAB) with a space
  $s = [regex]::Replace($s, '[\x00-\x08\x0B\x0C\x0E-\x1F]', ' ')

  # Collapse absurd repetitions of a 1-, 2- or 3-character unit.
  # In each pattern group 1 is the whole run and group 2 is the repeated unit,
  # so the back-reference is always \2 (a \3 would reference a group that
  # does not exist and make the regex constructor throw).
  $s = [regex]::Replace($s, '((.)\2{1023,})',  '...')
  $s = [regex]::Replace($s, '((..)\2{511,})',  '...')
  $s = [regex]::Replace($s, '((...)\2{255,})', '...')

  # Careful truncation
  if ($DoTrunc -and $MaxLen -gt 0 -and $s.Length -gt $MaxLen) {
    $cut = $MaxLen
    # Do not leave half of a surrogate pair at the end of the kept text.
    if ([char]::IsHighSurrogate($s[$cut - 1])) { $cut-- }
    $s = $s.Substring(0, $cut) + ' ...[truncated]'
  }
  return $s
}

# Recursively applies SanitizeString to every string reachable from $o.
#   strings         -> sanitized copy
#   dictionaries    -> values rewritten in place, same object returned
#   other sequences -> a new ArrayList of sanitized elements
#   PSCustomObject  -> property values rewritten in place, same object returned
#   anything else (numbers, booleans, dates, $null) -> returned unchanged
function Sanitize-Object($o, [int]$MaxLen, [bool]$DoTrunc) {
  if ($null -eq $o) { return $o }
  if ($o -is [string]) { return (SanitizeString $o $MaxLen $DoTrunc) }

  # Must come before the IEnumerable test: a dictionary is IEnumerable, but
  # foreach hands it over as one single item, which would recurse forever.
  if ($o -is [System.Collections.IDictionary]) {
    foreach ($k in @($o.Keys)) {
      $o[$k] = Sanitize-Object $o[$k] $MaxLen $DoTrunc
    }
    return $o
  }

  if ($o -is [System.Collections.IEnumerable]) {
    $list = New-Object System.Collections.ArrayList
    foreach ($e in $o) { [void]$list.Add((Sanitize-Object $e $MaxLen $DoTrunc)) }
    return ,$list   # leading comma keeps the list from being unrolled on output
  }

  # Only objects built from JSON have writable note properties; other values
  # (DateTime for instance) expose read-only properties and are left as is.
  if ($o -is [System.Management.Automation.PSCustomObject]) {
    foreach ($p in @($o.PSObject.Properties)) {
      $p.Value = Sanitize-Object $p.Value $MaxLen $DoTrunc
    }
    return $o
  }

  return $o
}

# Writes $Body to $Dest followed by a DOC-VERSION-FOOTER block.
#   $Dest - destination file; an existing file is first copied to
#           "<Dest>.bak_<yyyyMMdd_HHmmss>"
#   $Body - text to write (UTF-8 without BOM)
#   $Tag  - value recorded on the footer's "Source:" line
# The file is assembled under C:\Temp_Gouvernance, copied next to the
# destination as "<Dest>.tmp", then moved over $Dest. The SHA-256 in the
# footer covers the body only (it is computed before the footer is appended).
# The staged copy is left in place.
function Write-SafeTxt([string]$Dest, [string]$Body, [string]$Tag) {
  $stamp  = Get-Date   # single timestamp for both the footer and the backup name
  $iso    = $stamp.ToString('yyyy-MM-ddTHH:mm:ssK')
  $policy = 'TXT-ONLY v1.0; SAFE-WRITE v1.1; GOV_SCRIPT_GATE v1.3'
  $enc    = New-Object Text.UTF8Encoding($false)

  $dir = Split-Path $Dest -Parent
  if ($dir -and -not (Test-Path $dir)) {
    New-Item -ItemType Directory -Force -Path $dir | Out-Null
  }

  $stageDir = "C:\Temp_Gouvernance"
  if (-not (Test-Path -LiteralPath $stageDir)) {
    New-Item -ItemType Directory -Force -Path $stageDir | Out-Null
  }
  $tmpLocal = Join-Path $stageDir ([IO.Path]::GetFileName($Dest))
  [IO.File]::WriteAllText($tmpLocal, $Body, $enc)

  $sha = [Security.Cryptography.SHA256]::Create()
  try {
    $bytes = [IO.File]::ReadAllBytes($tmpLocal)
    $h = [BitConverter]::ToString($sha.ComputeHash($bytes)).Replace('-', '').ToLower()
  } finally {
    $sha.Dispose()
  }

  [IO.File]::AppendAllText($tmpLocal, "`r`n`r`n--- DOC-VERSION-FOOTER ---`r`nGenerated: $iso`r`nSHA-256: $h`r`nPolicy: $policy`r`nSource: $Tag`r`n", $enc)

  $bak = $Dest + ".bak_" + $stamp.ToString('yyyyMMdd_HHmmss')
  if (Test-Path -LiteralPath $Dest) { Copy-Item -LiteralPath $Dest -Destination $bak -Force }

  $tmp = "$Dest.tmp"
  Copy-Item -LiteralPath $tmpLocal -Destination $tmp -Force
  Move-Item -LiteralPath $tmp -Destination $Dest -Force
}

# --- main ---
$src = Choose-Source -kb $kb -Source $Source -MaxBakMB $MaxBakMB
$entriesText = Read-EntriesStream -Path $src
if (-not $entriesText) {
  Write-Host "[SALVAGE] echec: entries[] introuvable dans $src"
  exit 1
}

# Parse -> Sanitize -> Rebuild
$rawBody = "{""entries"": $entriesText }"
$obj = $null
try {
  $obj = $rawBody | ConvertFrom-Json
} catch {
  Write-Host "[SALVAGE] echec: JSON de entries[] invalide (avant nettoyage)."
  Write-Host "[SALVAGE] detail: $($_.Exception.Message)"
  exit 2
}

$doTrunc = -not $NoTruncate
$obj.entries = Sanitize-Object $obj.entries $MaxFieldLen $doTrunc

$now  = Get-Date -Format 'yyyy-MM-ddTHH:mm:ssK'
# [ordered] keeps "entries" before "updated" in the generated JSON.
$body = ([ordered]@{ entries = $obj.entries; updated = $now } | ConvertTo-Json -Depth 20)

# Stats
$idsApprox = ([regex]::Matches($body, '^\s*"id"\s*:', "Multiline").Count)
$sizeMB    = [Math]::Round(([Text.Encoding]::UTF8.GetByteCount($body) / 1MB), 1)

if (-not $Execute) {
  Write-Host "[PREVIEW] source=$src"
  "{0,-20} {1}" -f "entries~ (regex):", $idsApprox | Write-Host
  "{0,-20} {1:N1} MB" -f "rebuilt size:", $sizeMB | Write-Host
  "{0,-20} {1}" -f "truncate:", $(if($doTrunc){"ON ($MaxFieldLen)"}else{"OFF"}) | Write-Host
  if ($sizeMB -gt $MaxOutMB) {
    Write-Host "[WARN] output > MaxOutMB ($MaxOutMB MB) -> ?criture serait refus?e."
  }
  Write-Host "Utilise -Execute pour ?crire BUG_KB.json.txt (SAFE-WRITE)."
  exit 0
}

if ($sizeMB -gt $MaxOutMB) {
  Write-Host "[ABORT] sortie $sizeMB MB > MaxOutMB=$MaxOutMB. Revois la troncature ou corrige la source."
  exit 3
}

Write-SafeTxt -Dest $kb -Body $body -Tag "KB_SALVAGE_v1.1"
Write-Host "[SALVAGE] KB nettoy?e -> $kb ; entries~=$idsApprox ; size=${sizeMB}MB"