# kb_sanitize_pass_v1.0.ps1
# Purpose : Extended sanitation of the canonical KB (ASCII-safe output, removal of
#           mojibake sequences and stringified-hashtable '@{' residues).
#           PREVIEW / EXECUTE modes with SAFE-WRITE. Windows PowerShell 5.1 compatible.
# Usage   : .\kb_sanitize_pass_v1.0.ps1 -Preview          (default when -Execute is absent)
#           .\kb_sanitize_pass_v1.0.ps1 -Execute
# Exit    : 0 = preview done, 2 = KB file missing, 3 = KB JSON invalid, 4 = write failed.
# Note    : This source is intentionally pure ASCII. Every non-ASCII character is built
#           from its code point ([char]0x....) so the script behaves the same whatever
#           encoding the .ps1 file is saved or transferred in.
[CmdletBinding()]
param(
  [switch]$Preview,
  [switch]$Execute,
  [string]$Root      = "\\DS-918\chatgpt\ChatGPT-Gouvernance-Projets\_registry",
  [string]$StageRoot = "C:\Temp_Gouvernance"
)

# Creates the directory $Path (and missing parents) when it does not exist yet.
function Ensure-Dir([string]$Path){
  if(-not (Test-Path -LiteralPath $Path)){
    New-Item -ItemType Directory -Force -Path $Path | Out-Null
  }
}

# Makes sure the parent directory of $Target exists.
function Ensure-Parent([string]$Target){
  $parent = Split-Path -Parent $Target
  if($parent){ Ensure-Dir $parent }
}

# Returns the current local time as an ISO-8601 string with UTC offset.
function Get-NowIso(){
  (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssK")
}

# SAFE-WRITE: writes $Content as UTF-8 with BOM to a staging file under $StageRoot,
# backs up an existing $Target to "<Target>.<yyyyMMdd_HHmmss>.bak", then copies the
# staged file next to the target ("<Target>.tmp") and moves it over the target.
# Returns the backup path, or $null when there was no previous file.
# Any copy/move failure is raised as a terminating error so callers cannot report
# success after a failed write.
function Write-SafeText([string]$Target,[string]$Content,[string]$StageRoot){
  Ensure-Dir $StageRoot
  Ensure-Parent $Target

  $tmp  = Join-Path $StageRoot ("write_" + [IO.Path]::GetRandomFileName())
  $utf8 = New-Object Text.UTF8Encoding($true)   # $true => emit BOM
  [IO.File]::WriteAllText($tmp, $Content, $utf8)

  $bak = $null
  try {
    if(Test-Path -LiteralPath $Target){
      $bak = $Target + "." + (Get-Date -f yyyyMMdd_HHmmss) + ".bak"
      Copy-Item -LiteralPath $Target -Destination $bak -Force -ErrorAction Stop
    }
    $tmpR = "$Target.tmp"
    # -LiteralPath: paths containing [ ] must not be interpreted as wildcards.
    Copy-Item -LiteralPath $tmp  -Destination $tmpR   -Force -ErrorAction Stop
    Move-Item -LiteralPath $tmpR -Destination $Target -Force -ErrorAction Stop
  }
  finally {
    # Always drop the staging file, even when the copy/move failed.
    if(Test-Path -LiteralPath $tmp){ Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue }
  }
  return $bak
}

# Reads $Path line by line and returns the text that precedes the first
# "--- DOC-VERSION-FOOTER" line, with lines joined by LF.
function Read-JsonSansFooter([string]$Path){
  $sr = New-Object IO.StreamReader($Path)
  try {
    $L = New-Object 'System.Collections.Generic.List[string]'
    while(-not $sr.EndOfStream){
      $line = $sr.ReadLine()
      if($line -match '^\s*---\s*DOC-VERSION-FOOTER'){ break }
      $L.Add($line) | Out-Null
    }
    ($L -join "`n")
  }
  finally { $sr.Dispose() }
}

# Normalizes a string towards ASCII:
#   1) common mojibake sequences (UTF-8 punctuation decoded as Windows-1252) -> ASCII
#   2) typographic quotes / dashes / bullet / NBSP -> ASCII
#   3) stray U+00C2 and U+FFFD removed
#   4) '@{' (stringified hashtable opener) neutralized to '('
#   5) control/format characters (except CR/LF/TAB) -> space, whitespace runs collapsed
# Returns "" for null/empty input. All literal replacements are ordinal and
# case-sensitive (String.Replace), so a legitimate lowercase a-circumflex is kept.
function Normalize-Ascii-Extended([string]$s){
  if([string]::IsNullOrEmpty($s)){ return "" }

  # Lead of a mis-decoded U+20xx character: bytes E2 80 read as Windows-1252.
  $lead = ([string][char]0x00E2) + [char]0x20AC

  # 1) Mojibake first: these sequences contain U+201C / U+201D / U+2122 themselves,
  #    so they must be resolved before the single-character replacements below.
  $sequences = @(
    @(($lead + [char]0x2122), "'"),   # right single quote  (E2 80 99)
    @(($lead + [char]0x0153), '"'),   # left double quote   (E2 80 9C)
    @(($lead + [char]0x009D), '"'),   # right double quote  (E2 80 9D; 0x9D typically decodes to U+009D)
    @(($lead + [char]0x201C), "-"),   # en dash             (E2 80 93)
    @(($lead + [char]0x201D), "-"),   # em dash             (E2 80 94)
    @(($lead + [char]0x00A2), "-")    # bullet              (E2 80 A2)
  )
  foreach($pair in $sequences){
    $s = $s.Replace([string]$pair[0], [string]$pair[1])
  }

  # 2) Correctly-decoded typographic characters -> ASCII.
  $singles = @(
    @(0x2018, "'"), @(0x2019, "'"), @(0x201A, "'"), @(0x201B, "'"),
    @(0x201C, '"'), @(0x201D, '"'), @(0x201E, '"'),
    @(0x2013, "-"), @(0x2014, "-"), @(0x2022, "-"),
    @(0x00A0, " "),
    # 3) residues with no ASCII equivalent: stray U+00C2 (left over from a
    #    mis-decoded NBSP) and the Unicode replacement character.
    @(0x00C2, ""),  @(0xFFFD, "")
  )
  foreach($pair in $singles){
    $s = $s.Replace([string][char]$pair[0], [string]$pair[1])
  }
  # NOTE(review): the original also had a replacement whose pattern and value were
  # both a bare '?' (an invalid regex, presumably a character lost in an encoding
  # round-trip). Its intent cannot be recovered from the source, so it is omitted.

  # 4) Stringified-hashtable residue: neutralize the opener only. Closing '}' is
  #    left untouched so valid JSON fragments are not damaged.
  $s = $s.Replace('@{', '(')

  # 5) Replace characters of Unicode category C (controls, format, surrogates, ...)
  #    other than CR/LF/TAB with a space, then collapse whitespace runs.
  $s = [regex]::Replace($s, '[^\P{C}\r\n\t]', ' ')
  $s = [regex]::Replace($s, '\s{2,}', ' ')
  $s.Trim()
}

# Builds a sanitized, ordered copy of one KB entry: every plain string field and
# every element of the string arrays (tags, seen_in_threads) goes through
# Normalize-Ascii-Extended; 'blocking' is coerced to bool and 'last_seen' to string.
function Sanitize-Entry($e){
  $id    = Normalize-Ascii-Extended ([string]$e.id)
  $title = Normalize-Ascii-Extended ([string]$e.title)
  $work  = Normalize-Ascii-Extended ([string]$e.workaround)
  $note  = Normalize-Ascii-Extended ([string]$e.note)
  $fix   = Normalize-Ascii-Extended ([string]$e.fix)

  $tags = @()
  if($e.tags){
    $tags = @($e.tags | ForEach-Object { Normalize-Ascii-Extended ([string]$_) })
  }

  $seenIn = @()
  if($e.seen_in_threads){
    # Accept both a single string and an array of strings.
    if($e.seen_in_threads -is [string]){
      $seenIn = @(Normalize-Ascii-Extended ([string]$e.seen_in_threads))
    } else {
      $seenIn = @($e.seen_in_threads | ForEach-Object { Normalize-Ascii-Extended ([string]$_) })
    }
  }

  [ordered]@{
    id              = $id
    title           = $title
    blocking        = [bool]$e.blocking
    workaround      = $work
    note            = $note
    fix             = $fix
    tags            = @($tags)
    seen_in_threads = @($seenIn)
    last_seen       = [string]$e.last_seen
  }
}

# ---- Load the canonical KB ----
$Kb = Join-Path $Root "bug_kb\BUG_KB.json.txt"
if(-not (Test-Path -LiteralPath $Kb)){ Write-Host "[ERR] KB absente: $Kb"; exit 2 }

$raw  = Get-Content -LiteralPath $Kb -Raw
# Keep only the JSON part that precedes the version footer.
$head = ($raw -split "(?m)^--- DOC-VERSION-FOOTER")[0]

# Diagnostic pattern counting residues before/after sanitation. Built from code
# points so it is always a valid regex.
# NOTE(review): the original alternation was partly unreadable and contained a bare
# '?' alternative (invalid regex); the set below is a reconstruction - confirm.
$badTokens = @(
  ([string][char]0x00C2)                          # stray A-circumflex
  (([string][char]0x00E2) + [char]0x20AC)         # mojibake lead
  ([string][char]0x2019)                          # right single quote
  ([string][char]0x201C)                          # left double quote
  ([string][char]0xFFFD)                          # replacement character
  '@\{'                                           # stringified hashtable opener
)
$pat = $badTokens -join '|'
$badBefore = ([regex]::Matches([string]$head, $pat)).Count

try {
  $obj = $head | ConvertFrom-Json -ErrorAction Stop
} catch {
  Write-Host "[ERR] KB JSON invalide: $($_.Exception.Message)"; exit 3
}

# Treat a missing/empty 'entries' property (or an empty document) as no entries.
$entries = @()
if($obj -and $obj.entries){ $entries = @($obj.entries) }

# ---- Sanitation ----
$clean = New-Object System.Collections.ArrayList
foreach($e in $entries){
  $null = $clean.Add( (Sanitize-Entry $e) )
}

$out  = [ordered]@{ entries=@($clean); updated=(Get-NowIso) }
$json = ($out | ConvertTo-Json -Depth 6 -Compress)
$badAfter = ([regex]::Matches($json, $pat)).Count
$footer = "`r`n`r`n--- DOC-VERSION-FOOTER ---`r`nGenerated: $($out.updated)`r`nPolicy: TXT-ONLY v1.0; SAFE-WRITE v1.1; GOV_SCRIPT_GATE v1.3`r`nSource: KB_SANITIZE_PASS_v1.0`r`n"

# Preview is the default: nothing is written unless -Execute is given without -Preview.
if($Preview -or (-not $Execute)){
  Write-Host "== PREVIEW :: KB SANITIZE PASS =="
  Write-Host ("Entries={0} badBefore={1} badAfter_if_written={2}" -f $clean.Count, $badBefore, $badAfter)
  Write-Host ("Will write -> {0} out-json-bytes~{1:n0}" -f $Kb, ([Text.Encoding]::UTF8.GetByteCount($json)))
  Write-Host "No write performed (Preview)."
  exit 0
}

# ---- Execute: SAFE-WRITE the sanitized KB ----
try {
  $bak = Write-SafeText -Target $Kb -Content ($json + $footer) -StageRoot $StageRoot
} catch {
  Write-Host "[ERR] Ecriture KB echouee: $($_.Exception.Message)"; exit 4
}
Write-Host ("[OK] KB sanitizee et reecrite: {0}" -f $Kb)
# if/else instead of the ternary operator, which only exists in PowerShell 7+.
$bakShown = if($bak){ $bak } else { "" }
Write-Host ("Backup: {0}" -f $bakShown)