# kb_bulk_ingest_v1.3.ps1
# Purpose     : Ingest updates\inbox\kb\AUTO_*.txt (JSON: {entries:[...]}, a bare array, or a
#               single object), apply strict ASCII sanitation, de-duplicate by slug(title),
#               then (with -Execute) update BUG_KB.json.txt.
# Constraints : PS 5.1, UTF-8 BOM, SAFE-WRITE (staging + .tmp + .bak), streaming reads,
#               source code free of non-ASCII characters.
# Exit codes  : 0 = preview done or update written, 2 = canonical KB missing,
#               3 = canonical KB unreadable / not a JSON object, 4 = KB write failed.
# NOTE(review): the previous header comment had a stray
#               "Get-ChildItem ... | ForEach-Object { Select-String ... }" fragment spliced
#               into it. It was comment text only and is intentionally not reproduced as code.
[CmdletBinding()]
param(
  [switch]$Preview,
  [switch]$Execute,
  [string]$Root = "\\DS-918\chatgpt\ChatGPT-Gouvernance-Projets\_registry",
  [string]$StageRoot = "C:\Temp_Gouvernance"
)

# Create the directory $Path (and missing parents) when it does not exist yet.
function Ensure-Dir([string]$Path){
  if(-not (Test-Path -LiteralPath $Path)){
    New-Item -ItemType Directory -Force -Path $Path | Out-Null
  }
}

# Make sure the parent directory of $Target exists.
function Ensure-Parent([string]$Target){
  $p = Split-Path -Parent $Target
  if($p){ Ensure-Dir $p }
}

# Current local time formatted as yyyy-MM-ddTHH:mm:ssK.
function Get-NowIso(){
  (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssK")
}

# SAFE-WRITE: write $Content (UTF-8 with BOM) to a staging file under $StageRoot, back up an
# existing $Target to "<Target>.<yyyyMMdd_HHmmss>.bak", copy the staging file to "<Target>.tmp"
# and move that over $Target.
# Returns the backup path, or $null when $Target did not exist.
# Any failing step raises a terminating error so the caller never mistakes a failed write for a
# successful one; the staging file and a leftover "<Target>.tmp" are removed on every exit path.
function Write-SafeText([string]$Target,[string]$Content,[string]$StageRoot){
  Ensure-Dir $StageRoot
  Ensure-Parent $Target
  $tmp  = Join-Path $StageRoot ("write_" + [IO.Path]::GetRandomFileName())
  $tmpR = "$Target.tmp"
  $utf8 = New-Object Text.UTF8Encoding($true)   # $true = emit the UTF-8 BOM
  $bak  = $null
  try{
    [IO.File]::WriteAllText($tmp,$Content,$utf8)
    if(Test-Path -LiteralPath $Target){
      $bak = $Target + "." + (Get-Date -f yyyyMMdd_HHmmss) + ".bak"
      Copy-Item -LiteralPath $Target -Destination $bak -Force -ErrorAction Stop
    }
    Copy-Item -LiteralPath $tmp -Destination $tmpR -Force -ErrorAction Stop
    Move-Item -LiteralPath $tmpR -Destination $Target -Force -ErrorAction Stop
  } finally {
    # After a successful move "<Target>.tmp" no longer exists, so this only fires on failure.
    if(Test-Path -LiteralPath $tmpR){ Remove-Item -LiteralPath $tmpR -Force -ErrorAction SilentlyContinue }
    if(Test-Path -LiteralPath $tmp){ Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue }
  }
  return $bak
}

# Stream the file line by line and return its text (lines joined with LF) up to, but not
# including, the first line matching "--- DOC-VERSION-FOOTER".
function Read-JsonSansFooter([string]$Path){
  $sr = $null
  try{
    # Constructed inside the try so an open failure reaches the caller as an error instead of
    # leaving a null reader to be used below.
    $sr = New-Object IO.StreamReader($Path)
    $L = New-Object 'System.Collections.Generic.List[string]'
    while(-not $sr.EndOfStream){
      $line = $sr.ReadLine()
      if($line -match '^\s*---\s*DOC-VERSION-FOOTER'){ break }
      $L.Add($line) | Out-Null
    }
    ($L -join "`n")
  } finally {
    if($sr){ $sr.Dispose() }
  }
}

# Strict ASCII sanitation (no non-ASCII literal appears in this source).
# Returns the trimmed, sanitized string; "" for null input.
function Normalize-Ascii-Strict([string]$s){
  if($null -eq $s){ return "" }
  # 1) Typographic quotes/dashes and NBSP -> ASCII. The code points are written as .NET regex
  #    \uXXXX escapes inside single-quoted patterns: PowerShell 5.1 has no backtick-u string
  #    escape, so a double-quoted backtick-u sequence would match the literal text "u2018".
  $s = $s -replace '[\u2018\u2019\u201A\u201B]', "'"
  $s = $s -replace '[\u201C\u201D\u201E]', '"'
  $s = $s -replace '[\u2013\u2014]', '-'
  $s = $s -replace '\u00A0', ' '
  # 2) Neutralize '@{' (stringified hashtable).
  $s = $s -replace '@\{', '('
  # 3) Replace everything that is not printable ASCII (TAB/LF/CR excepted) with a space.
  $s = [regex]::Replace($s, "[^\x09\x0A\x0D\x20-\x7E]", " ")
  # 4) Collapse runs of two or more whitespace characters into a single space.
  $s = [regex]::Replace($s, "\s{2,}", " ")
  $s.Trim()
}

# Lower-case $t and turn every run of characters outside [a-z0-9] into a single "-",
# trimming leading/trailing "-". Returns "" for blank input.
function Slug([string]$t){
  if([string]::IsNullOrWhiteSpace($t)){ return "" }
  $x = $t.ToLowerInvariant()
  [regex]::Replace($x, "[^a-z0-9]+", "-").Trim("-")
}

# --- Paths
$BugKbDir  = Join-Path $Root "bug_kb"
$KbCanon   = Join-Path $BugKbDir "BUG_KB.json.txt"
$Inbox     = Join-Path $Root "updates\inbox\kb"
$Processed = Join-Path $Root ("updates\processed\kb\" + (Get-Date -f yyyyMMdd))
Ensure-Dir $BugKbDir; Ensure-Dir $Inbox; Ensure-Dir $Processed

# --- Load the existing canonical KB (footer stripped)
if(-not (Test-Path -LiteralPath $KbCanon)){ Write-Host "[ERR] KB canonique absente: $KbCanon"; exit 2 }
try{
  $kbRaw = Read-JsonSansFooter $KbCanon
  $kb = $kbRaw | ConvertFrom-Json -ErrorAction Stop
} catch {
  Write-Host "[ERR] KB JSON invalide: $($_.Exception.Message)"; exit 3
}
# An empty file or a non-object root (array, scalar) cannot carry 'entries'/'updated'.
if($null -eq $kb -or $kb -isnot [System.Management.Automation.PSCustomObject]){
  Write-Host "[ERR] KB JSON invalide: la racine n'est pas un objet"; exit 3
}
# Test for the property itself: an existing but empty 'entries' array is falsy, and adding a
# second member with the same name would fail.
if($null -eq $kb.PSObject.Properties['entries'] -or $null -eq $kb.entries){
  $kb | Add-Member -Name entries -MemberType NoteProperty -Value @() -Force
}

# Index the slugs already present in the KB
$seen = @{}
foreach($e in $kb.entries){
  $t = [string]$e.title
  if(-not [string]::IsNullOrWhiteSpace($t)){
    $slug = Slug $t
    if($slug){ $seen[$slug] = $true }
  }
}

# Collect the AUTO_*.txt files (wrapped in @() so .Count and foreach behave for 0 or 1 file)
$autos = @(Get-ChildItem -LiteralPath $Inbox -Filter "AUTO_*.txt" -File | Sort-Object Name)
$totalCandidates = 0
$totalNew = 0
$totalDup = 0
$toAdd = New-Object System.Collections.ArrayList
$perFile = @()
$failed = @{}   # FullName -> $true for files whose content could not be read/parsed

foreach($f in $autos){
  $obj = $null
  try{
    $txt = Read-JsonSansFooter $f.FullName
    $obj = $txt | ConvertFrom-Json -ErrorAction Stop
  } catch {
    # Report the failure and remember the file so EXECUTE does not archive unprocessed data.
    $obj = $null
    $failed[$f.FullName] = $true
    Write-Host ("[WARN] JSON invalide, fichier ignore: {0} ({1})" -f $f.Name, $_.Exception.Message)
  }

  # Accepted shapes: {entries:[...]}, a bare array, or a single entry object.
  $entries = @()
  if($obj){
    if($obj.PSObject.Properties.Name -contains 'entries'){ $entries = @($obj.entries) }
    else { $entries = @($obj) }
  }

  $candidates = 0
  $newHere = 0
  $dupHere = 0
  foreach($e in $entries){
    $candidates++
    $title = Normalize-Ascii-Strict ([string]$e.title)
    $slug = Slug $title
    # Entries without a usable title are counted as candidates but neither new nor duplicate.
    if([string]::IsNullOrWhiteSpace($slug)){ continue }
    if($seen.ContainsKey($slug)){ $dupHere++; continue }
    $seen[$slug] = $true

    $id   = Normalize-Ascii-Strict ([string]$e.id)
    $work = Normalize-Ascii-Strict ([string]$e.workaround)
    $note = Normalize-Ascii-Strict ([string]$e.note)
    $fix  = Normalize-Ascii-Strict ([string]$e.fix)

    $tags = @()
    if($e.tags){
      foreach($t in $e.tags){ $tags += (Normalize-Ascii-Strict ([string]$t)) }
    }

    $seenIn = @()
    if($e.seen_in_threads){
      if($e.seen_in_threads -is [string]){
        $seenIn = @(Normalize-Ascii-Strict ([string]$e.seen_in_threads))
      } else {
        $seenIn = @($e.seen_in_threads | ForEach-Object { Normalize-Ascii-Strict ([string]$_) })
      }
    }

    $objOut = [ordered]@{
      id = $id
      title = $title
      blocking = [bool]$e.blocking
      workaround = $work
      note = $note
      fix = $fix
      tags = @($tags)
      seen_in_threads = @($seenIn)
      last_seen = [string]$e.last_seen
    }
    $null = $toAdd.Add($objOut)
    $newHere++
  }

  $totalCandidates += $candidates
  $totalNew += $newHere
  $totalDup += $dupHere
  $perFile += ,[pscustomobject]@{ Name=$f.Name; Size=$f.Length; Candidates=$candidates; New=$newHere; Duplicates=$dupHere }
}

# --- PREVIEW (also the default when -Execute is not given)
if($Preview -or (-not $Execute)){
  Write-Host "== PREVIEW :: BULK INGEST v1.3 =="
  Write-Host ("Inbox files : {0}" -f $autos.Count)
  Write-Host ("Candidates total: {0} New: {1} Duplicates: {2}" -f $totalCandidates,$totalNew,$totalDup)
  foreach($p in $perFile){
    Write-Host (" - {0} size={1:n0} cand={2} new={3} dup={4}" -f $p.Name,$p.Size,$p.Candidates,$p.New,$p.Duplicates)
  }
  Write-Host "No write performed (Preview)."
  exit 0
}

# --- EXECUTE
$kb.entries = @(@($kb.entries) + @($toAdd))
$stamp = Get-NowIso
# Assigning to a missing property on a PSCustomObject throws, so add 'updated' when absent;
# when present, plain assignment keeps the existing property order.
if($null -ne $kb.PSObject.Properties['updated']){ $kb.updated = $stamp }
else { $kb | Add-Member -Name updated -MemberType NoteProperty -Value $stamp }

$json = ($kb | ConvertTo-Json -Depth 6 -Compress)
$footer = "`r`n`r`n--- DOC-VERSION-FOOTER ---`r`nGenerated: $($kb.updated)`r`nPolicy: TXT-ONLY v1.0; SAFE-WRITE v1.1; GOV_SCRIPT_GATE v1.3`r`nSource: KB_BULK_INGEST_v1.3`r`n"

try{
  $bak = Write-SafeText -Target $KbCanon -Content ($json + $footer) -StageRoot $StageRoot
} catch {
  # Stop before touching the inbox: the AUTO files must stay available for a retry.
  Write-Host "[ERR] Ecriture KB echouee: $($_.Exception.Message)"; exit 4
}

# Move the processed AUTO files; files that failed to parse stay in the inbox.
foreach($f in $autos){
  if($failed.ContainsKey($f.FullName)){ continue }
  $dest = Join-Path $Processed $f.Name
  Ensure-Parent $dest
  Move-Item -LiteralPath $f.FullName -Destination $dest -Force
}

$bakMsg = $bak; if(-not $bakMsg){ $bakMsg = "" }
Write-Host ("[OK] KB mise a jour : +{0} new entries -> {1}" -f $totalNew, $KbCanon)
Write-Host ("Backup: {0}" -f $bakMsg)
if($failed.Count -gt 0){
  Write-Host ("[WARN] {0} fichier(s) invalide(s) laisse(s) dans l'inbox" -f $failed.Count)
}