# kb_bulk_ingest_v1.3.ps1 (regenerated)
# Purpose     : Ingest updates\inbox\kb\AUTO_*.txt files (JSON: {entries:[...]}, a top-level array,
#               or a single object), sanitise text fields to strict ASCII, de-duplicate by
#               slug(title), then (with -Execute) update BUG_KB.json.txt.
# Constraints : PowerShell 5.1 only, UTF-8 with BOM (this script), SAFE-WRITE (staging + __tmp__ + .bak),
#               NO ternary, no Select-String -Recurse, no here-strings, no PS7 operators.
# Output      :
#   Always  -> "== PREVIEW :: BULK INGEST v1.3 ==", "Inbox files : X",
#              "Candidates total: T New: N Duplicates: D",
#              then one line per file: " - NAME size=S cand=C new=N dup=D"
#   EXECUTE -> "[OK] KB mise a jour : +N new entries -> KBPATH" followed by "Backup: BAKPATH"
#   Without -Execute the script is read-only with respect to the KB and the inbox files
#   (the working directories are still created if missing).
[CmdletBinding()]
param(
  [switch]$Preview,
  [switch]$Execute,
  # NOTE(review): backslash is not an escape character in PowerShell, so these defaults contain
  # literal doubled separators. They are kept byte-identical; confirm the target hosts accept them.
  [string]$Root = "\\DS-918\\chatgpt\\ChatGPT-Gouvernance-Projets\\_registry",
  [string]$StageRoot = "C:\\Temp_Gouvernance"
)

# ============================= Utils (pure PS 5.1) =============================

# Creates the directory $Path when it does not exist. Blank paths are ignored.
function Ensure-Dir([string]$Path){
  if([string]::IsNullOrWhiteSpace($Path)){ return }
  if(-not (Test-Path -LiteralPath $Path)){
    New-Item -ItemType Directory -Force -Path $Path | Out-Null
  }
}

# Creates the parent directory of $Target when it does not exist.
function Ensure-Parent([string]$Target){
  if([string]::IsNullOrWhiteSpace($Target)){ return }
  $p = Split-Path -Parent $Target
  if($p){ Ensure-Dir $p }
}

# Returns the current local time formatted as yyyy-MM-ddTHH:mm:ssK.
function Get-NowIso(){
  (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssK")
}

# Returns $s with typographic punctuation mapped to ASCII equivalents and every remaining
# non-ASCII UTF-16 code unit replaced by '?'. A null input yields "".
function To-Ascii([string]$s){
  if($null -eq $s){ return "" }
  $t = $s
  $t = $t -replace "\u2018|\u2019|\u201A|\u201B","'"
  # The replacement is single-quoted on purpose: "\"" does not terminate in PowerShell
  # (backslash is not an escape and "" is an escaped quote inside a double-quoted string).
  $t = $t -replace "\u201C|\u201D|\u201E",'"'
  $t = $t -replace "\u2013|\u2014|\u2212","-"
  $t = $t -replace "\u00A0"," "
  $t = $t -replace "\u2026","..."
  # Anything still outside the 7-bit range becomes '?'.
  $t = $t -replace "[^\u0000-\u007F]","?"
  return $t
}

# Reads $Path as UTF-8 and returns the JSON payload as text:
#  - U+FEFF characters are removed wherever they occur,
#  - /* ... */ blocks and whole-line // comments are removed,
#  - everything after the closing bracket of the top-level value (e.g. a text footer) is dropped.
# Returns "" for an empty file.
# NOTE(review): the comment stripping is regex-based and would also alter a JSON string value
# that contains "/*...*/" or a line starting with "//".
function Read-JsonSansFooter([string]$Path){
  $raw = Get-Content -LiteralPath $Path -Raw -Encoding UTF8
  if($null -eq $raw){ return "" }

  # PS 5.1 has no `u escape, so the BOM character is built from its code point.
  $raw = $raw.Replace([string][char]0xFEFF, "")

  # Comments are removed before truncation so that a bracket inside a trailing comment
  # cannot be mistaken for the end of the payload.
  $raw = [regex]::Replace($raw, "/\*.*?\*/", "", "Singleline")
  $raw = [regex]::Replace($raw, "^\s*//.*$", "", "Multiline")

  # A top-level array ends with ']', anything else is cut after the last '}'.
  $closer = [char]'}'
  $lead = $raw.TrimStart()
  if($lead.Length -gt 0 -and $lead[0] -eq [char]'['){ $closer = [char]']' }

  $last = $raw.LastIndexOf($closer)
  if($last -ge 0){ $raw = $raw.Substring(0, $last + 1) }
  return $raw
}

# Parses $txt as JSON. On failure writes an "[ERR] JSON invalide" line and returns $null.
# Because the result travels through the pipeline, a one-element top-level array arrives at the
# caller as that single element and an empty array arrives as $null.
function Parse-Json([string]$txt){
  try{
    return $txt | ConvertFrom-Json -ErrorAction Stop
  }catch{
    Write-Host ("[ERR] JSON invalide: {0}" -f $_.Exception.Message)
    return $null
  }
}

# Returns the de-duplication slug of $title: lower-cased, ASCII-sanitised, runs of characters
# outside [a-z0-9] collapsed to '-', leading/trailing '-' removed. Blank titles yield "".
function Compute-Slug([string]$title){
  if([string]::IsNullOrWhiteSpace($title)){ return "" }
  # Lower-casing happens before sanitising so slugs stay identical to those of earlier runs.
  $t = To-Ascii ($title.ToLowerInvariant())
  $t = ($t -replace "[^a-z0-9]+","-").Trim("-")
  return $t
}

# Returns the text an entry is keyed on: its non-null 'title', otherwise its 'id', otherwise "".
function Get-EntryTitle($e){
  if($null -eq $e){ return "" }
  $props = $e.PSObject.Properties
  if($props.Match("title").Count -gt 0 -and $null -ne $e.title){ return [string]$e.title }
  if($props.Match("id").Count -gt 0){ return [string]$e.id }
  return ""
}

# Builds a hashtable slug -> index of the first KB entry carrying that slug.
# Entries whose slug is empty are not indexed.
function Build-SlugIndex($kb){
  $map = @{}
  if($null -eq $kb -or $null -eq $kb.entries){ return $map }
  $i = 0
  foreach($e in $kb.entries){
    $slug = Compute-Slug (Get-EntryTitle $e)
    if(-not [string]::IsNullOrWhiteSpace($slug)){
      if(-not $map.ContainsKey($slug)){ $map[$slug] = $i }
    }
    $i = $i + 1
  }
  return $map
}

# Sanitises, in place, the text fields of an entry (title, workaround, note, fix, and each tag)
# with To-Ascii, then returns the entry. Only properties that already exist are touched;
# 'tags' is rewritten as an array of strings. A null entry is returned as $null.
function Normalize-Entry($e){
  if($null -eq $e){ return $null }
  foreach($name in @("title","workaround","note","fix")){
    if($e.PSObject.Properties.Match($name).Count -gt 0){
      $e.$name = To-Ascii ([string]$e.$name)
    }
  }
  if($e.PSObject.Properties.Match("tags").Count -gt 0 -and $null -ne $e.tags){
    $clean = @()
    foreach($tag in $e.tags){ $clean += ,(To-Ascii ([string]$tag)) }
    $e.tags = $clean
  }
  return $e
}

# Loads the canonical KB and returns an object that always has an 'entries' array.
# A missing or unparsable file yields an empty KB (Parse-Json has already reported the error);
# a file holding a bare JSON array is treated as the list of entries.
function Read-Kb([string]$KbCanon){
  $o = $null
  if(Test-Path -LiteralPath $KbCanon){
    $o = Parse-Json (Read-JsonSansFooter $KbCanon)
  }
  if($null -eq $o){
    return New-Object PSObject -Property @{ entries = @() }
  }
  if($o -is [System.Array]){
    return New-Object PSObject -Property @{ entries = $o }
  }
  if($o.PSObject.Properties.Match("entries").Count -eq 0){
    $o | Add-Member -Name entries -MemberType NoteProperty -Value @()
  }elseif($null -eq $o.entries){
    $o.entries = @()
  }else{
    # Guarantees that "+=" appends even if 'entries' held a single object.
    $o.entries = @($o.entries)
  }
  return $o
}

# Writes $Content to $Target as UTF-8 without BOM using the SAFE-WRITE sequence:
# staging file under $StageRoot\stage_io -> timestamped .bak of the existing target ->
# copy to "$Target.__tmp__" -> move over the target.
# Returns the backup path, or $null when there was no previous target.
# Any failing step throws, so the caller never reports success after a partial write;
# the staging file is removed in every case.
function Write-SafeText([string]$Target,[string]$Content,[string]$StageRoot){
  $stage = Join-Path $StageRoot "stage_io"
  Ensure-Dir $stage
  $tmp = Join-Path $stage ("__tmp_kb_" + [guid]::NewGuid().ToString("N") + ".txt")
  $enc = New-Object System.Text.UTF8Encoding($false)
  $bak = $null
  try{
    [System.IO.File]::WriteAllText($tmp,$Content,$enc)
    Ensure-Parent $Target
    if(Test-Path -LiteralPath $Target){
      $bak = $Target + "." + (Get-Date).ToString("yyyyMMdd_HHmmss") + ".bak"
      Copy-Item -LiteralPath $Target -Destination $bak -Force -ErrorAction Stop
    }
    $destTmp = $Target + ".__tmp__"
    if(Test-Path -LiteralPath $destTmp){ Remove-Item -LiteralPath $destTmp -Force -ErrorAction Stop }
    Copy-Item -LiteralPath $tmp -Destination $destTmp -Force -ErrorAction Stop
    Move-Item -LiteralPath $destTmp -Destination $Target -Force -ErrorAction Stop
  }finally{
    if(Test-Path -LiteralPath $tmp){
      Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue
    }
  }
  return $bak
}

# ============================= Main ============================================
$BugKbDir  = Join-Path $Root "bug_kb"
$KbCanon   = Join-Path $BugKbDir "BUG_KB.json.txt"
$Inbox     = Join-Path $Root "updates\\inbox\\kb"
$Processed = Join-Path $Root ("updates\\processed\\kb\\" + (Get-Date -f yyyyMMdd))
Ensure-Dir $BugKbDir; Ensure-Dir $Inbox; Ensure-Dir $Processed

$kb = Read-Kb $KbCanon
$index = Build-SlugIndex $kb

# Inbox candidates: files named AUTO_*.txt (which includes AUTO_*.json.txt).
$autos = @()
if(Test-Path -LiteralPath $Inbox){
  foreach($f in @(Get-ChildItem -LiteralPath $Inbox -File)){
    if($f.Name -like "AUTO_*" -and $f.Name -like "*.txt"){ $autos += ,$f }
  }
}

Write-Host "== PREVIEW :: BULK INGEST v1.3 =="
Write-Host ("Inbox files : {0}" -f ($autos.Count))

$total = 0
$newTotal = 0
$dupTotal = 0
$perFile = @()

foreach($f in $autos){
  $obj = Parse-Json (Read-JsonSansFooter $f.FullName)
  $cand = 0; $new = 0; $dup = 0

  # Reduce the three accepted shapes to one list of candidate entries.
  $entries = @()
  if($null -ne $obj){
    if($obj.PSObject.Properties.Match("entries").Count -gt 0 -and $null -ne $obj.entries){
      $entries = @($obj.entries)
    }elseif($obj -is [System.Array]){
      $entries = $obj
    }else{
      $entries = @($obj)
    }
  }

  foreach($e in $entries){
    # JSON null items carry no data; they are ignored instead of being stored in the KB.
    if($null -eq $e){ continue }
    $cand = $cand + 1
    $e = Normalize-Entry $e
    $slug = Compute-Slug (Get-EntryTitle $e)

    $isNew = $true
    if(-not [string]::IsNullOrWhiteSpace($slug)){
      if($index.ContainsKey($slug)){
        $isNew = $false
      }else{
        # Registered immediately so later candidates in the same run are de-duplicated too.
        $index[$slug] = 1
      }
    }
    # Entries without a usable slug cannot be de-duplicated and are always treated as new.

    if($isNew){
      $new = $new + 1
      if($Execute){ $kb.entries += ,$e }
    }else{
      $dup = $dup + 1
    }
  }

  $total = $total + $cand
  $newTotal = $newTotal + $new
  $dupTotal = $dupTotal + $dup
  $perFile += ,@($f.Name, $f.Length, $cand, $new, $dup)
}

Write-Host ("Candidates total: {0} New: {1} Duplicates: {2}" -f $total,$newTotal,$dupTotal)
foreach($r in $perFile){
  Write-Host (" - {0} size={1} cand={2} new={3} dup={4}" -f $r[0],$r[1],$r[2],$r[3],$r[4])
}

if(-not $Execute){
  # Without -Execute the run stays read-only, whether or not -Preview was given.
  if($Preview){
    Write-Host "No write performed (Preview)."
  }else{
    Write-Host "No write performed (default is Preview-only)."
  }
  exit 0
}

# ============================= EXECUTE ========================================
# Write the canonical KB, then move the AUTO_* files to the processed folder.
$stamp = Get-NowIso
if($kb.PSObject.Properties.Match("updated").Count -gt 0){
  $kb.updated = $stamp
}else{
  # Assigning to a property that does not exist throws on a PSCustomObject.
  $kb | Add-Member -Name updated -MemberType NoteProperty -Value $stamp
}

# Compressed JSON followed by a plain-text footer (must stay ASCII).
$json = ($kb | ConvertTo-Json -Depth 8 -Compress)
$footer = "`r`n`r`n--- DOC-VERSION-FOOTER ---`r`nGenerated: $((Get-Date).ToString('s'))`r`nPolicy: SAFE-WRITE v1.1; GOV_SCRIPT_GATE v1.4`r`nSource: KB_BULK_INGEST_v1.3`r`n"

$bak = $null
try{
  $bak = Write-SafeText -Target $KbCanon -Content ($json + $footer) -StageRoot $StageRoot
}catch{
  # The inbox files stay in place so the ingest can be retried once the cause is fixed.
  Write-Host ("[ERR] KB write failed: {0}" -f $_.Exception.Message)
  exit 1
}

foreach($f in $autos){
  $dest = Join-Path $Processed $f.Name
  Ensure-Parent $dest
  Move-Item -LiteralPath $f.FullName -Destination $dest -Force
}

$bakMsg = $bak; if(-not $bakMsg){ $bakMsg = "" }
Write-Host ("[OK] KB mise a jour : +{0} new entries -> {1}" -f $newTotal, $KbCanon)
Write-Host ("Backup: {0}" -f $bakMsg)
exit 0