# kb_bulk_ingest_v1.3.ps1 (regenerated, safe & ASCII)
# Object  : Ingest updates\inbox\kb\AUTO_*.json.txt, normalize ASCII, dedupe by slug(title),
#           and (on -Execute) update bug_kb\BUG_KB.json.txt via SAFE-WRITE.
# Policy  : PS 5.1 strict, script saved as UTF-8 with BOM (this file), JSON/TXT outputs as UTF-8 without BOM.
# Limits  : No here-strings, no PS7 operators, no ternary, no Select-String -Recurse.
# Output  : PREVIEW prints counts and per-file lines. EXECUTE prints OK + backup path.
# Encoding: ASCII-safe literals only inside the script.
# Exit    : 0 on success/preview, 1 when -Execute is refused or the KB write fails.
[CmdletBinding()]
param(
  [switch]$Preview,
  [switch]$Execute,
  [string]$Root = "\\DS-918\\chatgpt\\ChatGPT-Gouvernance-Projets\\_registry",
  [string]$StageRoot = "C:\\Temp_Gouvernance"
)

# ============================= Utils (PS 5.1) =============================

# Creates the directory $Path if it does not exist. Blank input is ignored.
function Ensure-Dir([string]$Path){
  if([string]::IsNullOrWhiteSpace($Path)){ return }
  if(-not (Test-Path -LiteralPath $Path)){
    New-Item -ItemType Directory -Force -Path $Path | Out-Null
  }
}

# Creates the parent directory of $Target if it does not exist.
function Ensure-Parent([string]$Target){
  if([string]::IsNullOrWhiteSpace($Target)){ return }
  $p = Split-Path -Parent $Target
  if($p){ Ensure-Dir $p }
}

# Returns the current local time as an ISO-8601 string with UTC offset.
function Get-NowIso(){
  (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssK")
}

# Folds a string to 7-bit ASCII.
# A few common typographic characters are mapped to an ASCII equivalent;
# every other non-ASCII character becomes "?". $null yields "".
function To-Ascii([string]$s){
  if($null -eq $s){ return "" }
  $singleQuotes = @(0x2018, 0x2019, 0x201A, 0x201B)
  $doubleQuotes = @(0x201C, 0x201D, 0x201E)
  $dashes       = @(0x2013, 0x2014, 0x2212)
  $sb = New-Object System.Text.StringBuilder
  foreach($ch in $s.ToCharArray()){
    $code = [int][char]$ch
    if($code -le 0x7F){
      # Keep ASCII directly
      [void]$sb.Append($ch)
    } elseif($singleQuotes -contains $code){
      [void]$sb.Append("'")
    } elseif($doubleQuotes -contains $code){
      [void]$sb.Append('"')
    } elseif($dashes -contains $code){
      [void]$sb.Append("-")
    } elseif($code -eq 0x00A0){
      # non-breaking space
      [void]$sb.Append(" ")
    } elseif($code -eq 0x2026){
      # horizontal ellipsis
      [void]$sb.Append("...")
    } else {
      [void]$sb.Append("?")
    }
  }
  $sb.ToString()
}

# Reads a UTF-8 text file and returns only its JSON payload:
#   - U+FEFF characters are removed,
#   - everything after the last '}' or ']' (the doc footer) is cut,
#   - /* ... */ blocks and lines starting with // are removed.
# Returns "" when the file is empty or unreadable.
function Read-TextSansFooter([string]$Path){
  $raw = Get-Content -LiteralPath $Path -Raw -Encoding UTF8
  if($null -eq $raw){ return "" }

  # PS 5.1 has no `u escape inside strings, so build the BOM character from its code point.
  $raw = $raw.Replace([string][char]0xFEFF, "")

  # Trim after last closing brace or bracket
  $lastObj = $raw.LastIndexOf('}')
  $lastArr = $raw.LastIndexOf(']')
  $last = $lastObj
  if($lastArr -gt $last){ $last = $lastArr }
  if($last -gt 0){ $raw = $raw.Substring(0, $last + 1) }

  # Remove C/JS style comments (non-greedy, simple).
  # NOTE: these patterns are not string-aware; a JSON string value that
  # contains "/*" ... "*/" would be altered as well.
  $raw = [regex]::Replace($raw, '/\*.*?\*/', "", "Singleline")
  $raw = [regex]::Replace($raw, '^\s*//.*$', "", "Multiline")
  return $raw
}

# Parses JSON text. On failure prints an [ERR] line and returns $null.
# $script:JsonParseFailed tells the caller whether the last call failed,
# because a $null result alone is ambiguous (empty input also yields $null).
# A top-level JSON array is unrolled by 'return': one element comes back as
# that element, several as Object[].
function Parse-JsonSafe([string]$txt){
  $script:JsonParseFailed = $false
  try{
    return $txt | ConvertFrom-Json -ErrorAction Stop
  }catch{
    $script:JsonParseFailed = $true
    Write-Host ("[ERR] JSON invalide: {0}" -f $_.Exception.Message)
    return $null
  }
}

# Builds the dedupe key of a title: lower-case, ASCII, runs of characters
# outside [a-z0-9] collapsed to "-", leading/trailing "-" trimmed.
# Blank input yields "".
function Compute-Slug([string]$title){
  if([string]::IsNullOrWhiteSpace($title)){ return "" }
  # Lower-case first, then fold to ASCII.
  $t = To-Ascii ($title.ToLowerInvariant())
  $t = ($t -replace "[^a-z0-9]+","-")
  $t = $t.Trim('-')
  return $t
}

# Returns a hashtable slug -> index of the first KB entry having that slug.
# The slug is computed from the entry title, or from its id when title is null.
function Build-SlugIndex($kb){
  $map = @{}
  if($null -eq $kb -or $null -eq $kb.entries){ return $map }
  $i = 0
  foreach($e in $kb.entries){
    $ti = ""
    if($null -ne $e.title){ $ti = [string]$e.title }
    elseif($null -ne $e.id){ $ti = [string]$e.id }
    $slug = Compute-Slug $ti
    if(-not [string]::IsNullOrWhiteSpace($slug)){
      if(-not $map.ContainsKey($slug)){ $map[$slug] = $i }
    }
    $i = $i + 1
  }
  return $map
}

# ASCII-folds the text fields of an entry in place (title, workaround, note,
# fix, and each element of tags) and returns the same object. $null yields $null.
function Normalize-Entry($e){
  if($null -eq $e){ return $null }
  foreach($field in @("title","workaround","note","fix")){
    if($e.PSObject.Properties.Match($field).Count -gt 0){
      $e.$field = To-Ascii ([string]$e.$field)
    }
  }
  if($e.PSObject.Properties.Match("tags").Count -gt 0 -and $null -ne $e.tags){
    $arr = @()
    foreach($t in $e.tags){ $arr += ,(To-Ascii ([string]$t)) }
    $e.tags = $arr
  }
  return $e
}

# Returns the text used to compute an entry's slug: its title when the
# property exists, else its id, else "".
function Get-EntryTitle($e){
  if($e.PSObject.Properties.Match("title").Count -gt 0){ return [string]$e.title }
  if($e.PSObject.Properties.Match("id").Count -gt 0){ return [string]$e.id }
  return ""
}

# Returns the candidate entries carried by a parsed inbox document as an array.
# Accepted shapes: { entries: [...] }, a top-level array, or a single entry object.
function Get-IngestCandidates($obj){
  if($null -eq $obj){ return ,@() }
  if($obj.PSObject.Properties.Match("entries").Count -gt 0 -and $null -ne $obj.entries){
    return ,@($obj.entries)
  }
  if($obj -is [System.Array]){ return ,$obj }
  return ,@($obj)
}

# Loads the canonical KB and guarantees the result has an 'entries' property.
# A missing, empty or unparseable file yields an empty KB object.
# $script:KbLoadFailed is set to $true when the file exists, is not blank,
# and could not be parsed, so the caller can refuse to overwrite it.
function Read-Kb([string]$KbCanon){
  $script:KbLoadFailed = $false
  if(-not (Test-Path -LiteralPath $KbCanon)){
    return New-Object PSObject -Property @{ entries = @() }
  }
  $txt = Read-TextSansFooter $KbCanon
  $o = Parse-JsonSafe $txt
  if($null -eq $o){
    if($script:JsonParseFailed -and -not [string]::IsNullOrWhiteSpace($txt)){
      $script:KbLoadFailed = $true
    }
    $o = New-Object PSObject -Property @{ entries = @() }
  }
  if($null -eq $o.entries){
    # -Force: the property may already exist with a null value.
    $o | Add-Member -Name entries -MemberType NoteProperty -Value @() -Force
  }
  return $o
}

# Writes $Content to $Target as UTF-8 WITHOUT BOM using staging + __tmp__ + .bak.
# Steps: write to a staging file under $StageRoot\stage_io, back up an existing
# target to "<target>.<yyyyMMdd_HHmmss>.bak", copy the staged file next to the
# target as "<target>.__tmp__", then move it over the target.
# Returns the backup path, or $null when there was nothing to back up.
# Throws on any failure, so a caller never sees a half-done write as success.
function Write-SafeText([string]$Target,[string]$Content,[string]$StageRoot){
  # Function-scoped: turn cmdlet errors into exceptions for the steps below.
  $ErrorActionPreference = 'Stop'
  $stage = Join-Path $StageRoot "stage_io"
  Ensure-Dir $stage
  $tmp = Join-Path $stage ("__tmp_kb_" + [guid]::NewGuid().ToString("N") + ".txt")
  $bak = $null
  try{
    $enc = New-Object System.Text.UTF8Encoding($false)
    [System.IO.File]::WriteAllText($tmp,$Content,$enc)
    Ensure-Parent $Target
    if(Test-Path -LiteralPath $Target){
      $bak = $Target + "." + (Get-Date).ToString("yyyyMMdd_HHmmss") + ".bak"
      Copy-Item -LiteralPath $Target -Destination $bak -Force
    }
    $destTmp = $Target + ".__tmp__"
    if(Test-Path -LiteralPath $destTmp){ Remove-Item -LiteralPath $destTmp -Force }
    Copy-Item -LiteralPath $tmp -Destination $destTmp -Force
    Move-Item -LiteralPath $destTmp -Destination $Target -Force
  }finally{
    # Do not leave staging files behind, whether the write succeeded or not.
    if(Test-Path -LiteralPath $tmp){
      Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue
    }
  }
  return $bak
}

# ============================= Paths ======================================
$BugKbDir  = Join-Path $Root "bug_kb"
$KbCanon   = Join-Path $BugKbDir "BUG_KB.json.txt"
$Inbox     = Join-Path $Root "updates\\inbox\\kb"
$Processed = Join-Path $Root ("updates\\processed\\kb\\" + (Get-Date -f yyyyMMdd))
# Directories are created in the EXECUTE section only, so a preview run
# leaves the file system untouched.

# ============================= Load KB & index ============================
$kb = Read-Kb $KbCanon
$index = Build-SlugIndex $kb

# ============================= Collect AUTO_* =============================
$autos = @()
if(Test-Path -LiteralPath $Inbox){
  # "*.txt" also covers "*.json.txt".
  $autos = @(Get-ChildItem -LiteralPath $Inbox -File |
    Where-Object { ($_.Name -like "AUTO_*") -and ($_.Name -like "*.txt") })
}

# ============================= PREVIEW header =============================
Write-Host "== PREVIEW :: BULK INGEST v1.3 =="
Write-Host ("Inbox files : {0}" -f ($autos.Count))
$total = 0
$newTotal = 0
$dupTotal = 0
$perFile = @()

# ============================= Process files =============================
foreach($f in $autos){
  $txt = Read-TextSansFooter $f.FullName
  $obj = Parse-JsonSafe $txt
  $cand = 0; $new = 0; $dup = 0
  if($null -ne $obj){
    $items = Get-IngestCandidates $obj
    foreach($item in $items){
      # A JSON null inside an array is not an entry; skip it.
      if($null -eq $item){ continue }
      $cand = $cand + 1
      $e = Normalize-Entry $item
      $slug = Compute-Slug (Get-EntryTitle $e)
      $isNew = $true
      if(-not [string]::IsNullOrWhiteSpace($slug)){
        if($index.ContainsKey($slug)){
          $isNew = $false
        }else{
          # Register the slug so later candidates in this run dedupe against it.
          $index[$slug] = 1
        }
      }
      # Entries without a usable slug cannot be deduped and are always treated as new.
      if($isNew){
        $new = $new + 1
        if($Execute){ $kb.entries += ,$e }
      }else{
        $dup = $dup + 1
      }
    }
  }
  $total = $total + $cand
  $newTotal = $newTotal + $new
  $dupTotal = $dupTotal + $dup
  $perFile += ,@($f.Name, $f.Length, $cand, $new, $dup)
}
Write-Host ("Candidates total: {0} New: {1} Duplicates: {2}" -f $total,$newTotal,$dupTotal)
foreach($r in $perFile){
  Write-Host (" - {0} size={1} cand={2} new={3} dup={4}" -f $r[0],$r[1],$r[2],$r[3],$r[4])
}

# ============================= Exit on Preview =============================
if(-not $Execute){
  if($Preview){
    Write-Host "No write performed (Preview)."
  }else{
    Write-Host "No write performed (default is Preview-only)."
  }
  exit 0
}

# ============================= EXECUTE ====================================
# Never replace an existing KB that could not be read: that would drop every
# entry it holds and keep only the ones ingested in this run.
if($script:KbLoadFailed){
  Write-Host ("[ERR] Existing KB could not be parsed; refusing to overwrite: {0}" -f $KbCanon)
  exit 1
}

Ensure-Dir $BugKbDir; Ensure-Dir $Inbox; Ensure-Dir $Processed

# Plain assignment fails when the property does not exist yet (new KB or a
# legacy file without 'updated'), so add it in that case.
$stamp = Get-NowIso
if($kb.PSObject.Properties.Match("updated").Count -gt 0){
  $kb.updated = $stamp
}else{
  $kb | Add-Member -MemberType NoteProperty -Name updated -Value $stamp
}

$json = ($kb | ConvertTo-Json -Depth 20 -Compress)
$footer = "`r`n`r`n--- DOC-VERSION-FOOTER ---`r`nGenerated: $((Get-Date).ToString('s'))`r`nPolicy: SAFE-WRITE v1.1; GOV_SCRIPT_GATE v1.4`r`nSource: KB_BULK_INGEST_v1.3`r`n"

$bak = $null
try{
  $bak = Write-SafeText -Target $KbCanon -Content ($json + $footer) -StageRoot $StageRoot
}catch{
  # Inbox files are only archived after a successful write.
  Write-Host ("[ERR] KB write failed, inbox left untouched: {0}" -f $_.Exception.Message)
  exit 1
}

foreach($f in $autos){
  $dest = Join-Path $Processed $f.Name
  Ensure-Parent $dest
  Move-Item -LiteralPath $f.FullName -Destination $dest -Force
}

$bakMsg = $bak; if(-not $bakMsg){ $bakMsg = "" }
Write-Host ("[OK] KB mise a jour : +{0} new entries -> {1}" -f $newTotal, $KbCanon)
Write-Host ("Backup: {0}" -f $bakMsg)
exit 0