# kb_bulk_ingest_v1.3.ps1
# Purpose: ingest updates\inbox\kb\AUTO_*.txt (JSON: {entries:[...]}, an array, or a single object),
#          strict ASCII sanitization, de-duplication by slug(title), then (EXECUTE) update of BUG_KB.json.txt.
# Constraints: PS 5.1, UTF-8 BOM, SAFE-WRITE (staging + .tmp + .bak), streaming reads,
#              NO ternary operator, no Select-String -Recurse. Source kept free of non-ASCII characters.

[CmdletBinding()]
param(
  # Dry run: print the per-file counters and exit 0 without rewriting the KB.
  # This is also what happens when -Execute is not supplied.
  [switch]$Preview,
  # Apply the ingest: rewrite BUG_KB.json.txt and move the inbox files to the processed folder.
  # Ignored when -Preview is also present (the preview branch runs first and exits).
  [switch]$Execute,
  # Registry root; bug_kb\, updates\inbox\kb\ and updates\processed\kb\ are resolved under it.
  [string]$Root = "\\DS-918\chatgpt\ChatGPT-Gouvernance-Projets\_registry",
  # Local staging directory handed to Write-SafeText for its temporary file.
  [string]$StageRoot = "C:\Temp_Gouvernance"
)

function Ensure-Dir([string]$Path){
  # Create a directory at $Path unless Test-Path already finds something there.
  # Nothing is written to the output stream.
  if(Test-Path -LiteralPath $Path){ return }
  $null = New-Item -ItemType Directory -Force -Path $Path
}
function Ensure-Parent([string]$Target){
  # Make sure the directory that would contain $Target exists.
  # When Split-Path yields no parent, nothing is done.
  $parentDir = Split-Path -Parent $Target
  if(-not $parentDir){ return }
  Ensure-Dir $parentDir
}
function Get-NowIso(){
  # Return the current local time as an ISO 8601 string with UTC offset,
  # e.g. 2024-05-01T13:45:10+02:00.
  # InvariantCulture pins the Gregorian calendar and the ':' separator; with the
  # single-argument ToString the session culture decides both, so the stamp could
  # stop being ISO 8601 on hosts using another calendar or time separator.
  (Get-Date).ToString("yyyy-MM-ddTHH:mm:ssK", [Globalization.CultureInfo]::InvariantCulture)
}
function Write-SafeText([string]$Target,[string]$Content,[string]$StageRoot){
  # Safe-write $Content to $Target as UTF-8 with BOM.
  #   1. write the content to a random staging file under $StageRoot
  #   2. if $Target already exists, copy it to "<Target>.<yyyyMMdd_HHmmss>.bak"
  #   3. copy the staging file to "<Target>.tmp", then move that over $Target
  # Returns the backup path, or $null when $Target did not exist beforehand.
  # Every step is terminating (-ErrorAction Stop): a failed copy or move throws
  # instead of letting the function return as if the write had succeeded.
  Ensure-Dir $StageRoot; Ensure-Parent $Target
  $tmp=Join-Path $StageRoot ("write_" + [IO.Path]::GetRandomFileName())
  $tmpR="$Target.tmp"
  $bak=$null
  $done=$false
  try{
    $utf8=New-Object Text.UTF8Encoding($true)  # UTF-8 BOM
    [IO.File]::WriteAllText($tmp,$Content,$utf8)
    if(Test-Path -LiteralPath $Target){
      $bak=$Target+"."+(Get-Date -f yyyyMMdd_HHmmss)+".bak"
      Copy-Item -LiteralPath $Target -Destination $bak -Force -ErrorAction Stop
    }
    Copy-Item -LiteralPath $tmp -Destination $tmpR -Force -ErrorAction Stop
    Move-Item -LiteralPath $tmpR -Destination $Target -Force -ErrorAction Stop
    $done=$true
  } finally {
    # Always drop the staging file; drop a leftover "<Target>.tmp" only when the write did not complete.
    if(Test-Path -LiteralPath $tmp){ Remove-Item -LiteralPath $tmp -Force -ErrorAction SilentlyContinue }
    if((-not $done) -and (Test-Path -LiteralPath $tmpR)){ Remove-Item -LiteralPath $tmpR -Force -ErrorAction SilentlyContinue }
  }
  return $bak
}
function Read-JsonSansFooter([string]$Path){
  # Stream the file line by line and return the lines that precede the first
  # "--- DOC-VERSION-FOOTER" marker (or every line when there is no marker),
  # joined with LF.
  $reader = New-Object IO.StreamReader($Path)
  try{
    $kept = New-Object 'System.Collections.Generic.List[string]'
    while($true){
      $current = $reader.ReadLine()
      if($null -eq $current){ break }   # end of file
      if($current -match '^\s*---\s*DOC-VERSION-FOOTER'){ break }
      $kept.Add($current)
    }
    return ($kept -join "`n")
  }
  finally{
    $reader.Dispose()
  }
}

# Strict ASCII sanitization (no non-ASCII literal appears in this code)
function Normalize-Ascii-Strict([string]$s){
  # Reduce $s to printable ASCII on a best-effort basis and return the trimmed result.
  # Returns "" for $null.
  if($null -eq $s){ return "" }
  # 1) typographic quotes/dashes and NBSP -> ASCII.
  #    The code points are written as .NET regex escapes (\uXXXX) inside single-quoted
  #    patterns. A backtick-u sequence in a double-quoted string is not an escape in
  #    PS 5.1: the backtick is dropped and the pattern would match the literal text
  #    "u2018" instead of the quote character.
  $s = $s -replace '[\u2018\u2019\u201A\u201B]',"'"
  $s = $s -replace '[\u201C\u201D\u201E]','"'
  $s = $s -replace '[\u2013\u2014]','-'
  $s = $s -replace '\u00A0',' '
  # 2) neutralize '@{' (stringified hashtable)
  $s = $s -replace '@\{','('
  # 3) replace anything that is not printable ASCII (CR/LF/TAB excepted) with a space
  $s = [regex]::Replace($s, '[^\x09\x0A\x0D\x20-\x7E]', ' ')
  # 4) collapse every run of two or more whitespace characters into one space
  $s = [regex]::Replace($s, '\s{2,}', ' ')
  $s.Trim()
}

function Slug([string]$t){
  # Build the de-duplication key for a title: lowercase it, turn every run of
  # characters outside a-z / 0-9 into a single '-', and strip leading/trailing '-'.
  # Blank input gives "".
  if(-not [string]::IsNullOrWhiteSpace($t)){
    $lowered = $t.ToLowerInvariant()
    $dashed  = [regex]::Replace($lowered,"[^a-z0-9]+","-")
    return $dashed.Trim("-")
  }
  return ""
}

# --- Paths
$BugKbDir  = Join-Path $Root "bug_kb"
$KbCanon   = Join-Path $BugKbDir "BUG_KB.json.txt"
$Inbox     = Join-Path $Root "updates\inbox\kb"
$Processed = Join-Path $Root ("updates\processed\kb\" + (Get-Date -f yyyyMMdd))
# $Processed is not created here: the EXECUTE phase creates it on demand
# (Ensure-Parent before each move), so a preview run does not leave a dated folder behind.
Ensure-Dir $BugKbDir; Ensure-Dir $Inbox

# --- Load the existing canonical KB (footer stripped)
if(-not (Test-Path -LiteralPath $KbCanon)){ Write-Host "[ERR] KB canonique absente: $KbCanon"; exit 2 }
$kbRaw = Read-JsonSansFooter $KbCanon
try{ $kb = $kbRaw | ConvertFrom-Json -ErrorAction Stop } catch { Write-Host "[ERR] KB JSON invalide: $($_.Exception.Message)"; exit 3 }
# The rest of the script reads and writes properties on $kb, so the JSON root must be an object.
if(-not ($kb -is [System.Management.Automation.PSCustomObject])){ Write-Host "[ERR] KB JSON invalide: objet racine attendu"; exit 3 }
# -Force: 'entries' may already exist but be empty or null; a plain Add-Member
# would then fail with "member already exists".
if(-not $kb.entries){ $kb | Add-Member -Name entries -MemberType NoteProperty -Value @() -Force }

# Index the slugs of the titles already present in the KB
$seen=@{}
foreach($e in $kb.entries){
  $t = [string]$e.title
  if(-not [string]::IsNullOrWhiteSpace($t)){
    $slug = Slug $t
    if($slug){ $seen[$slug]=$true }
  }
}

# Collect the AUTO_*.txt inbox files in name order. @() keeps a single match (or none) as an array.
$autos = @(Get-ChildItem -LiteralPath $Inbox -Filter "AUTO_*.txt" -File | Sort-Object Name)
$totalCandidates = 0
$totalNew = 0
$totalDup = 0
$toAdd = New-Object System.Collections.ArrayList
$perFile = @()
# Names of inbox files whose content did not parse as JSON. They are reported and
# left in the inbox instead of being archived as if they had been ingested.
$invalid = New-Object System.Collections.ArrayList

foreach($f in $autos){
  $txt = Read-JsonSansFooter $f.FullName
  $obj=$null
  $parsed=$true
  try{ $obj = $txt | ConvertFrom-Json -ErrorAction Stop } catch { $obj=$null; $parsed=$false }
  if(-not $parsed){ $null = $invalid.Add($f.Name) }

  # Accepted shapes: {entries:[...]}, a top-level array, or a single entry object.
  $entries = @()
  if($obj){
    if($obj.PSObject.Properties.Name -contains 'entries'){ $entries = @($obj.entries) }
    else { $entries = @($obj) }
  }

  $candidates = 0
  $newHere = 0
  $dupHere = 0

  foreach($e in $entries){
    $candidates++
    $title = Normalize-Ascii-Strict ([string]$e.title)
    $slug  = Slug $title
    # Entries without a usable title are counted as candidates but are neither new nor duplicate.
    if([string]::IsNullOrWhiteSpace($slug)){ continue }
    if($seen.ContainsKey($slug)){ $dupHere++; continue }

    $seen[$slug]=$true
    $id   = Normalize-Ascii-Strict ([string]$e.id)
    $work = Normalize-Ascii-Strict ([string]$e.workaround)
    $note = Normalize-Ascii-Strict ([string]$e.note)
    $fix  = Normalize-Ascii-Strict ([string]$e.fix)
    # last_seen goes through the same sanitization as every other text field.
    $last = Normalize-Ascii-Strict ([string]$e.last_seen)

    $tags=@()
    if($e.tags){ foreach($tag in $e.tags){ $tags += (Normalize-Ascii-Strict ([string]$tag)) } }

    $seenIn=@()
    if($e.seen_in_threads){
      if($e.seen_in_threads -is [string]){ $seenIn=@(Normalize-Ascii-Strict ([string]$e.seen_in_threads)) }
      else { $seenIn=@($e.seen_in_threads | ForEach-Object { Normalize-Ascii-Strict ([string]$_) }) }
    }

    # NOTE(review): [bool] on a non-empty string is always $true, so a "false" string
    # in 'blocking' would be stored as true - confirm producers emit JSON booleans.
    $objOut = [ordered]@{
      id=$id; title=$title; blocking=[bool]$e.blocking; workaround=$work; note=$note; fix=$fix;
      tags=@($tags); seen_in_threads=@($seenIn); last_seen=$last
    }
    $null = $toAdd.Add($objOut)
    $newHere++
  }

  $totalCandidates += $candidates
  $totalNew += $newHere
  $totalDup += $dupHere
  $perFile += ,[pscustomobject]@{ Name=$f.Name; Size=$f.Length; Candidates=$candidates; New=$newHere; Duplicates=$dupHere }
}

# --- PREVIEW (also the default when -Execute is not given)
if($Preview -or (-not $Execute)){
  Write-Host "== PREVIEW :: BULK INGEST v1.3 =="
  Write-Host ("Inbox files     : {0}" -f $autos.Count)
  Write-Host ("Candidates total: {0}  New: {1}  Duplicates: {2}" -f $totalCandidates,$totalNew,$totalDup)
  foreach($p in $perFile){ Write-Host (" - {0}  size={1:n0}  cand={2}  new={3}  dup={4}" -f $p.Name,$p.Size,$p.Candidates,$p.New,$p.Duplicates) }
  foreach($n in $invalid){ Write-Host ("[WARN] JSON invalide (fichier ignore): {0}" -f $n) }
  Write-Host "No write performed (Preview)."
  exit 0
}

# --- EXECUTE
$kb.entries = @($kb.entries) + @($toAdd)
# Add-Member -Force works whether or not the KB already has an 'updated' property;
# a plain assignment fails on an object that lacks it.
$kb | Add-Member -MemberType NoteProperty -Name updated -Value (Get-NowIso) -Force
$json = ($kb | ConvertTo-Json -Depth 6 -Compress)
$footer = "`r`n`r`n--- DOC-VERSION-FOOTER ---`r`nGenerated: $($kb.updated)`r`nPolicy: TXT-ONLY v1.0; SAFE-WRITE v1.1; GOV_SCRIPT_GATE v1.3`r`nSource: KB_BULK_INGEST_v1.3`r`n"

# Write the KB with errors made terminating, and stop before touching the inbox if
# the write fails: archiving the inbox after a failed write would lose the new entries.
$bak = $null
$writeErr = $null
$eapSaved = $ErrorActionPreference
$ErrorActionPreference = 'Stop'
try{ $bak = Write-SafeText -Target $KbCanon -Content ($json + $footer) -StageRoot $StageRoot }
catch{ $writeErr = $_.Exception.Message }
finally{ $ErrorActionPreference = $eapSaved }
if($writeErr){ Write-Host "[ERR] Ecriture KB echouee, inbox conservee: $writeErr"; exit 4 }

# Move the ingested AUTO files to the processed folder; unparsable ones stay in the inbox
foreach($f in $autos){
  if($invalid -contains $f.Name){
    Write-Host ("[WARN] JSON invalide, fichier laisse dans l'inbox: {0}" -f $f.Name)
    continue
  }
  $dest = Join-Path $Processed $f.Name
  Ensure-Parent $dest
  Move-Item -LiteralPath $f.FullName -Destination $dest -Force
}

$bakMsg = $bak; if(-not $bakMsg){ $bakMsg = "<none>" }
Write-Host ("[OK] KB mise a jour : +{0} new entries  -> {1}" -f $totalNew, $KbCanon)
Write-Host ("Backup: {0}" -f $bakMsg)