mirror of
https://github.com/github/awesome-copilot.git
synced 2026-06-18 13:41:26 +00:00
411 lines
17 KiB
PowerShell
411 lines
17 KiB
PowerShell
#!/usr/bin/env pwsh
# fix-broken-links - link-fix.ps1 (PowerShell 7+ port of link-fix.sh)
#
# After the agent edits files (postToolUse): take the files it just changed,
# extract every http(s) URL, and check each one.
#   • With file paths passed (the edited files, injected from the hook payload, or
#     given on the command line) any URL that is not 200 gets spelling variations
#     (http/https, www, trailing slash) then a Copilot CLI agent hand-off for more
#     alternatives, followed by an interactive menu to replace / remove / skip.
#   • With NO file arguments it only lists the broken links - no alternative
#     lookups and no prompts.
# Generic anchor text is flagged as an SEO note either way.
#
# Pure PowerShell + .NET (Invoke-WebRequest/regex), plus an optional Copilot CLI
# hand-off for suggestions.
# Covers: HTML · Markdown · JS/TS · JSON · CSS · SQL · templates (all via URL scan)
# Trigger: postToolUse
|
|
|
|
Set-StrictMode -Off

# Invoke-WebRequest is far faster without the progress bar.
$ProgressPreference = 'SilentlyContinue'

# Re-entrancy guard: the agent hand-off further down launches `copilot`, which may
# fire this hook again. That child run carries FIX_BROKEN_LINKS_AGENT in its
# environment, so bail out straight away when the variable is set.
if ($env:FIX_BROKEN_LINKS_AGENT) {
    exit 0
}

# ── Tunables ──────────────────────────────────────────────────────────────────
$LIMIT         = 50                                                 # links per file before asking to continue
$TIMEOUT       = 10                                                 # seconds per HTTP request
$UA            = 'Mozilla/5.0 (compatible; fix-broken-links/1.0)'   # User-Agent sent with each request
$AGENT_MODEL   = 'gpt-5-mini'                                       # small, low-token model for the suggestion hand-off
$AGENT_TIMEOUT = 60                                                 # seconds before giving up on the agent
$WEB_RE        = '\.(html?|xhtml|md|markdown|mdx|js|jsx|ts|tsx|vue|svelte|json|jsonl|css|sql|erb|jinja|j2|twig|ejs|pug|hbs)$'

# The positional arguments form the initial file list (each coerced to a string);
# the hook payload may append more entries later.
$ScriptArgs = [System.Collections.Generic.List[string]]::new()
for ($argIndex = 0; $argIndex -lt $args.Count; $argIndex++) {
    [void]$ScriptArgs.Add([string]$args[$argIndex])
}
|
|
|
|
# ── Hook stdin ────────────────────────────────────────────────────────────────
# When no files were given on the command line and stdin is redirected, treat the
# run as a postToolUse hook: read the JSON payload from stdin and, if it names an
# edit tool, append the files it lists to $ScriptArgs so Get-InputFiles sees them.
$IsHook = $false
if ([Console]::IsInputRedirected -and $ScriptArgs.Count -eq 0) {
    $IsHook = $true  # invoked as a hook: stdin carries the tool payload
    $payloadText = [Console]::In.ReadToEnd()
    if ($payloadText.Trim()) {
        try {
            $payload = $payloadText | ConvertFrom-Json

            # The tool name may arrive under either spelling.
            $toolName = $payload.toolName
            if (-not $toolName) { $toolName = $payload.tool_name }

            if ($toolName) {
                $editTools = 'editFiles','edit','write','str_replace_editor','create_file','multiEdit','applyPatch'
                if ($toolName -notin $editTools) {
                    # Different tool (bash, read, etc.) - nothing to check
                    exit 0
                }

                # Only the files this edit tool just changed - never a wider repo scan.
                # First non-empty value wins, probed in this order:
                # tool_input.files, toolInput.files, tool_input.path, toolInput.path.
                $editedFiles = $null
                foreach ($candidate in $payload.tool_input.files, $payload.toolInput.files, $payload.tool_input.path, $payload.toolInput.path) {
                    if ($candidate) { $editedFiles = $candidate; break }
                }
                if ($editedFiles) {
                    foreach ($edited in $editedFiles) { [void]$ScriptArgs.Add([string]$edited) }
                }
            }
            # No tool name in the payload: nothing is injected ($IsHook stays set).
        } catch { }  # payload that fails to parse is ignored
    }
}

# A non-empty positional list means the caller passed files: the edited files from
# the hook payload above, or paths given on the command line. Only then do we run
# the full repair flow (look up alternatives, then prompt to fix). With no
# parameters we simply list the broken links - no lookups, no prompts.
$HaveParams = $ScriptArgs.Count -gt 0

# Interactive prompts are only possible when input is a real console; once the
# hook JSON has been read from a redirected stdin we report rather than prompt.
$Interactive = [Environment]::UserInteractive -and -not [Console]::IsInputRedirected
|
|
|
|
# Show $Prompt and read one line from the console.
# Returns '' when the session is not interactive ($Interactive is false) or when
# the console yields no line (ReadLine returned $null).
function Read-Answer {
    param([string]$Prompt)

    if ($Interactive) {
        [Console]::Out.Write($Prompt)
        $reply = [Console]::In.ReadLine()
        if ($null -ne $reply) { return $reply }
    }
    return ''
}
|
|
|
|
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
|
|
# Fetch $Url and return its HTTP status code as a string (e.g. '200', '404').
# Follows up to 5 redirects, uses the script-level $TIMEOUT and $UA settings.
# When the request throws, the status carried by the exception's response is
# returned if there is one; otherwise the result is 'ERR'.
function Get-HttpStatus {
    param([string]$Url)

    $request = @{
        Uri                = $Url
        MaximumRedirection = 5
        TimeoutSec         = $TIMEOUT
        UserAgent          = $UA
        ErrorAction        = 'Stop'
    }
    try {
        $response = Invoke-WebRequest @request
        return [string][int]$response.StatusCode
    } catch {
        $failed = $_.Exception.Response
        if (-not $failed -or -not $failed.StatusCode) { return 'ERR' }
        return [string][int]$failed.StatusCode
    }
}
|
|
|
|
# Break a URL into Scheme / Host / Path using plain string operations rather than
# [uri], mirroring the bash port, so wildcards and odd paths survive intact.
#   Scheme - everything before the first '://' (the whole input if there is none)
#   Host   - what follows the scheme prefix, up to the first '/'
#   Path   - the remainder starting at that '/', or '' when there is no '/'
function Split-Url {
    param([string]$Url)

    $sepAt = $Url.IndexOf('://', [System.StringComparison]::Ordinal)
    if ($sepAt -lt 0) { $scheme = $Url } else { $scheme = $Url.Substring(0, $sepAt) }

    $afterScheme = $Url -replace '^[a-zA-Z][a-zA-Z0-9+.-]*://',''
    $slashAt = $afterScheme.IndexOf([char]'/')
    if ($slashAt -lt 0) {
        $hostName = $afterScheme
        $path = ''
    } else {
        $hostName = $afterScheme.Substring(0, $slashAt)
        $path = $afterScheme.Substring($slashAt)
    }

    [pscustomobject]@{ Scheme = $scheme; Host = $hostName; Path = $path }
}
|
|
|
|
# Every http(s) URL in a file, trailing punctuation trimmed, de-duplicated.
# A URL ends at any whitespace (space, tab, CR, LF), a quote, '<', '>' or ')'.
# The whole file is scanned as one string, so line breaks must terminate a URL
# explicitly; otherwise a link at the end of a line would absorb the start of
# the next line.
# Output: the distinct URLs, sorted (Sort-Object -Unique).
function Get-Urls {
    param([string]$File)

    $text = [System.IO.File]::ReadAllText($File)
    [regex]::Matches($text, 'https?://[^\s"''<>)]+', 'IgnoreCase') |
        ForEach-Object { $_.Value -replace '[.,;:]+$','' } |
        Sort-Object -Unique
}
|
|
|
|
# Find links whose visible text is generic ("click here", "read more", ...),
# which weakens SEO. Emits the matched text of every HTML <a> hit first, then
# of every Markdown [text]( hit; matching is case-insensitive.
function Get-SeoIssues {
    param([string]$File)

    $text = [System.IO.File]::ReadAllText($File)
    $htmlAnchor = '<a[^>]*>\s*(click here|click|here|read more|more|this page|this|learn more|see more|view|visit|details|info)\s*</a>'
    $markdownAnchor = '\[(click here|click|here|read more|more|this page|learn more|see more|details|info)\]\('

    foreach ($pattern in $htmlAnchor, $markdownAnchor) {
        foreach ($hit in [regex]::Matches($text, $pattern, 'IgnoreCase')) {
            $hit.Value
        }
    }
}
|
|
|
|
# Probe common spelling variations of a broken URL and return the first one that
# answers 200, or '' when none does. Candidates, in order:
#   1. the other scheme (http <-> https)
#   2. the host with 'www.' removed, or added when absent
#   3. a trailing slash, when the path is non-empty, does not already end in '/'
#      and its last segment contains no '.'
function Find-Variation {
    param([string]$Url)

    $parts = Split-Url $Url
    $scheme = $parts.Scheme
    $hostName = $parts.Host
    $path = $parts.Path

    $candidates = [System.Collections.Generic.List[string]]::new()

    if ($scheme -eq 'http') {
        [void]$candidates.Add("https://$hostName$path")
    }
    if ($scheme -eq 'https') {
        [void]$candidates.Add("http://$hostName$path")
    }

    if ($hostName -like 'www.*') {
        $bareHost = $hostName.Substring(4)
        [void]$candidates.Add("${scheme}://$bareHost$path")
    } else {
        [void]$candidates.Add("${scheme}://www.$hostName$path")
    }

    $lastSegment = ($path -split '/')[-1]
    if ($path -and $path -notmatch '/$' -and $lastSegment -notmatch '\.') {
        [void]$candidates.Add(($Url -replace '/$','') + '/')
    }

    foreach ($candidate in $candidates) {
        # Skip a candidate equal to the input, and only hit the network otherwise.
        if ($candidate -ne $Url -and (Get-HttpStatus $candidate) -eq '200') {
            return $candidate
        }
    }
    return ''
}
|
|
|
|
# Ask the Copilot CLI agent for replacement candidates for a broken link.
# One non-interactive prompt is sent to the $AGENT_MODEL model; `--available-tools`
# is passed with no tool names. The CLI runs inside a background job so the wait
# can be capped at $AGENT_TIMEOUT seconds, and the job sets FIX_BROKEN_LINKS_AGENT
# so that a hook re-fired by the child run exits early.
# The reply may contain prose, so http(s) tokens are pulled from anywhere in it,
# trailing punctuation is trimmed, the broken URL itself is dropped and the rest
# is de-duplicated (compared lower-cased).
# Returns at most $Max URLs; nothing when the CLI is missing, the job does not
# complete in time, or the reply is empty.
function Get-AgentAlts {
    param([string]$Url,[int]$Max)

    $cli = Get-Command copilot -ErrorAction SilentlyContinue
    if (-not $cli) { return @() }

    $snappy = $AGENT_TIMEOUT - 5
    $prompt = "In under $snappy seconds, find up to $Max working alternative URLs for the broken link $Url. Hierarchically consider 1. Path and/or page spelling; 2. web.archive.org/wayback; 3. Redirects using redirect destination; 4. The context of the link's text; in order to resolve. Output only the URLs. One per line, and no: prose, numbering, markdown, backticks, special characters, post formatting."

    $reply = ''
    try {
        # FIX_BROKEN_LINKS_AGENT marks the child run so a re-entrant hook exits early.
        $agentJob = Start-Job -ArgumentList $prompt, $AGENT_MODEL -ScriptBlock {
            param($Prompt, $Model)
            $env:FIX_BROKEN_LINKS_AGENT = '1'
            copilot -p $Prompt -s --no-color --model $Model --available-tools 2>$null
        }

        # Only a job that finished inside the time limit in the Completed state
        # contributes output; anything else yields no alternatives.
        $finished = Wait-Job $agentJob -Timeout $AGENT_TIMEOUT
        if ($finished -and $agentJob.State -eq 'Completed') {
            $reply = (Receive-Job $agentJob -ErrorAction SilentlyContinue | Out-String)
        }
        Remove-Job $agentJob -Force -ErrorAction SilentlyContinue
    } catch {
        $reply = ''
    }
    if (-not $reply) { return @() }

    $seen = @{}
    $alternatives = [System.Collections.Generic.List[string]]::new()
    $brokenKey = $Url.ToLower()
    foreach ($token in [regex]::Matches($reply, 'https?://[^\s"''<>)\]]+', 'IgnoreCase')) {
        if ($alternatives.Count -ge $Max) { break }

        $candidate = $token.Value -replace '[.,;:]+$',''
        $key = $candidate.ToLower()
        if ($key -eq $brokenKey -or $seen.ContainsKey($key)) { continue }

        $seen[$key] = $true
        [void]$alternatives.Add($candidate)
    }
    return ,$alternatives.ToArray()
}
|
|
|
|
# Build the list of replacement candidates for a broken link, best first, capped
# at $Max entries:
#   1. a working scheme/www/slash variation (Find-Variation, verified live 200)
#   2. alternatives proposed by the Copilot CLI agent (Get-AgentAlts)
# Entries are de-duplicated on their lower-cased form. The first item is what the
# `r` menu choice uses; the rest become the numbered alternatives.
function Get-SuggestedAlts {
    param([string]$Url,[int]$Max = 6)

    $known = @{}
    $picked = [System.Collections.Generic.List[string]]::new()

    $variation = Find-Variation $Url
    if ($variation) {
        [void]$picked.Add($variation)
        $known[$variation.ToLower()] = $true
    }

    foreach ($candidate in (Get-AgentAlts $Url $Max)) {
        if ($picked.Count -ge $Max) { break }
        # Ignore empty entries and anything already collected.
        if (-not $candidate -or $known.ContainsKey($candidate.ToLower())) { continue }

        [void]$picked.Add($candidate)
        $known[$candidate.ToLower()] = $true
    }
    return ,$picked.ToArray()
}
|
|
|
|
# Replace every occurrence of the URL $Old in $File with $New.
# $Old is matched literally (regex-escaped) and case-sensitively, but only where
# the URL actually ends there: the match is rejected when more URL characters
# follow, so replacing https://host/foo leaves https://host/foobar and
# https://host/foo/bar untouched. Trailing sentence punctuation (. , ; :) after
# the URL is still treated as the end of the URL and is kept in place.
# $New is inserted verbatim ('$' is escaped so it is never read as a regex
# substitution). The file is rewritten only when something changed; an empty
# $Old is a no-op.
function Set-UrlReplacement {
    param([string]$File,[string]$Old,[string]$New)

    if (-not $Old) { return }

    $content = [System.IO.File]::ReadAllText($File)
    # Negative lookahead: optional trailing punctuation followed by another
    # URL-body character means $Old is only a prefix of a longer URL here.
    $pattern = [regex]::Escape($Old) + '(?![.,;:]*[^\s"''<>).,;:])'
    $updated = [regex]::Replace($content, $pattern, $New.Replace('$', '$$'))

    if (-not $updated.Equals($content)) {
        [System.IO.File]::WriteAllText($File, $updated)
    }
}
|
|
|
|
# Remove the link wrapper around $Url in $File but keep the visible text:
#   <a href="URL">text</a>  ->  text
#   <a href='URL'>text</a>  ->  text
#   [text](URL)             ->  text
#   [text](URL "title")     ->  text
# The URL is matched literally (regex-escaped), case-insensitively. In the
# Markdown form the URL must be followed only by optional trailing punctuation
# and/or a whitespace-separated title before the closing ')', so a longer link
# that merely starts with $Url (e.g. URL + 'bar') is left alone.
function Remove-LinkWrapper {
    param([string]$File,[string]$Url)

    $content = [System.IO.File]::ReadAllText($File)
    $esc = [regex]::Escape($Url)

    # Each element is parenthesized: the comma operator binds tighter than '+', so
    # without the parens the three concatenations collapse into a single string and
    # the array would hold one bogus pattern instead of three.
    $patterns = @(
        ('<a[^>]*href="' + $esc + '"[^>]*>([^<]*)</a>'),
        ("<a[^>]*href='" + $esc + "'[^>]*>([^<]*)</a>"),
        ('\[([^\]]*)\]\(' + $esc + '[.,;:]*(?:\s[^)]*)?\)')
    )
    foreach ($pat in $patterns) {
        # '$1' keeps only the captured link text.
        $content = [regex]::Replace($content, $pat, '$1', 'IgnoreCase')
    }
    [System.IO.File]::WriteAllText($File, $content)
}
|
|
|
|
# ── File discovery ────────────────────────────────────────────────────────────
|
|
|
|
# Decide which files to examine, in priority order:
#   1. the paths in $ScriptArgs (command line or hook payload), when any
#   2. nothing at all when running as a hook without files
#   3. files reported by `git diff --name-only` against HEAD plus the staged set,
#      when git is available and the current directory is inside a repository
#   4. otherwise every file under the current directory, skipping common
#      vendor/build directories, as paths relative to the current directory
function Get-InputFiles {
    if ($ScriptArgs.Count -gt 0) {
        return $ScriptArgs.ToArray()
    }

    # Fired as a hook but the payload carried no (web) files: do nothing rather than
    # fall back to scanning unrelated files - the hook only ever checks edited files.
    if ($IsHook) {
        return @()
    }

    $changed = @()
    if (Get-Command git -ErrorAction SilentlyContinue) {
        git rev-parse --git-dir *> $null
        if ($LASTEXITCODE -eq 0) {
            $unstaged = @(git diff --name-only HEAD 2>$null)
            $staged = @(git diff --name-only --cached 2>$null)
            $changed = $unstaged + $staged
        }
    }
    if ($changed.Count -gt 0) {
        return $changed
    }

    $skipDirs = '[\\/](\.git|node_modules|dist|build|\.next|\.venv|__pycache__)[\\/]'
    Get-ChildItem -Recurse -File -ErrorAction SilentlyContinue |
        Where-Object { $_.FullName -notmatch $skipDirs } |
        ForEach-Object { Resolve-Path -Relative -LiteralPath $_.FullName }
}
|
|
|
|
# Filter the candidate paths down to the files that will actually be scanned:
# existing regular files with a web-ish extension ($WEB_RE), outside vendor and
# build directories, each listed once. Exits quietly when nothing is left.
$seenFiles = @{}
$FILES = [System.Collections.Generic.List[string]]::new()
foreach ($f in (Get-InputFiles)) {
    if (-not $f) { continue }
    $f = ([string]$f).Trim()
    if (-not (Test-Path -LiteralPath $f -PathType Leaf)) { continue }
    # '(^|[\\/])' also catches an excluded directory at the very start of a
    # relative path (e.g. 'node_modules/x.js' or 'dist/app.js', the form that
    # `git diff --name-only` prints), not only one preceded by a separator.
    if ($f -match '(^|[\\/])(node_modules|\.git|dist|build)[\\/]') { continue }
    if ($f -notmatch $WEB_RE) { continue }
    if ($seenFiles.ContainsKey($f)) { continue }
    $seenFiles[$f] = $true
    [void]$FILES.Add($f)
}

if ($FILES.Count -eq 0) { exit 0 }
|
|
|
|
# ── Scan ──────────────────────────────────────────────────────────────────────
|
|
|
|
# Parallel lists describing every broken link found: entry i of each list
# belongs to the same finding (file, URL, HTTP status, suggested alternatives).
$B_FILE = [System.Collections.Generic.List[string]]::new()
$B_URL = [System.Collections.Generic.List[string]]::new()
$B_STATUS = [System.Collections.Generic.List[string]]::new()
$B_ALT = [System.Collections.Generic.List[object]]::new()
$SEO_LINES = [System.Collections.Generic.List[string]]::new()

# Per-run caches keyed by the exact (case-sensitive) URL, so a link that appears
# in several files is fetched once and sent to the agent once instead of once
# per file.
$statusCache = [System.Collections.Generic.Dictionary[string,string]]::new([System.StringComparer]::Ordinal)
$altCache = [System.Collections.Generic.Dictionary[string,object]]::new([System.StringComparer]::Ordinal)

foreach ($file in $FILES) {
    # SEO notes are collected for every file, with or without links to check.
    foreach ($line in (Get-SeoIssues $file)) {
        if ($line) { [void]$SEO_LINES.Add("${file}: $line") }
    }

    $urls = @(Get-Urls $file)
    if ($urls.Count -eq 0) { continue }

    # Large files: ask before checking, but only in the repair flow. A
    # non-interactive run gets '' back from Read-Answer and carries on.
    if ($HaveParams -and $urls.Count -gt $LIMIT) {
        $ans = Read-Answer " $file has $($urls.Count) links (limit $LIMIT). Continue? [Y/n] "
        if ($ans -in 'n','N','no','NO') { continue }
    }

    Write-Host ""
    Write-Host " Checking $($urls.Count) link(s) in $file ..."
    foreach ($url in $urls) {
        if (-not $statusCache.ContainsKey($url)) {
            $statusCache[$url] = Get-HttpStatus $url
        }
        $status = $statusCache[$url]
        if ($status -eq '200') { continue }

        Write-Host " BROKEN ($status) $url"

        # Only look up replacements when files were passed; otherwise just list.
        $alts = @()
        if ($HaveParams) {
            if (-not $altCache.ContainsKey($url)) {
                $found = Get-SuggestedAlts $url 6
                $altCache[$url] = @($found)
            }
            $alts = $altCache[$url]
        }

        [void]$B_FILE.Add($file)
        [void]$B_URL.Add($url)
        [void]$B_STATUS.Add($status)
        [void]$B_ALT.Add($alts)
    }
}
|
|
|
|
# ── SEO report ────────────────────────────────────────────────────────────────
|
|
|
|
# Print the collected SEO anchor notes, if any.
if ($SEO_LINES.Count -ge 1) {
    Write-Host ""
    Write-Host "------------------------------------------------------------"
    Write-Host " SEO anchor issues (consider descriptive link text)"
    $SEO_LINES | ForEach-Object { Write-Host " $_" }
}

# Nothing broken: say so and stop before the fix menu.
if (-not $B_URL.Count) {
    Write-Host ""
    Write-Host " No broken links found."
    Write-Host ""
    exit 0
}
|
|
|
|
# ── Interactive fix ───────────────────────────────────────────────────────────
|
|
|
|
# Report header.
$reportRule = "============================================================"
Write-Host ""
Write-Host $reportRule
Write-Host " fix-broken-links report"
Write-Host $reportRule

# Walk the findings in the order they were recorded. $CHANGED collects the files
# that were edited (keys only) for the summary at the end.
$CHANGED = @{}
$total = $B_URL.Count
for ($i = 0; $i -lt $total; $i++) {
    $file = $B_FILE[$i]
    $url = $B_URL[$i]
    $status = $B_STATUS[$i]
    $alts = @($B_ALT[$i])

    Write-Host ""
    Write-Host " [$($i + 1)] $file"
    Write-Host " URL : $url"
    if ($status -in 'ERR','000','TIMEOUT') { $note = ' (unreachable)' } else { $note = '' }
    Write-Host " HTTP: $status$note"

    # No file parameters → report-only: list the broken link and move on.
    if (-not $HaveParams) { continue }

    # Menu: `r` is the best suggestion, digits pick the remaining alternatives.
    Write-Host ""
    if ($alts.Count -gt 0) {
        Write-Host " r Replace -> $($alts[0])"
        for ($k = 1; $k -lt $alts.Count; $k++) {
            Write-Host " $k Replace -> $($alts[$k])"
        }
    }
    Write-Host " d Remove link, keep text"
    Write-Host " c Custom replacement URL"
    Write-Host " s Skip"

    if (-not $Interactive) {
        Write-Host " (no terminal - reporting only)"
        continue
    }

    # Keep asking until a choice settles this finding.
    $settled = $false
    while (-not $settled) {
        $choice = Read-Answer ' > '

        if ($choice -eq '' -or $choice -eq 's') {
            # Skip (also the result of an empty answer).
            $settled = $true
        }
        elseif ($choice -eq 'd') {
            Remove-LinkWrapper $file $url
            $CHANGED[$file] = $true
            Write-Host " removed"
            $settled = $true
        }
        elseif ($choice -eq 'r') {
            if ($alts.Count -gt 0) {
                Set-UrlReplacement $file $url $alts[0]
                $CHANGED[$file] = $true
                Write-Host " replaced -> $($alts[0])"
                $settled = $true
            } else {
                Write-Host " no suggestion available"
            }
        }
        elseif ($choice -match '^[1-9]$') {
            $pick = [int]$choice
            if ($pick -lt $alts.Count) {
                Set-UrlReplacement $file $url $alts[$pick]
                $CHANGED[$file] = $true
                Write-Host " replaced -> $($alts[$pick])"
                $settled = $true
            } else {
                Write-Host " invalid choice"
            }
        }
        elseif ($choice -eq 'c') {
            $custom = Read-Answer ' URL: '
            if ($custom) {
                Set-UrlReplacement $file $url $custom
                $CHANGED[$file] = $true
                Write-Host " replaced"
                $settled = $true
            }
            # An empty custom URL returns to the menu prompt.
        }
        else {
            Write-Host " invalid choice"
        }
    }
}

# Summary of edited files.
if ($CHANGED.Count -gt 0) {
    Write-Host ""
    Write-Host " $($CHANGED.Count) file(s) updated:"
    foreach ($changedFile in $CHANGED.Keys) { Write-Host " $changedFile" }
    Write-Host ""
}
exit 0
|