Merge pull request #96661 from jeremyje/fastboot

Significantly improve download artifact latencies on GCE Windows node boot.
This commit is contained in:
Kubernetes Prow Robot 2020-12-08 18:35:05 -08:00 committed by GitHub
commit d63a62d7bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -128,53 +128,132 @@ function Validate-SHA {
# It will loop through the URLs list until it has a success. By default it
# retries forever; a non-zero -Attempts makes it give up (throw) after a limited
# number of tries instead. If successful, it will write the file to OutFile.
# You can optionally provide a Hash argument with an optional Algorithm, in
# which case it will attempt to validate the downloaded file against the hash.
# SHA512 will be used if -Algorithm is not provided.
# This function is idempotent: if OutFile already exists and has the correct
# Hash then the download will be skipped. If the Hash is incorrect, the file
# will be overwritten.
function MustDownload-File {
  # Parameters:
  #   -Hash       Optional expected hash of the file. When empty, no validation
  #               is performed and the idempotency short-circuit is disabled.
  #   -Algorithm  Hash algorithm passed to Validate-SHA (default SHA512).
  #   -OutFile    Destination path; overwritten by every download attempt.
  #   -URLs       Candidate URLs, tried in order and cycled through on failure.
  #   -Headers    Extra HTTP request headers forwarded to Get-RemoteFile.
  #   -Attempts   Maximum number of download attempts across all URLs. Zero (the
  #               default) or a negative value means retry forever.
  # Throws when -Attempts is positive and that many attempts have all failed.
  param (
    [parameter(Mandatory = $false)] [string]$Hash,
    [parameter(Mandatory = $false)] [string]$Algorithm = 'SHA512',
    [parameter(Mandatory = $true)] [string]$OutFile,
    [parameter(Mandatory = $true)] [System.Collections.Generic.List[String]]$URLs,
    [parameter(Mandatory = $false)] [System.Collections.IDictionary]$Headers = @{},
    [parameter(Mandatory = $false)] [int]$Attempts = 0
  )

  # If the file is already downloaded and matches the expected hash, skip the download.
  if ((Test-Path -Path $OutFile) -And -Not [string]::IsNullOrEmpty($Hash)) {
    try {
      Validate-SHA -Hash $Hash -Path $OutFile -Algorithm $Algorithm
      Log-Output "Skip download of ${OutFile}, it already exists with expected hash."
      return
    }
    catch {
      # The hash does not match the file on disk.
      # Proceed with the download and overwrite the file.
      Log-Output "${OutFile} exists but had wrong hash. Redownloading."
    }
  }

  $currentAttempt = 0
  while ($true) {
    foreach ($url in $URLs) {
      # Honor the caller-supplied limit. The check runs before the attempt
      # counter is incremented, so exactly $Attempts downloads are tried before
      # giving up. The throw is outside the try blocks below so that it
      # propagates to the caller instead of being logged and retried.
      if (($Attempts -gt 0) -And ($currentAttempt -ge $Attempts)) {
        throw "Attempted to download ${url} ${currentAttempt} times. Giving up."
      }
      $currentAttempt++

      # Attempt to download the file.
      try {
        Get-RemoteFile -OutFile $OutFile -Url $url -Headers $Headers
      }
      catch {
        $message = $_.Exception.ToString()
        Log-Output "Failed to download file from ${Url}. Will retry. Error: ${message}"
        continue
      }

      # Attempt to validate the hash. A mismatch counts as a failed attempt and
      # moves on to the next URL.
      if (-Not [string]::IsNullOrEmpty($Hash)) {
        try {
          Validate-SHA -Hash $Hash -Path $OutFile -Algorithm $Algorithm
        }
        catch {
          $message = $_.Exception.ToString()
          Log-Output "Hash validation of ${url} failed. Will retry. Error: ${message}"
          continue
        }
        Log-Output "Downloaded ${url} (${Algorithm} = ${Hash})"
        return
      }
      Log-Output "Downloaded ${url}"
      return
    }
  }
}
# Downloads a file via HTTP/HTTPS.
# If the file is stored in GCS and this is running on a GCE node whose service
# account credentials have the devstorage.read_only auth scope, the bearer token
# will be automatically added to the request to download the file.
function Get-RemoteFile {
  # Parameters:
  #   -OutFile  Destination path; created or truncated once the server has
  #             returned a success status.
  #   -Url      HTTP/HTTPS URL to fetch.
  #   -Headers  Extra request headers to send.
  # Throws on a non-success status code, a canceled/timed-out request, or any
  # transport or file error. On failure OutFile may be missing or partial.
  param (
    [parameter(Mandatory = $true)] [string]$OutFile,
    [parameter(Mandatory = $true)] [string]$Url,
    [parameter(Mandatory = $false)] [System.Collections.IDictionary]$Headers = @{}
  )

  # Windows PowerShell does not necessarily have System.Net.Http loaded; make
  # sure the HttpClient type can be resolved before it is instantiated.
  if (-Not ("System.Net.Http.HttpClient" -as [type])) {
    Add-Type -AssemblyName System.Net.Http
  }

  $timeout = New-TimeSpan -Minutes 5
  # Initialize everything the finally block inspects so that a same-named
  # variable in a calling scope can never be picked up and disposed by mistake.
  $httpClient = $null
  $response = $null
  $outFileStream = $null
  try {
    # Use HttpClient in favor of WebClient.
    # https://docs.microsoft.com/en-us/dotnet/api/system.net.webclient?view=net-5.0#remarks
    $httpClient = New-Object -TypeName System.Net.Http.HttpClient
    $httpClient.Timeout = $timeout
    foreach ($key in $Headers.Keys) {
      $httpClient.DefaultRequestHeaders.Add($key, $Headers[$key])
    }
    # If the URL is for GCS and the node has dev storage scope, add the
    # service account OAuth2 bearer token to the request headers.
    # https://cloud.google.com/compute/docs/access/create-enable-service-accounts-for-instances#applications
    # The pattern is anchored to the exact host so the token is never sent to a
    # look-alike host such as storage.googleapis.com.example.org.
    if (($Url -match '^https://storage\.googleapis\.com(/|$)') -And $(Check-StorageScope)) {
      # Authorization is a single-valued header: drop any caller-supplied value
      # first, otherwise Add() throws instead of overriding it.
      [void]$httpClient.DefaultRequestHeaders.Remove("Authorization")
      $httpClient.DefaultRequestHeaders.Add("Authorization", "Bearer $(Get-Credentials)")
    }

    # Attempt to download the file. Wait() throws if the request faulted.
    $responseTask = $httpClient.GetAsync([System.Uri]::new($Url))
    $responseTask.Wait()
    if ($responseTask.IsCanceled) {
      # Never report a canceled request as a successful download.
      throw "Downloading ${Url} was canceled, retrying."
    }
    $response = $responseTask.Result

    # Check if the request was successful.
    #
    # DO NOT replace with EnsureSuccessStatusCode(), it prints the
    # OAuth2 bearer token.
    if (-Not $response.IsSuccessStatusCode) {
      $statusCode = $response.StatusCode
      throw "Downloading ${Url} returned status code ${statusCode}, retrying."
    }

    $outFileStream = [System.IO.FileStream]::new($OutFile, [System.IO.FileMode]::Create, [System.IO.FileAccess]::Write)
    # Wait() rethrows any copy failure, so no separate Exception check is needed.
    $copyTask = $response.Content.CopyToAsync($outFileStream)
    $copyTask.Wait()
  }
  finally {
    # Release in reverse order of acquisition; Dispose() on the stream also
    # flushes and closes the file handle.
    if ($null -ne $outFileStream) {
      $outFileStream.Dispose()
    }
    if ($null -ne $response) {
      $response.Dispose()
    }
    if ($null -ne $httpClient) {
      $httpClient.Dispose()
    }
  }
}
# Returns the default service account token for the VM, retrieved from
# the instance metadata.
function Get-Credentials {