From d0566faace74616a2a06b98246cc757e1d8d4a49 Mon Sep 17 00:00:00 2001 From: Ian Chakeres Date: Thu, 1 Jun 2017 11:06:48 -0700 Subject: [PATCH 1/3] Added logic to copy-to-staging to avoid copying if the same file already exists in gce --- cluster/gce/util.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 05bcd9a7363..a5c5c413d4e 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -154,11 +154,23 @@ function copy-to-staging() { local -r gs_url=$2 local -r tar=$3 local -r hash=$4 + local -r basename_tar=$(basename ${tar}) + + #check whether this tar alread exists and has the same hash + #if it matches, then don't bother uploading it again + local -r remote_tar_hash=$(gsutil hash -h -m ${staging_path}/${basename_tar} 2>/dev/null | grep "Hash (md5):" | awk -F ':' '{print $2}') + if [[ -n ${remote_tar_hash} ]]; then + local -r local_tar_hash=$(gsutil hash -h -m ${tar} 2>/dev/null | grep "Hash (md5):" | awk -F ':' '{print $2}') + if [[ "${remote_tar_hash}" == "${local_tar_hash}" ]]; then + echo "+++ ${basename_tar} uploaded earlier and hash matches" + return 0 + fi + fi echo "${hash}" > "${tar}.sha1" gsutil -m -q -h "Cache-Control:private, max-age=0" cp "${tar}" "${tar}.sha1" "${staging_path}" gsutil -m acl ch -g all:R "${gs_url}" "${gs_url}.sha1" >/dev/null 2>&1 - echo "+++ $(basename ${tar}) uploaded (sha1 = ${hash})" + echo "+++ ${basename_tar} uploaded (sha1 = ${hash})" } # Given the cluster zone, return the list of regional GCS release From 14391d3eb8c140483bb521dd4575441e969f888d Mon Sep 17 00:00:00 2001 From: Ian Chakeres Date: Tue, 13 Jun 2017 16:12:21 -0700 Subject: [PATCH 2/3] Moved md5 command to a separate function and added comments --- cluster/gce/util.sh | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index a5c5c413d4e..2f1f1a95c5e 100755 --- a/cluster/gce/util.sh +++
b/cluster/gce/util.sh @@ -158,11 +158,17 @@ function copy-to-staging() { #check whether this tar alread exists and has the same hash #if it matches, then don't bother uploading it again - local -r remote_tar_hash=$(gsutil hash -h -m ${staging_path}/${basename_tar} 2>/dev/null | grep "Hash (md5):" | awk -F ':' '{print $2}') - if [[ -n ${remote_tar_hash} ]]; then - local -r local_tar_hash=$(gsutil hash -h -m ${tar} 2>/dev/null | grep "Hash (md5):" | awk -F ':' '{print $2}') - if [[ "${remote_tar_hash}" == "${local_tar_hash}" ]]; then - echo "+++ ${basename_tar} uploaded earlier and hash matches" + + #remote_tar_md5 checks the remote location for the existing tarball and its md5 + #staging_path example gs://kubernetes-staging-PROJECT/kubernetes-devel + #basename_tar example kubernetes-server-linux-amd64.tar.gz + local -r remote_tar_md5=$(gsutil_get_tar_md5 "${staging_path}/${basename_tar}") + if [[ -n ${remote_tar_md5} ]]; then + #local_tar_md5 is the md5 hash of the local tarball, computed only when a remote copy exists + #tar example ./_output/release-tars/kubernetes-server-linux-amd64.tar.gz + local -r local_tar_md5=$(gsutil_get_tar_md5 "${tar}") + if [[ "${remote_tar_md5}" == "${local_tar_md5}" ]]; then + echo "+++ ${basename_tar} uploaded earlier, cloud and local file md5 match (md5 = ${local_tar_md5})" return 0 fi fi @@ -173,6 +179,18 @@ function copy-to-staging() { echo "+++ ${basename_tar} uploaded (sha1 = ${hash})" } +# Use gsutil to get the md5 hash for a particular tar +function gsutil_get_tar_md5() { + # location_tar could be local or in the cloud + # local tar_location example ./_output/release-tars/kubernetes-server-linux-amd64.tar.gz + # cloud tar_location example gs://kubernetes-staging-PROJECT/kubernetes-devel/kubernetes-server-linux-amd64.tar.gz + local -r tar_location=$1 + #parse the output and return the md5 hash + #the sed command at the end removes whitespace + local -r tar_md5=$(gsutil hash -h -m ${tar_location} 2>/dev/null | grep "Hash (md5):" | awk -F
':' '{print $2}' | sed 's/^[[:space:]]*//g') + echo "${tar_md5}" +} + # Given the cluster zone, return the list of regional GCS release # bucket suffixes for the release in preference order. GCS doesn't # give us an API for this, so we hardcode it. From b2450d2eb7b0def9ccd6b348515d880db81a9e4d Mon Sep 17 00:00:00 2001 From: Ian Chakeres Date: Wed, 14 Jun 2017 07:49:59 -0700 Subject: [PATCH 3/3] Moved gsutil_get_tar_md5 function before copy-to-staging function --- cluster/gce/util.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 2f1f1a95c5e..1315a739650 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -148,6 +148,18 @@ function detect-project() { fi } +# Use gsutil to get the md5 hash for a particular tar +function gsutil_get_tar_md5() { + # tar_location could be local or in the cloud + # local tar_location example ./_output/release-tars/kubernetes-server-linux-amd64.tar.gz + # cloud tar_location example gs://kubernetes-staging-PROJECT/kubernetes-devel/kubernetes-server-linux-amd64.tar.gz + local -r tar_location=$1 + #parse the output and return the md5 hash + #the sed command at the end removes leading whitespace + local -r tar_md5=$(gsutil hash -h -m ${tar_location} 2>/dev/null | grep "Hash (md5):" | awk -F ':' '{print $2}' | sed 's/^[[:space:]]*//g') + echo "${tar_md5}" +} + # Copy a release tar and its accompanying hash.
function copy-to-staging() { local -r staging_path=$1 @@ -179,17 +191,6 @@ function copy-to-staging() { echo "+++ ${basename_tar} uploaded (sha1 = ${hash})" } -# Use gsutil to get the md5 hash for a particular tar -function gsutil_get_tar_md5() { - # location_tar could be local or in the cloud - # local tar_location example ./_output/release-tars/kubernetes-server-linux-amd64.tar.gz - # cloud tar_location example gs://kubernetes-staging-PROJECT/kubernetes-devel/kubernetes-server-linux-amd64.tar.gz - local -r tar_location=$1 - #parse the output and return the md5 hash - #the sed command at the end removes whitespace - local -r tar_md5=$(gsutil hash -h -m ${tar_location} 2>/dev/null | grep "Hash (md5):" | awk -F ':' '{print $2}' | sed 's/^[[:space:]]*//g') - echo "${tar_md5}" -} # Given the cluster zone, return the list of regional GCS release # bucket suffixes for the release in preference order. GCS doesn't