Merge pull request #46792 from ianchakeres/avoid-redundant-copy-to-staging

Automatic merge from submit-queue (batch tested with PRs 47403, 46646, 46906, 46527, 46792)

Avoid redundant copying of tars during kube-up for gce if the same file already exists

**What this PR does / why we need it**: 

Whenever I execute cluster/kube-up.sh, it copies my tar files to Google Cloud Storage even if the files haven't changed. These files are large and can take a long time to upload. This PR checks whether each file already exists in the staging bucket with a matching md5 hash and, if so, skips uploading it again.
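The diff below implements this inside `copy-to-staging`; as a standalone illustration, the same check looks roughly like this (bucket and paths are illustrative, and it assumes an authenticated gsutil recent enough to report stored hashes for gs:// objects):

```bash
# Illustrative paths; gsutil hash reports stored hashes for cloud objects
# and computes them for local files.
local_tar="./_output/release-tars/kubernetes-server-linux-amd64.tar.gz"
remote_tar="gs://kubernetes-staging-PROJECT/kubernetes-devel/kubernetes-server-linux-amd64.tar.gz"

# "gsutil hash -h -m" emits a hex line of the form "    Hash (md5):    <hex>";
# the pipeline extracts the hash and strips the leading whitespace.
get_md5() {
  gsutil hash -h -m "$1" 2>/dev/null | grep "Hash (md5):" | awk -F ':' '{print $2}' | sed 's/^[[:space:]]*//g'
}

remote_md5=$(get_md5 "${remote_tar}")
if [[ -n "${remote_md5}" && "${remote_md5}" == "$(get_md5 "${local_tar}")" ]]; then
  echo "+++ $(basename "${local_tar}") already staged, skipping upload"
else
  gsutil -m -q cp "${local_tar}" "${remote_tar}"
fi
```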

**Which issue this PR fixes**: fixes #46791

**Special notes for your reviewer**:

Here is the new output:

```
cluster/kube-up.sh
... Starting cluster in us-central1-b using provider gce
... calling verify-prereqs
... calling verify-kube-binaries
... calling kube-up
Project: PROJECT
Zone: us-central1-b
+++ Staging server tars to Google Storage: gs://kubernetes-staging-PROJECT/kubernetes-devel
+++ kubernetes-server-linux-amd64.tar.gz uploaded earlier, cloud and local file md5 match (md5 = 3a095kcf27267a71fe58f91f89fab1bc)
```


**Release note**:
```
cluster/kube-up.sh on gce now avoids redundant copying of kubernetes tars if the local and cloud files' md5 hashes match
```
Kubernetes Submit Queue 2017-06-23 02:59:31 -07:00 committed by GitHub
commit cdc9770346


@@ -148,19 +148,50 @@ function detect-project() {
fi
}
# Use gsutil to get the md5 hash for a particular tar
function gsutil_get_tar_md5() {
# tar_location could be local or in the cloud
# local tar_location example ./_output/release-tars/kubernetes-server-linux-amd64.tar.gz
# cloud tar_location example gs://kubernetes-staging-PROJECT/kubernetes-devel/kubernetes-server-linux-amd64.tar.gz
local -r tar_location=$1
#parse the output and return the md5 hash
#the sed command at the end strips leading whitespace
local -r tar_md5=$(gsutil hash -h -m "${tar_location}" 2>/dev/null | grep "Hash (md5):" | awk -F ':' '{print $2}' | sed 's/^[[:space:]]*//g')
echo "${tar_md5}"
}
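# Hypothetical usage (object and file paths are illustrative):
#   remote_md5=$(gsutil_get_tar_md5 "gs://kubernetes-staging-PROJECT/kubernetes-devel/kubernetes-server-linux-amd64.tar.gz")
#   local_md5=$(gsutil_get_tar_md5 "./_output/release-tars/kubernetes-server-linux-amd64.tar.gz")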
# Copy a release tar and its accompanying hash.
function copy-to-staging() {
local -r staging_path=$1
local -r gs_url=$2
local -r tar=$3
local -r hash=$4
local -r basename_tar=$(basename "${tar}")
#check whether this tar already exists in the staging path and has the same hash
#if it matches, then don't bother uploading it again
#remote_tar_md5 holds the md5 hash of the tarball already at the remote location (empty if none exists)
#staging_path example gs://kubernetes-staging-PROJECT/kubernetes-devel
#basename_tar example kubernetes-server-linux-amd64.tar.gz
local -r remote_tar_md5=$(gsutil_get_tar_md5 "${staging_path}/${basename_tar}")
if [[ -n ${remote_tar_md5} ]]; then
#local_tar_md5 is the md5 hash of the local tarball
#tar example ./_output/release-tars/kubernetes-server-linux-amd64.tar.gz
local -r local_tar_md5=$(gsutil_get_tar_md5 "${tar}")
if [[ "${remote_tar_md5}" == "${local_tar_md5}" ]]; then
echo "+++ ${basename_tar} uploaded earlier, cloud and local file md5 match (md5 = ${local_tar_md5})"
return 0
fi
fi
echo "${hash}" > "${tar}.sha1"
gsutil -m -q -h "Cache-Control:private, max-age=0" cp "${tar}" "${tar}.sha1" "${staging_path}"
gsutil -m acl ch -g all:R "${gs_url}" "${gs_url}.sha1" >/dev/null 2>&1
echo "+++ $(basename ${tar}) uploaded (sha1 = ${hash})"
echo "+++ ${basename_tar} uploaded (sha1 = ${hash})"
}
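# A hypothetical invocation (bucket, URL, and hash variable are illustrative,
# not necessarily the exact call site in cluster/gce/util.sh):
#   copy-to-staging "gs://kubernetes-staging-PROJECT/kubernetes-devel" \
#     "gs://kubernetes-staging-PROJECT/kubernetes-devel/kubernetes-server-linux-amd64.tar.gz" \
#     "./_output/release-tars/kubernetes-server-linux-amd64.tar.gz" \
#     "${SERVER_BINARY_TAR_HASH}"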
# Given the cluster zone, return the list of regional GCS release
# bucket suffixes for the release in preference order. GCS doesn't
# give us an API for this, so we hardcode it.