mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 06:27:05 +00:00
Merge pull request #58029 from yguo0905/tpu-api
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. GCE: support Cloud TPU API in cloud provider **What this PR does / why we need it**: This PR adds the support for Cloud TPU API in GCE cloud provider. **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes # **Special notes for your reviewer**: **Release note**: ```release-note GCE: support Cloud TPU API in cloud provider ``` /assign @vishh /assign @cheftako
This commit is contained in:
commit
e30554bd39
4
Godeps/Godeps.json
generated
4
Godeps/Godeps.json
generated
@ -3056,6 +3056,10 @@
|
|||||||
"ImportPath": "google.golang.org/api/pubsub/v1",
|
"ImportPath": "google.golang.org/api/pubsub/v1",
|
||||||
"Rev": "ab90adb3efa287b869ecb698db42f923cc734972"
|
"Rev": "ab90adb3efa287b869ecb698db42f923cc734972"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"ImportPath": "google.golang.org/api/tpu/v1alpha1",
|
||||||
|
"Rev": "ab90adb3efa287b869ecb698db42f923cc734972"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"ImportPath": "google.golang.org/genproto/googleapis/api/annotations",
|
"ImportPath": "google.golang.org/genproto/googleapis/api/annotations",
|
||||||
"Rev": "09f6ed296fc66555a25fe4ce95173148778dfa85"
|
"Rev": "09f6ed296fc66555a25fe4ce95173148778dfa85"
|
||||||
|
35
Godeps/LICENSES
generated
35
Godeps/LICENSES
generated
@ -89897,6 +89897,41 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
================================================================================
|
================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
================================================================================
|
||||||
|
= vendor/google.golang.org/api/tpu/v1alpha1 licensed under: =
|
||||||
|
|
||||||
|
Copyright (c) 2011 Google Inc. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
= vendor/google.golang.org/api/LICENSE a651bb3d8b1c412632e28823bb432b40
|
||||||
|
================================================================================
|
||||||
|
|
||||||
|
|
||||||
================================================================================
|
================================================================================
|
||||||
= vendor/google.golang.org/genproto/googleapis/api/annotations licensed under: =
|
= vendor/google.golang.org/genproto/googleapis/api/annotations licensed under: =
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ go_library(
|
|||||||
"gce_routes.go",
|
"gce_routes.go",
|
||||||
"gce_targetpool.go",
|
"gce_targetpool.go",
|
||||||
"gce_targetproxy.go",
|
"gce_targetproxy.go",
|
||||||
|
"gce_tpu.go",
|
||||||
"gce_urlmap.go",
|
"gce_urlmap.go",
|
||||||
"gce_util.go",
|
"gce_util.go",
|
||||||
"gce_zones.go",
|
"gce_zones.go",
|
||||||
@ -69,6 +70,7 @@ go_library(
|
|||||||
"//vendor/google.golang.org/api/compute/v1:go_default_library",
|
"//vendor/google.golang.org/api/compute/v1:go_default_library",
|
||||||
"//vendor/google.golang.org/api/container/v1:go_default_library",
|
"//vendor/google.golang.org/api/container/v1:go_default_library",
|
||||||
"//vendor/google.golang.org/api/googleapi:go_default_library",
|
"//vendor/google.golang.org/api/googleapi:go_default_library",
|
||||||
|
"//vendor/google.golang.org/api/tpu/v1alpha1:go_default_library",
|
||||||
"//vendor/gopkg.in/gcfg.v1:go_default_library",
|
"//vendor/gopkg.in/gcfg.v1:go_default_library",
|
||||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||||
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
|
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
|
||||||
|
@ -107,6 +107,7 @@ type GCECloud struct {
|
|||||||
serviceBeta *computebeta.Service
|
serviceBeta *computebeta.Service
|
||||||
serviceAlpha *computealpha.Service
|
serviceAlpha *computealpha.Service
|
||||||
containerService *container.Service
|
containerService *container.Service
|
||||||
|
tpuService *tpuService
|
||||||
client clientset.Interface
|
client clientset.Interface
|
||||||
clientBuilder controller.ControllerClientBuilder
|
clientBuilder controller.ControllerClientBuilder
|
||||||
eventBroadcaster record.EventBroadcaster
|
eventBroadcaster record.EventBroadcaster
|
||||||
@ -430,6 +431,11 @@ func CreateGCECloud(config *CloudConfig) (*GCECloud, error) {
|
|||||||
}
|
}
|
||||||
containerService.UserAgent = userAgent
|
containerService.UserAgent = userAgent
|
||||||
|
|
||||||
|
tpuService, err := newTPUService(client)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
// ProjectID and NetworkProjectID may be project number or name.
|
// ProjectID and NetworkProjectID may be project number or name.
|
||||||
projID, netProjID := tryConvertToProjectNames(config.ProjectID, config.NetworkProjectID, service)
|
projID, netProjID := tryConvertToProjectNames(config.ProjectID, config.NetworkProjectID, service)
|
||||||
onXPN := projID != netProjID
|
onXPN := projID != netProjID
|
||||||
@ -496,6 +502,7 @@ func CreateGCECloud(config *CloudConfig) (*GCECloud, error) {
|
|||||||
serviceAlpha: serviceAlpha,
|
serviceAlpha: serviceAlpha,
|
||||||
serviceBeta: serviceBeta,
|
serviceBeta: serviceBeta,
|
||||||
containerService: containerService,
|
containerService: containerService,
|
||||||
|
tpuService: tpuService,
|
||||||
projectID: projID,
|
projectID: projID,
|
||||||
networkProjectID: netProjID,
|
networkProjectID: netProjID,
|
||||||
onXPN: onXPN,
|
onXPN: onXPN,
|
||||||
|
189
pkg/cloudprovider/providers/gce/gce_tpu.go
Normal file
189
pkg/cloudprovider/providers/gce/gce_tpu.go
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2018 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package gce
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/golang/glog"
|
||||||
|
"google.golang.org/api/googleapi"
|
||||||
|
tpuapi "google.golang.org/api/tpu/v1alpha1"
|
||||||
|
|
||||||
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newTPUService returns a new tpuService using the client to communicate with
|
||||||
|
// the Cloud TPU APIs.
|
||||||
|
func newTPUService(client *http.Client) (*tpuService, error) {
|
||||||
|
s, err := tpuapi.New(client)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &tpuService{
|
||||||
|
nodesService: tpuapi.NewProjectsLocationsNodesService(s),
|
||||||
|
operationsService: tpuapi.NewProjectsLocationsOperationsService(s),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// tpuService encapsulates the TPU services on nodes and the operations on the
|
||||||
|
// nodes.
|
||||||
|
type tpuService struct {
|
||||||
|
nodesService *tpuapi.ProjectsLocationsNodesService
|
||||||
|
operationsService *tpuapi.ProjectsLocationsOperationsService
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateTPU creates the Cloud TPU node with the specified name in the
|
||||||
|
// specified zone.
|
||||||
|
func (gce *GCECloud) CreateTPU(ctx context.Context, name, zone string, node *tpuapi.Node) (*tpuapi.Node, error) {
|
||||||
|
var err error
|
||||||
|
mc := newTPUMetricContext("create", zone)
|
||||||
|
defer mc.Observe(err)
|
||||||
|
|
||||||
|
var op *tpuapi.Operation
|
||||||
|
parent := getTPUParentName(gce.projectID, zone)
|
||||||
|
op, err = gce.tpuService.nodesService.Create(parent, node).NodeId(name).Do()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
glog.V(2).Infof("Creating Cloud TPU %q in zone %q with operation %q", name, zone, op.Name)
|
||||||
|
|
||||||
|
op, err = gce.waitForTPUOp(30*time.Second, 10*time.Minute, op)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
err = getErrorFromTPUOp(op)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
output := new(tpuapi.Node)
|
||||||
|
err = json.Unmarshal(op.Response, output)
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("failed to unmarshal response from operation %q: response = %v, err = %v", op.Name, op.Response, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return output, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteTPU deletes the Cloud TPU with the specified name in the specified
|
||||||
|
// zone.
|
||||||
|
func (gce *GCECloud) DeleteTPU(ctx context.Context, name, zone string) error {
|
||||||
|
var err error
|
||||||
|
mc := newTPUMetricContext("delete", zone)
|
||||||
|
defer mc.Observe(err)
|
||||||
|
|
||||||
|
var op *tpuapi.Operation
|
||||||
|
name = getTPUName(gce.projectID, zone, name)
|
||||||
|
op, err = gce.tpuService.nodesService.Delete(name).Do()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
glog.V(2).Infof("Deleting Cloud TPU %q in zone %q with operation %q", name, zone, op.Name)
|
||||||
|
|
||||||
|
op, err = gce.waitForTPUOp(30*time.Second, 10*time.Minute, op)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = getErrorFromTPUOp(op)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetTPU returns the Cloud TPU with the specified name in the specified zone.
|
||||||
|
func (gce *GCECloud) GetTPU(ctx context.Context, name, zone string) (*tpuapi.Node, error) {
|
||||||
|
mc := newTPUMetricContext("get", zone)
|
||||||
|
|
||||||
|
name = getTPUName(gce.projectID, zone, name)
|
||||||
|
node, err := gce.tpuService.nodesService.Get(name).Do()
|
||||||
|
if err != nil {
|
||||||
|
return nil, mc.Observe(err)
|
||||||
|
}
|
||||||
|
return node, mc.Observe(nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListTPUs returns Cloud TPUs in the specified zone.
|
||||||
|
func (gce *GCECloud) ListTPUs(ctx context.Context, zone string) ([]*tpuapi.Node, error) {
|
||||||
|
mc := newTPUMetricContext("list", zone)
|
||||||
|
|
||||||
|
parent := getTPUParentName(gce.projectID, zone)
|
||||||
|
response, err := gce.tpuService.nodesService.List(parent).Do()
|
||||||
|
if err != nil {
|
||||||
|
return nil, mc.Observe(err)
|
||||||
|
}
|
||||||
|
return response.Nodes, mc.Observe(nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// waitForTPUOp checks whether the op is done every interval before the timeout
|
||||||
|
// occurs.
|
||||||
|
func (gce *GCECloud) waitForTPUOp(interval, timeout time.Duration, op *tpuapi.Operation) (*tpuapi.Operation, error) {
|
||||||
|
if err := wait.PollImmediate(interval, timeout, func() (bool, error) {
|
||||||
|
glog.V(3).Infof("Waiting for operation %q to complete...", op.Name)
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
gce.operationPollRateLimiter.Accept()
|
||||||
|
duration := time.Now().Sub(start)
|
||||||
|
if duration > 5*time.Second {
|
||||||
|
glog.V(2).Infof("Getting operation %q throttled for %v", op.Name, duration)
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
op, err = gce.tpuService.operationsService.Get(op.Name).Do()
|
||||||
|
if err != nil {
|
||||||
|
return true, err
|
||||||
|
}
|
||||||
|
if op.Done {
|
||||||
|
glog.V(3).Infof("Operation %q has completed", op.Name)
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
}); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to wait for operation %q: %s", op.Name, err)
|
||||||
|
}
|
||||||
|
return op, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// newTPUMetricContext returns a new metricContext used for recording metrics
|
||||||
|
// of Cloud TPU API calls.
|
||||||
|
func newTPUMetricContext(request, zone string) *metricContext {
|
||||||
|
return newGenericMetricContext("tpus", request, unusedMetricLabel, zone, "alpha")
|
||||||
|
}
|
||||||
|
|
||||||
|
// getErrorFromTPUOp returns the error in the failed op, or nil if the op
|
||||||
|
// succeed.
|
||||||
|
func getErrorFromTPUOp(op *tpuapi.Operation) error {
|
||||||
|
if op != nil && op.Error != nil {
|
||||||
|
return &googleapi.Error{
|
||||||
|
Code: op.ServerResponse.HTTPStatusCode,
|
||||||
|
Message: op.Error.Message,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getTPUParentName returns the resource name of the location that contains
// TPU nodes, in the form "projects/<project>/locations/<zone>".
func getTPUParentName(project, zone string) string {
	return "projects/" + project + "/locations/" + zone
}
|
||||||
|
|
||||||
|
// getTPUName returns the full resource name of a TPU node, in the form
// "projects/<project>/locations/<zone>/nodes/<name>".
func getTPUName(project, zone, name string) string {
	return "projects/" + project + "/locations/" + zone + "/nodes/" + name
}
|
1
vendor/BUILD
vendored
1
vendor/BUILD
vendored
@ -397,6 +397,7 @@ filegroup(
|
|||||||
"//vendor/google.golang.org/api/logging/v2beta1:all-srcs",
|
"//vendor/google.golang.org/api/logging/v2beta1:all-srcs",
|
||||||
"//vendor/google.golang.org/api/monitoring/v3:all-srcs",
|
"//vendor/google.golang.org/api/monitoring/v3:all-srcs",
|
||||||
"//vendor/google.golang.org/api/pubsub/v1:all-srcs",
|
"//vendor/google.golang.org/api/pubsub/v1:all-srcs",
|
||||||
|
"//vendor/google.golang.org/api/tpu/v1alpha1:all-srcs",
|
||||||
"//vendor/google.golang.org/genproto/googleapis/api/annotations:all-srcs",
|
"//vendor/google.golang.org/genproto/googleapis/api/annotations:all-srcs",
|
||||||
"//vendor/google.golang.org/genproto/googleapis/rpc/status:all-srcs",
|
"//vendor/google.golang.org/genproto/googleapis/rpc/status:all-srcs",
|
||||||
"//vendor/google.golang.org/grpc:all-srcs",
|
"//vendor/google.golang.org/grpc:all-srcs",
|
||||||
|
28
vendor/google.golang.org/api/tpu/v1alpha1/BUILD
generated
vendored
Normal file
28
vendor/google.golang.org/api/tpu/v1alpha1/BUILD
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||||
|
|
||||||
|
go_library(
|
||||||
|
name = "go_default_library",
|
||||||
|
srcs = ["tpu-gen.go"],
|
||||||
|
importpath = "google.golang.org/api/tpu/v1alpha1",
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = [
|
||||||
|
"//vendor/golang.org/x/net/context:go_default_library",
|
||||||
|
"//vendor/golang.org/x/net/context/ctxhttp:go_default_library",
|
||||||
|
"//vendor/google.golang.org/api/gensupport:go_default_library",
|
||||||
|
"//vendor/google.golang.org/api/googleapi:go_default_library",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
filegroup(
|
||||||
|
name = "package-srcs",
|
||||||
|
srcs = glob(["**"]),
|
||||||
|
tags = ["automanaged"],
|
||||||
|
visibility = ["//visibility:private"],
|
||||||
|
)
|
||||||
|
|
||||||
|
filegroup(
|
||||||
|
name = "all-srcs",
|
||||||
|
srcs = [":package-srcs"],
|
||||||
|
tags = ["automanaged"],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
1006
vendor/google.golang.org/api/tpu/v1alpha1/tpu-api.json
generated
vendored
Normal file
1006
vendor/google.golang.org/api/tpu/v1alpha1/tpu-api.json
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3292
vendor/google.golang.org/api/tpu/v1alpha1/tpu-gen.go
generated
vendored
Normal file
3292
vendor/google.golang.org/api/tpu/v1alpha1/tpu-gen.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user