Spark: Add liveness probes for master/worker

It turns out the pods don't exit when the master or worker crashes.

Along the way: remove redundant metadata.
This commit is contained in:
Zach Loafman 2015-10-29 05:22:56 -07:00
parent 413ec6c83a
commit d24d2a21f9
4 changed files with 18 additions and 9 deletions

View File

@@ -2,8 +2,6 @@ kind: ReplicationController
 apiVersion: v1
 metadata:
   name: spark-driver-controller
-  labels:
-    component: spark-driver
 spec:
   replicas: 1
   selector:

View File

@@ -2,8 +2,6 @@ kind: ReplicationController
 apiVersion: v1
 metadata:
   name: spark-master-controller
-  labels:
-    component: spark-master
 spec:
   replicas: 1
   selector:
@@ -19,6 +17,15 @@ spec:
           ports:
             - containerPort: 7077
            - containerPort: 8080
+          livenessProbe:
+            exec:
+              command:
+                - /opt/spark/sbin/spark-daemon.sh
+                - status
+                - org.apache.spark.deploy.master.Master
+                - '1'
+            initialDelaySeconds: 30
+            timeoutSeconds: 1
           resources:
             requests:
               cpu: 100m

View File

@@ -2,8 +2,6 @@ kind: Service
 apiVersion: v1
 metadata:
   name: spark-master
-  labels:
-    component: spark-master-service
 spec:
   ports:
     - port: 7077

View File

@@ -2,8 +2,6 @@ kind: ReplicationController
 apiVersion: v1
 metadata:
   name: spark-worker-controller
-  labels:
-    component: spark-worker
 spec:
   replicas: 3
   selector:
@@ -12,13 +10,21 @@ spec:
     metadata:
       labels:
         component: spark-worker
-        uses: spark-master
     spec:
       containers:
         - name: spark-worker
          image: gcr.io/google_containers/spark-worker:1.5.1_v1
           ports:
             - containerPort: 8888
+          livenessProbe:
+            exec:
+              command:
+                - /opt/spark/sbin/spark-daemon.sh
+                - status
+                - org.apache.spark.deploy.worker.Worker
+                - '1'
+            initialDelaySeconds: 30
+            timeoutSeconds: 1
           resources:
             requests:
               cpu: 100m