diff --git a/examples/examples_test.go b/examples/examples_test.go
index a18b2424907..8177a2e4896 100644
--- a/examples/examples_test.go
+++ b/examples/examples_test.go
@@ -344,10 +344,11 @@ func TestExampleObjectSchemas(t *testing.T) {
 			"secret": &api.Secret{},
 		},
 		"../examples/spark": {
+			"spark-driver-controller": &api.ReplicationController{},
+			"spark-master-controller": &api.ReplicationController{},
 			"spark-master-service":    &api.Service{},
-			"spark-master":            &api.Pod{},
+			"spark-webui":             &api.Service{},
 			"spark-worker-controller": &api.ReplicationController{},
-			"spark-driver":            &api.Pod{},
 		},
 		"../examples/spark/spark-gluster": {
 			"spark-master-service":    &api.Service{},
diff --git a/examples/spark/README.md b/examples/spark/README.md
index cefc2762b37..3f1a5ae1ff6 100644
--- a/examples/spark/README.md
+++ b/examples/spark/README.md
@@ -57,60 +57,98 @@ instructions for your platform.
 
 ## Step One: Start your Master service
 
-The Master [service](../../docs/user-guide/services.md) is the master (or head) service for a Spark
-cluster.
+The Master [service](../../docs/user-guide/services.md) is the master (head)
+service for a Spark cluster.
 
-Use the [`examples/spark/spark-master.json`](spark-master.json) file to create a [pod](../../docs/user-guide/pods.md) running
-the Master service.
+Use the
+[`examples/spark/spark-master-controller.yaml`](spark-master-controller.yaml)
+file to create a
+[replication controller](../../docs/user-guide/replication-controller.md)
+running the Spark Master service.
 
-```sh
-$ kubectl create -f examples/spark/spark-master.json
+```console
+$ kubectl create -f examples/spark/spark-master-controller.yaml
+replicationcontrollers/spark-master-controller
 ```
 
-Then, use the [`examples/spark/spark-master-service.json`](spark-master-service.json) file to
-create a logical service endpoint that Spark workers can use to access
-the Master pod.
+Then, use the
+[`examples/spark/spark-master-service.yaml`](spark-master-service.yaml) file to
+create a logical service endpoint that Spark workers can use to access the
+Master pod.
 
-```sh
-$ kubectl create -f examples/spark/spark-master-service.json
+```console
+$ kubectl create -f examples/spark/spark-master-service.yaml
+services/spark-master
+```
+
+Optionally, you can create a service for the Spark Master WebUI at this point as
+well. If you are running on a cloud provider that supports it, this will create
+an external load balancer and open a firewall to the Spark Master WebUI on the
+cluster. **Note:** With the existing configuration, there is **ABSOLUTELY NO**
+authentication on this WebUI. With slightly more work, it would be
+straightforward to put an `nginx` proxy in front to password-protect it.
+
+```console
+$ kubectl create -f examples/spark/spark-webui.yaml
+services/spark-webui
 ```
 
 ### Check to see if Master is running and accessible
 
-```sh
+```console
 $ kubectl get pods
-NAME                                           READY     STATUS    RESTARTS   AGE
-[...]
-spark-master                                   1/1       Running   0          25s
-
+NAME                            READY     STATUS    RESTARTS   AGE
+spark-master-controller-5u0q5   1/1       Running   0          8m
 ```
 
-Check logs to see the status of the master.
+Check logs to see the status of the master. (Use the pod name from the
+previous output.)
 ```sh
-$ kubectl logs spark-master
-
-starting org.apache.spark.deploy.master.Master, logging to /opt/spark-1.4.0-bin-hadoop2.6/sbin/../logs/spark--org.apache.spark.deploy.master.Master-1-spark-master.out
-Spark Command: /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java -cp /opt/spark-1.4.0-bin-hadoop2.6/sbin/../conf/:/opt/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar -Xms512m -Xmx512m -XX:MaxPermSize=128m org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
+$ kubectl logs spark-master-controller-5u0q5
+starting org.apache.spark.deploy.master.Master, logging to /opt/spark-1.5.1-bin-hadoop2.6/sbin/../logs/spark--org.apache.spark.deploy.master.Master-1-spark-master-controller-g0oao.out
+Spark Command: /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java -cp /opt/spark-1.5.1-bin-hadoop2.6/sbin/../conf/:/opt/spark-1.5.1-bin-hadoop2.6/lib/spark-assembly-1.5.1-hadoop2.6.0.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar -Xms1g -Xmx1g org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
 ========================================
-15/06/26 14:01:49 INFO Master: Registered signal handlers for [TERM, HUP, INT]
-15/06/26 14:01:50 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
-15/06/26 14:01:51 INFO SecurityManager: Changing view acls to: root
-15/06/26 14:01:51 INFO SecurityManager: Changing modify acls to: root
-15/06/26 14:01:51 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
-15/06/26 14:01:51 INFO Slf4jLogger: Slf4jLogger started
-15/06/26 14:01:51 INFO Remoting: Starting remoting
-15/06/26 14:01:52 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
-15/06/26 14:01:52 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
-15/06/26 14:01:52 INFO Utils: Successfully started service on port 6066.
-15/06/26 14:01:52 INFO StandaloneRestServer: Started REST server for submitting applications on port 6066
-15/06/26 14:01:52 INFO Master: Starting Spark master at spark://spark-master:7077
-15/06/26 14:01:52 INFO Master: Running Spark version 1.4.0
-15/06/26 14:01:52 INFO Utils: Successfully started service 'MasterUI' on port 8080.
-15/06/26 14:01:52 INFO MasterWebUI: Started MasterWebUI at http://10.244.2.34:8080
-15/06/26 14:01:53 INFO Master: I have been elected leader! New state: ALIVE
+15/10/27 21:25:05 INFO Master: Registered signal handlers for [TERM, HUP, INT]
+15/10/27 21:25:05 INFO SecurityManager: Changing view acls to: root
+15/10/27 21:25:05 INFO SecurityManager: Changing modify acls to: root
+15/10/27 21:25:05 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
+15/10/27 21:25:06 INFO Slf4jLogger: Slf4jLogger started
+15/10/27 21:25:06 INFO Remoting: Starting remoting
+15/10/27 21:25:06 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
+15/10/27 21:25:06 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
+15/10/27 21:25:07 INFO Master: Starting Spark master at spark://spark-master:7077
+15/10/27 21:25:07 INFO Master: Running Spark version 1.5.1
+15/10/27 21:25:07 INFO Utils: Successfully started service 'MasterUI' on port 8080.
+15/10/27 21:25:07 INFO MasterWebUI: Started MasterWebUI at http://spark-master:8080
+15/10/27 21:25:07 INFO Utils: Successfully started service on port 6066.
+15/10/27 21:25:07 INFO StandaloneRestServer: Started REST server for submitting applications on port 6066
+15/10/27 21:25:07 INFO Master: I have been elected leader! New state: ALIVE
 ```
 
+If you created the Spark WebUI and waited sufficient time for the load balancer
+to be created, the `spark-webui` service should look something like this:
+
+```console
+$ kubectl describe services/spark-webui
+Name:                   spark-webui
+Namespace:              default
+Labels:                 <none>
+Selector:               component=spark-master
+Type:                   LoadBalancer
+IP:                     10.0.152.249
+LoadBalancer Ingress:   104.197.147.190
+Port:                   8080/TCP
+NodePort:               31141/TCP
+Endpoints:              10.244.1.12:8080
+Session Affinity:       None
+Events: [...]
+```
+
+You should now be able to visit `http://104.197.147.190:8080` and see the Spark
+Master UI. *Note:* After workers connect, this UI has links to worker Web
+UIs. The worker UI links do not work (the links attempt to connect to cluster
+IPs).
+
 ## Step Two: Start your Spark workers
 
 The Spark workers do the heavy lifting in a Spark cluster. They
@@ -119,96 +157,104 @@ program.
 
 The Spark workers need the Master service to be running.
 
-Use the [`examples/spark/spark-worker-controller.json`](spark-worker-controller.json) file to create a
+Use the [`examples/spark/spark-worker-controller.yaml`](spark-worker-controller.yaml) file to create a
 [replication controller](../../docs/user-guide/replication-controller.md) that manages the worker pods.
 
-```sh
-$ kubectl create -f examples/spark/spark-worker-controller.json
+```console
+$ kubectl create -f examples/spark/spark-worker-controller.yaml
 ```
 
 ### Check to see if the workers are running
 
-```sh
-$ kubectl get pods
-NAME                                            READY     STATUS    RESTARTS   AGE
-[...]
-spark-master                                    1/1       Running   0          14m
-spark-worker-controller-hifwi                   1/1       Running   0          33s
-spark-worker-controller-u40r2                   1/1       Running   0          33s
-spark-worker-controller-vpgyg                   1/1       Running   0          33s
+If you launched the Spark WebUI, your workers should just appear in the UI when
+they're ready. (It may take a little bit to pull the images and launch the
+pods.) You can also interrogate the status in the following way:
 
-$ kubectl logs spark-master
+```console
+$ kubectl get pods
+NAME                            READY     STATUS    RESTARTS   AGE
+spark-master-controller-5u0q5   1/1       Running   0          25m
+spark-worker-controller-e8otp   1/1       Running   0          6m
+spark-worker-controller-fiivl   1/1       Running   0          6m
+spark-worker-controller-ytc7o   1/1       Running   0          6m
+
+$ kubectl logs spark-master-controller-5u0q5
 [...]
-15/06/26 14:15:43 INFO Master: Registering worker 10.244.2.35:46199 with 1 cores, 2.6 GB RAM
-15/06/26 14:15:55 INFO Master: Registering worker 10.244.1.15:44839 with 1 cores, 2.6 GB RAM
-15/06/26 14:15:55 INFO Master: Registering worker 10.244.0.19:60970 with 1 cores, 2.6 GB RAM
+15/10/26 18:20:14 INFO Master: Registering worker 10.244.1.13:53567 with 2 cores, 6.3 GB RAM
+15/10/26 18:20:14 INFO Master: Registering worker 10.244.2.7:46195 with 2 cores, 6.3 GB RAM
+15/10/26 18:20:14 INFO Master: Registering worker 10.244.3.8:39926 with 2 cores, 6.3 GB RAM
 ```
 
 ## Step Three: Start your Spark driver to launch jobs on your Spark cluster
 
 The Spark driver is used to launch jobs into Spark cluster. You can read more about it in
-[Spark architecture](http://spark.apache.org/docs/latest/cluster-overview.html).
+[Spark architecture](https://spark.apache.org/docs/latest/cluster-overview.html).
 
-```shell
-$ kubectl create -f examples/spark/spark-driver.json
+```console
+$ kubectl create -f examples/spark/spark-driver-controller.yaml
+replicationcontrollers/spark-driver-controller
 ```
 
 The Spark driver needs the Master service to be running.
 
 ### Check to see if the driver is running
 
-```shell
-$ kubectl get pods
-NAME                                           READY     REASON    RESTARTS   AGE
-[...]
-spark-master                                   1/1       Running   0          14m
-spark-driver                                   1/1       Running   0          10m
+```console
+$ kubectl get pods -lcomponent=spark-driver
+NAME                            READY     STATUS    RESTARTS   AGE
+spark-driver-controller-vwb9c   1/1       Running   0          1m
 ```
 
 ## Step Four: Do something with the cluster
 
-Use the kubectl exec to connect to Spark driver
+Use `kubectl exec` to connect to the Spark driver and run a pipeline.
 
-```
-$ kubectl exec spark-driver -it bash
-root@spark-driver:/#
-root@spark-driver:/# pyspark
+```console
+$ kubectl exec spark-driver-controller-vwb9c -it pyspark
 Python 2.7.9 (default, Mar  1 2015, 12:57:24)
 [GCC 4.9.2] on linux2
 Type "help", "copyright", "credits" or "license" for more information.
-15/06/26 14:25:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
 Welcome to
       ____              __
      / __/__  ___ _____/ /__
     _\ \/ _ \/ _ `/ __/  '_/
-   /__ / .__/\_,_/_/ /_/\_\   version 1.4.0
+   /__ / .__/\_,_/_/ /_/\_\   version 1.5.1
       /_/
+
 Using Python version 2.7.9 (default, Mar  1 2015 12:57:24)
 SparkContext available as sc, HiveContext available as sqlContext.
->>> import socket
->>> sc.parallelize(range(1000)).map(lambda x:socket.gethostname()).distinct().collect()
-['spark-worker-controller-u40r2', 'spark-worker-controller-hifwi', 'spark-worker-controller-vpgyg']
+>>> sc.textFile("gs://dataflow-samples/shakespeare/*").map(lambda s: len(s.split())).sum()
+939193
 ```
 
+Congratulations, you just counted all of the words in all of the plays of
+Shakespeare.
+
 ## Result
 
-You now have services, replication controllers, and pods for the Spark master , Spark driver and Spark workers.
-You can take this example to the next step and start using the Apache Spark cluster
-you just created, see [Spark documentation](https://spark.apache.org/documentation.html)
-for more information.
+You now have services and replication controllers for the Spark master, Spark
+workers, and Spark driver. You can take this example to the next step and start
+using the Apache Spark cluster you just created; see the
+[Spark documentation](https://spark.apache.org/documentation.html) for more
+information.
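+
+To go a little further inside that shell, here is a sketch along the same
+lines (it assumes the same public `gs://dataflow-samples/shakespeare`
+dataset and the `sc` SparkContext that `pyspark` provides) that ranks the
+most frequent words in the corpus:
+
+```python
+# Split each line into words, count each word, and print the ten most
+# frequent. takeOrdered() returns a list of (word, count) pairs.
+counts = (sc.textFile("gs://dataflow-samples/shakespeare/*")
+            .flatMap(lambda line: line.split())
+            .map(lambda word: (word.lower(), 1))
+            .reduceByKey(lambda a, b: a + b))
+print(counts.takeOrdered(10, key=lambda kv: -kv[1]))
+```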
 
 ## tl;dr
 
-```kubectl create -f spark-master.json```
+```console
+kubectl create -f examples/spark/spark-master-controller.yaml
+kubectl create -f examples/spark/spark-master-service.yaml
+kubectl create -f examples/spark/spark-webui.yaml
+kubectl create -f examples/spark/spark-worker-controller.yaml
+kubectl create -f examples/spark/spark-driver-controller.yaml
+```
 
-```kubectl create -f spark-master-service.json```
-
-Make sure the Master Pod is running (use: ```kubectl get pods```).
-
-```kubectl create -f spark-worker-controller.json```
-
-```kubectl create -f spark-driver.json```
+After it's set up:
 
+```console
+kubectl get pods                              # Make sure everything is running
+kubectl get services spark-webui              # Get the IP of the Spark WebUI
+kubectl get pods -lcomponent=spark-driver     # Get the driver pod to interact with
+```
 
 [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()
diff --git a/examples/spark/images/Makefile b/examples/spark/images/Makefile
index 38646334223..fe86c4fec3f 100644
--- a/examples/spark/images/Makefile
+++ b/examples/spark/images/Makefile
@@ -1,8 +1,9 @@
 all: push
 
 # To bump the Spark version, bump the version in base/Dockerfile, bump
-# this tag, and bump the uses of this tag in
-# worker/master/driver.
+# this tag and reset to v1. You should also double-check the native
+# Hadoop libs at that point (we grab the 2.6.1 libs, which are
+# appropriate for 1.5.1-with-2.6).
 
 TAG = 1.5.1_v1
 
 containers:
diff --git a/examples/spark/images/base/Dockerfile b/examples/spark/images/base/Dockerfile
index 35c533c9d78..160bcddb975 100644
--- a/examples/spark/images/base/Dockerfile
+++ b/examples/spark/images/base/Dockerfile
@@ -3,17 +3,35 @@ FROM java:latest
 RUN apt-get update -y
 RUN apt-get install -y scala
 
-ENV version 1.5.1
+ENV hadoop_ver 2.6.1
+ENV spark_ver 1.5.1
 
-# Get Spark from some apache mirror.
+# Get Hadoop from US Apache mirror and extract just the native
+# libs. (Until we care about running HDFS with these containers, this
+# is all we need.)
 RUN mkdir -p /opt && \
     cd /opt && \
-    wget http://apache.mirrors.pair.com/spark/spark-${version}/spark-${version}-bin-hadoop2.6.tgz && \
-    tar -zvxf spark-${version}-bin-hadoop2.6.tgz && \
-    rm spark-${version}-bin-hadoop2.6.tgz && \
-    ln -s spark-${version}-bin-hadoop2.6 spark && \
-    echo Spark ${version} installed in /opt
+    wget http://www.us.apache.org/dist/hadoop/common/hadoop-${hadoop_ver}/hadoop-${hadoop_ver}.tar.gz && \
+    tar -zvxf hadoop-${hadoop_ver}.tar.gz hadoop-${hadoop_ver}/lib/native && \
+    rm hadoop-${hadoop_ver}.tar.gz && \
+    ln -s hadoop-${hadoop_ver} hadoop && \
+    echo Hadoop ${hadoop_ver} native libraries installed in /opt/hadoop/lib/native
+
+# Get Spark from US Apache mirror.
+RUN mkdir -p /opt && \
+    cd /opt && \
+    wget http://www.us.apache.org/dist/spark/spark-${spark_ver}/spark-${spark_ver}-bin-hadoop2.6.tgz && \
+    tar -zvxf spark-${spark_ver}-bin-hadoop2.6.tgz && \
+    rm spark-${spark_ver}-bin-hadoop2.6.tgz && \
+    ln -s spark-${spark_ver}-bin-hadoop2.6 spark && \
+    echo Spark ${spark_ver} installed in /opt
+
+# Add the GCS connector.
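+# (The connector implements the Hadoop FileSystem interface for gs://
+# URIs; the core-site.xml added below wires up the class names it
+# provides.)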
+RUN wget -O /opt/spark/lib/gcs-connector-latest-hadoop2.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar
 
 ADD log4j.properties /opt/spark/conf/log4j.properties
 
 ADD setup_client.sh /
+ADD start-common.sh /
+ADD core-site.xml /opt/spark/conf/core-site.xml
+ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
 
 ENV PATH $PATH:/opt/spark/bin
diff --git a/examples/spark/images/base/core-site.xml b/examples/spark/images/base/core-site.xml
new file mode 100644
index 00000000000..2fecabedc8f
--- /dev/null
+++ b/examples/spark/images/base/core-site.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+  <property>
+    <name>fs.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
+    <description>The FileSystem for gs: (GCS) uris.</description>
+  </property>
+  <property>
+    <name>fs.AbstractFileSystem.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
+    <description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
+  </property>
+  <property>
+    <name>fs.gs.project.id</name>
+    <value>NOT_RUNNING_INSIDE_GCE</value>
+  </property>
+</configuration>
diff --git a/examples/spark/images/base/spark-defaults.conf b/examples/spark/images/base/spark-defaults.conf
new file mode 100644
index 00000000000..f5d3d98504c
--- /dev/null
+++ b/examples/spark/images/base/spark-defaults.conf
@@ -0,0 +1,5 @@
+spark.master                     spark://spark-master:7077
+spark.executor.extraClassPath    /opt/spark/lib/gcs-connector-latest-hadoop2.jar
+spark.driver.extraClassPath      /opt/spark/lib/gcs-connector-latest-hadoop2.jar
+spark.driver.extraLibraryPath    /opt/hadoop/lib/native
+spark.app.id                     KubernetesSpark
diff --git a/examples/spark/images/base/start-common.sh b/examples/spark/images/base/start-common.sh
new file mode 100755
index 00000000000..ac8d5058386
--- /dev/null
+++ b/examples/spark/images/base/start-common.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
+
+if [[ -n "${PROJECT_ID}" ]]; then
+  sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
+fi
+
+# We don't want any of the incoming service variables, we'd rather use
+# DNS. But this one interferes directly with Spark.
+unset SPARK_MASTER_PORT
+
+# spark.{executor,driver}.extraLibraryPath don't actually seem to
+# work, this seems to be the only reliable way to get the native libs
+# picked up.
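+# (Likely because the JVM folds LD_LIBRARY_PATH into java.library.path
+# on Linux, so libhadoop.so is found by the processes that source this
+# file even when Spark's own settings are ignored.)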
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native
diff --git a/examples/spark/images/driver/Dockerfile b/examples/spark/images/driver/Dockerfile
index 522cb333b29..7dc33857321 100644
--- a/examples/spark/images/driver/Dockerfile
+++ b/examples/spark/images/driver/Dockerfile
@@ -1,3 +1,3 @@
-FROM gcr.io/google_containers/spark-base:1.5.1_v1
+FROM gcr.io/google_containers/spark-base:latest
 ADD start.sh /start.sh
 CMD ["/start.sh"]
diff --git a/examples/spark/images/driver/start.sh b/examples/spark/images/driver/start.sh
index 71e5a470d37..89d4fcec00f 100755
--- a/examples/spark/images/driver/start.sh
+++ b/examples/spark/images/driver/start.sh
@@ -14,9 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts
-echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
-echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
+. /start-common.sh
+
 echo "Use kubectl exec spark-driver -it bash to invoke commands"
 while true; do
 sleep 100
diff --git a/examples/spark/images/master/Dockerfile b/examples/spark/images/master/Dockerfile
index 9003b4c14ea..d63cc29de7f 100644
--- a/examples/spark/images/master/Dockerfile
+++ b/examples/spark/images/master/Dockerfile
@@ -1,7 +1,7 @@
-FROM gcr.io/google_containers/spark-base:1.5.1_v1
+FROM gcr.io/google_containers/spark-base:latest
 
 ADD start.sh /
 ADD log4j.properties /opt/spark/conf/log4j.properties
 
-EXPOSE 7077
+EXPOSE 7077 8080
 ENTRYPOINT ["/start.sh"]
diff --git a/examples/spark/images/master/start.sh b/examples/spark/images/master/start.sh
index 59b225f77e7..4cb36511198 100755
--- a/examples/spark/images/master/start.sh
+++ b/examples/spark/images/master/start.sh
@@ -14,6 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-export SPARK_MASTER_PORT=${SPARK_MASTER_SERVICE_PORT:-7077}
+. /start-common.sh
+
+echo "$(hostname -i) spark-master" >> /etc/hosts
+export SPARK_LOCAL_HOSTNAME=spark-master
+export SPARK_MASTER_IP=spark-master
+
 /opt/spark/sbin/start-master.sh
 tail -F /opt/spark/logs/*
diff --git a/examples/spark/images/worker/Dockerfile b/examples/spark/images/worker/Dockerfile
index d0874ad6f13..99a194a1708 100644
--- a/examples/spark/images/worker/Dockerfile
+++ b/examples/spark/images/worker/Dockerfile
@@ -1,4 +1,4 @@
-FROM gcr.io/google_containers/spark-base:1.5.1_v1
+FROM gcr.io/google_containers/spark-base:latest
 ADD start.sh /
 ADD log4j.properties /opt/spark/conf/log4j.properties
 
diff --git a/examples/spark/images/worker/start.sh b/examples/spark/images/worker/start.sh
index 3643ea0a3da..6868518701f 100755
--- a/examples/spark/images/worker/start.sh
+++ b/examples/spark/images/worker/start.sh
@@ -14,15 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-if [[ ${SPARK_MASTER_SERVICE_HOST} == "" ]]; then
-  echo "Spark Master service must be created before starting any workers"
-  sleep 30 # To postpone pod restart
-  exit 1
-fi
+. /start-common.sh
 
-echo "${SPARK_MASTER_SERVICE_HOST} spark-master" >> /etc/hosts
-export SPARK_LOCAL_HOSTNAME=$(hostname -i)
-
-/opt/spark/sbin/start-slave.sh spark://spark-master:${SPARK_MASTER_SERVICE_PORT}
+/opt/spark/sbin/start-slave.sh spark://spark-master:7077
 tail -F /opt/spark/logs/*
diff --git a/examples/spark/spark-driver-controller.yaml b/examples/spark/spark-driver-controller.yaml
new file mode 100644
index 00000000000..d62b3e21969
--- /dev/null
+++ b/examples/spark/spark-driver-controller.yaml
@@ -0,0 +1,21 @@
+kind: ReplicationController
+apiVersion: v1
+metadata:
+  name: spark-driver-controller
+  labels:
+    component: spark-driver
+spec:
+  replicas: 1
+  selector:
+    component: spark-driver
+  template:
+    metadata:
+      labels:
+        component: spark-driver
+    spec:
+      containers:
+        - name: spark-driver
+          image: gcr.io/google_containers/spark-driver:1.5.1_v1
+          resources:
+            requests:
+              cpu: 100m
diff --git a/examples/spark/spark-driver.json b/examples/spark/spark-driver.json
deleted file mode 100644
index 9911cafe9fb..00000000000
--- a/examples/spark/spark-driver.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "kind": "Pod",
-  "apiVersion": "v1",
-  "metadata": {
-    "name": "spark-driver",
-    "labels": {
-      "name": "spark-driver"
-    }
-  },
-  "spec": {
-    "containers": [
-      {
-        "name": "spark-driver",
-        "image": "gcr.io/google_containers/spark-driver:1.5.1_v1",
-        "resources": {
-          "limits": {
-            "cpu": "100m"
-          }
-        }
-      }
-    ]
-  }
-}
diff --git a/examples/spark/spark-gluster/spark-master-controller.yaml b/examples/spark/spark-gluster/spark-master-controller.yaml
index a26eeeec0f5..3569329d36c 100644
--- a/examples/spark/spark-gluster/spark-master-controller.yaml
+++ b/examples/spark/spark-gluster/spark-master-controller.yaml
@@ -3,7 +3,7 @@ apiVersion: v1
 metadata:
   name: spark-master-controller
   labels:
-    component: spark-master-controller
+    component: spark-master
 spec:
   replicas: 1
   selector:
@@ -22,7 +22,7 @@ spec:
         - mountPath: /mnt/glusterfs
           name: glusterfsvol
         resources:
-          limits:
+          requests:
             cpu: 100m
       volumes:
         - name: glusterfsvol
diff --git a/examples/spark/spark-gluster/spark-master-service.yaml b/examples/spark/spark-gluster/spark-master-service.yaml
index 6b95e967dc1..ec51365b39d 100644
--- a/examples/spark/spark-gluster/spark-master-service.yaml
+++ b/examples/spark/spark-gluster/spark-master-service.yaml
@@ -9,4 +9,4 @@ spec:
     - port: 7077
       targetPort: 7077
   selector:
-    component: spark-master-controller
+    component: spark-master
diff --git a/examples/spark/spark-gluster/spark-worker-controller.yaml b/examples/spark/spark-gluster/spark-worker-controller.yaml
index 126448320e5..a85884d6e1d 100644
--- a/examples/spark/spark-gluster/spark-worker-controller.yaml
+++ b/examples/spark/spark-gluster/spark-worker-controller.yaml
@@ -12,7 +12,7 @@ spec:
     metadata:
       labels:
         component: spark-worker
-        uses: spark-master-controller
+        uses: spark-master
     spec:
       containers:
         - name: spark-worker
@@ -23,7 +23,7 @@ spec:
         - mountPath: /mnt/glusterfs
           name: glusterfsvol
         resources:
-          limits:
+          requests:
             cpu: 100m
       volumes:
         - name: glusterfsvol
diff --git a/examples/spark/spark-master-controller.yaml b/examples/spark/spark-master-controller.yaml
new file mode 100644
index 00000000000..b67c1424dca
--- /dev/null
+++ b/examples/spark/spark-master-controller.yaml
@@ -0,0 +1,24 @@
+kind: ReplicationController
+apiVersion: v1
+metadata:
+  name: spark-master-controller
+  labels:
+    component: spark-master
+spec:
+  replicas: 1
+  selector:
+    component: spark-master
+  template:
+    metadata:
+      labels:
+        component: spark-master
+    spec:
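+      # This pod serves both the cluster port (7077) and the WebUI
+      # (8080); the spark-webui service targets it through the
+      # component=spark-master label.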
+      containers:
+        - name: spark-master
+          image: gcr.io/google_containers/spark-master:1.5.1_v1
+          ports:
+            - containerPort: 7077
+            - containerPort: 8080
+          resources:
+            requests:
+              cpu: 100m
diff --git a/examples/spark/spark-master-service.json b/examples/spark/spark-master-service.json
deleted file mode 100644
index 168bdb3c21b..00000000000
--- a/examples/spark/spark-master-service.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "kind": "Service",
-  "apiVersion": "v1",
-  "metadata": {
-    "name": "spark-master",
-    "labels": {
-      "name": "spark-master"
-    }
-  },
-  "spec": {
-    "ports": [
-      {
-        "port": 7077,
-        "targetPort": 7077
-      }
-    ],
-    "selector": {
-      "name": "spark-master"
-    }
-  }
-}
diff --git a/examples/spark/spark-master-service.yaml b/examples/spark/spark-master-service.yaml
new file mode 100644
index 00000000000..ec51365b39d
--- /dev/null
+++ b/examples/spark/spark-master-service.yaml
@@ -0,0 +1,12 @@
+kind: Service
+apiVersion: v1
+metadata:
+  name: spark-master
+  labels:
+    component: spark-master-service
+spec:
+  ports:
+    - port: 7077
+      targetPort: 7077
+  selector:
+    component: spark-master
diff --git a/examples/spark/spark-master.json b/examples/spark/spark-master.json
deleted file mode 100644
index b25122d0ab0..00000000000
--- a/examples/spark/spark-master.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-  "kind": "Pod",
-  "apiVersion": "v1",
-  "metadata": {
-    "name": "spark-master",
-    "labels": {
-      "name": "spark-master"
-    }
-  },
-  "spec": {
-    "containers": [
-      {
-        "name": "spark-master",
-        "image": "gcr.io/google_containers/spark-master:1.5.1_v1",
-        "ports": [
-          {
-            "containerPort": 7077
-          }
-        ],
-        "resources": {
-          "limits": {
-            "cpu": "100m"
-          }
-        }
-      }
-    ]
-  }
-}
diff --git a/examples/spark/spark-webui.yaml b/examples/spark/spark-webui.yaml
new file mode 100644
index 00000000000..28e8abe435b
--- /dev/null
+++ b/examples/spark/spark-webui.yaml
@@ -0,0 +1,11 @@
+kind: Service
+apiVersion: v1
+metadata:
+  name: spark-webui
+spec:
+  ports:
+    - port: 8080
+      targetPort: 8080
+  selector:
+    component: spark-master
+  type: LoadBalancer
diff --git a/examples/spark/spark-worker-controller.json b/examples/spark/spark-worker-controller.json
deleted file mode 100644
index 1d25b9efb55..00000000000
--- a/examples/spark/spark-worker-controller.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-  "kind": "ReplicationController",
-  "apiVersion": "v1",
-  "metadata": {
-    "name": "spark-worker-controller",
-    "labels": {
-      "name": "spark-worker"
-    }
-  },
-  "spec": {
-    "replicas": 3,
-    "selector": {
-      "name": "spark-worker"
-    },
-    "template": {
-      "metadata": {
-        "labels": {
-          "name": "spark-worker",
-          "uses": "spark-master"
-        }
-      },
-      "spec": {
-        "containers": [
-          {
-            "name": "spark-worker",
-            "image": "gcr.io/google_containers/spark-worker:1.5.1_v1",
-            "ports": [
-              {
-                "hostPort": 8888,
-                "containerPort": 8888
-              }
-            ],
-            "resources": {
-              "limits": {
-                "cpu": "100m"
-              }
-            }
-          }
-        ]
-      }
-    }
-  }
-}
diff --git a/examples/spark/spark-worker-controller.yaml b/examples/spark/spark-worker-controller.yaml
new file mode 100644
index 00000000000..1fa59f04c20
--- /dev/null
+++ b/examples/spark/spark-worker-controller.yaml
@@ -0,0 +1,24 @@
+kind: ReplicationController
+apiVersion: v1
+metadata:
+  name: spark-worker-controller
+  labels:
+    component: spark-worker
+spec:
+  replicas: 3
+  selector:
+    component: spark-worker
+  template:
+    metadata:
+      labels:
+        component: spark-worker
+        uses: spark-master
+    spec:
+      containers:
+        - name: spark-worker
+          image: gcr.io/google_containers/spark-worker:1.5.1_v1
+          ports:
+            - containerPort: 8888
+          resources:
+            requests:
+              cpu: 100m
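
Since the workers now run under a replication controller, resizing the cluster
afterwards is a single command (a usage sketch; the replica count here is an
arbitrary choice):

```console
$ kubectl scale rc spark-worker-controller --replicas=5
```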