Updated spark examples with docker images moved to gcr.io/google_containers

This commit is contained in:
Marcin Wielgus
2015-06-26 18:00:42 +02:00
parent 0947b2715a
commit bfc4ee789d
12 changed files with 226 additions and 78 deletions

View File

@@ -0,0 +1,17 @@
# Image containing a JDK, Scala and a Spark 1.4.0 (Hadoop 2.6 build)
# distribution unpacked under /opt, with /opt/spark/bin on the PATH.
#
# Pinned to a major version rather than the floating "latest" tag so that
# rebuilds do not silently pick up a different JDK.
FROM java:8

# Refresh the package index and install in the same layer, so a cached
# "apt-get update" layer can never be combined with a newer install step.
# The package lists are removed in the same layer to keep them out of the image.
RUN apt-get update -y && \
    apt-get install -y scala && \
    rm -rf /var/lib/apt/lists/*

# Get Spark from the Apache archive: it keeps every release permanently,
# whereas rotating mirrors drop superseded versions and break the build.
RUN mkdir -p /opt && \
    cd /opt && \
    wget https://archive.apache.org/dist/spark/spark-1.4.0/spark-1.4.0-bin-hadoop2.6.tgz && \
    tar -zvxf spark-1.4.0-bin-hadoop2.6.tgz && \
    rm spark-1.4.0-bin-hadoop2.6.tgz && \
    ln -s spark-1.4.0-bin-hadoop2.6 spark && \
    echo Spark installed in /opt

# Plain local files: COPY is sufficient (ADD's archive/URL handling is not needed).
COPY log4j.properties /opt/spark/conf/log4j.properties
COPY setup_client.sh /

ENV PATH=$PATH:/opt/spark/bin

View File

@@ -0,0 +1,12 @@
# log4j configuration copied to /opt/spark/conf/log4j.properties.
#
# Set everything to be logged to the console
# Root logger: level WARN, output sent to the appender named "console".
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
# The console appender writes to stderr rather than stdout.
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Line format: timestamp (yy/MM/dd HH:mm:ss), level, last component of the
# logger name, message, line separator.
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
# These two REPL loggers are set to INFO, i.e. more verbose than the WARN root.
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

View File

@@ -0,0 +1,24 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Configures the current shell as a Spark client.
#
# Must be sourced, not executed, so that the exported variables end up in the
# caller's environment:
#   . ./setup_client.sh master_address master_port
#
# Effects: appends a "spark-master" entry for master_address to /etc/hosts and
# exports SPARK_LOCAL_HOSTNAME and MASTER.
if [[ $# -ne 2 || -z $1 || -z $2 ]]; then
  echo "Usage: . ./setup_client.sh master_address master_port" >&2
  # Because the script is sourced, a bare "exit" would terminate the caller's
  # shell. "return" is only valid when sourced, so fall back to "exit" for the
  # case where the script was executed directly.
  return 1 2>/dev/null || exit 1
fi

# Make the master resolvable under the fixed host name "spark-master".
echo "$1 spark-master" >> /etc/hosts

# Use the address(es) reported by "hostname -i" as the local Spark host name.
export SPARK_LOCAL_HOSTNAME=$(hostname -i)
export MASTER="spark://spark-master:$2"

View File

@@ -0,0 +1,7 @@
# Spark master image: the spark-base image plus a start script and a
# master-specific log4j configuration.
# NOTE(review): the base image is untagged and therefore resolves to ":latest";
# pin it to a specific tag or digest once one is published.
FROM gcr.io/google_containers/spark-base

# Plain local files: COPY is sufficient (ADD's archive/URL handling is not needed).
COPY start.sh /
COPY log4j.properties /opt/spark/conf/log4j.properties

# 7077 is the port start.sh falls back to when SPARK_MASTER_SERVICE_PORT is unset.
EXPOSE 7077
ENTRYPOINT ["/start.sh"]

View File

@@ -0,0 +1,12 @@
# log4j configuration copied to /opt/spark/conf/log4j.properties.
#
# Set everything to be logged to the console
# Root logger: level INFO, output sent to the appender named "console".
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
# The console appender writes to stderr rather than stdout.
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Line format: timestamp (yy/MM/dd HH:mm:ss), level, last component of the
# logger name, message, line separator.
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
# These two REPL loggers are set explicitly to INFO (the same level as the root).
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

View File

@@ -0,0 +1,19 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Entrypoint of the Spark master container.

# Use SPARK_MASTER_SERVICE_PORT when it is set (presumably injected by the
# Kubernetes spark-master service -- confirm); otherwise fall back to 7077.
export SPARK_MASTER_PORT=${SPARK_MASTER_SERVICE_PORT:-7077}

# Abort with a non-zero status if the launcher reports a failure, instead of
# falling through and tailing logs of a master that never started.
/opt/spark/sbin/start-master.sh || exit 1

# Stream the Spark logs in the foreground; "exec" replaces this shell with
# tail so no idle wrapper shell is left running as the container's main process.
exec tail -F /opt/spark/logs/*

View File

@@ -0,0 +1,7 @@
# Spark worker image: the spark-base image plus a start script and a
# worker-specific log4j configuration.
# NOTE(review): the base image is untagged and therefore resolves to ":latest";
# pin it to a specific tag or digest once one is published.
FROM gcr.io/google_containers/spark-base

# Plain local files: COPY is sufficient (ADD's archive/URL handling is not needed).
COPY start.sh /
COPY log4j.properties /opt/spark/conf/log4j.properties

# NOTE(review): Spark's standalone worker web UI defaults to 8081, while 8080
# is the master web UI default -- confirm that 8080 is the intended port here.
EXPOSE 8080
ENTRYPOINT ["/start.sh"]

View File

@@ -0,0 +1,12 @@
# log4j configuration copied to /opt/spark/conf/log4j.properties.
#
# Set everything to be logged to the console
# Root logger: level INFO, output sent to the appender named "console".
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
# The console appender writes to stderr rather than stdout.
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Line format: timestamp (yy/MM/dd HH:mm:ss), level, last component of the
# logger name, message, line separator.
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
# These two REPL loggers are set explicitly to INFO (the same level as the root).
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Entrypoint of the Spark worker container.
#
# Requires SPARK_MASTER_SERVICE_HOST in the environment; it is mapped to the
# host name "spark-master", and the worker registers with the master there.
if [[ -z ${SPARK_MASTER_SERVICE_HOST} ]]; then
  echo "Spark Master service must be created before starting any workers" >&2
  sleep 30 # To postpone pod restart
  exit 1
fi

# Make the master resolvable under the fixed host name "spark-master".
echo "${SPARK_MASTER_SERVICE_HOST} spark-master" >> /etc/hosts

# Use the address(es) reported by "hostname -i" as the local Spark host name.
export SPARK_LOCAL_HOSTNAME=$(hostname -i)

# Fall back to 7077 when no service port is provided, matching the default the
# master's start script uses. Abort if the launcher reports a failure rather
# than tailing logs of a worker that never started.
/opt/spark/sbin/start-slave.sh "spark://spark-master:${SPARK_MASTER_SERVICE_PORT:-7077}" || exit 1

# Stream the Spark logs in the foreground; "exec" replaces this shell with
# tail so no idle wrapper shell is left running as the container's main process.
exec tail -F /opt/spark/logs/*