move spark images to github.com/kubernetes/application-images/spark

Matthew Farrellee 2016-01-16 11:15:23 -05:00
parent 3816c1798f
commit 0294bc2be7
21 changed files with 0 additions and 450 deletions

View File: examples/spark/images/Makefile

@@ -1,61 +0,0 @@
all: spark zeppelin
push: push-spark push-zeppelin
.PHONY: push push-spark push-zeppelin spark zeppelin zeppelin-build
# To bump the Spark version, bump the version in base/Dockerfile, bump
# the version in zeppelin/Dockerfile, bump this tag and reset to
# v1. You should also double check the native Hadoop libs at that
# point (we grab the 2.6.1 libs, which are appropriate for
# 1.5.1-with-2.6). Note that you'll need to re-test Zeppelin (and it
# may not have caught up to newest Spark).
TAG = 1.5.1_v2
# To bump the Zeppelin version, bump the version in
# zeppelin/Dockerfile and bump this tag and reset to v1.
ZEPPELIN_TAG = v0.5.5_v3
spark:
	docker build -t gcr.io/google_containers/spark-base base
	docker tag gcr.io/google_containers/spark-base gcr.io/google_containers/spark-base:$(TAG)
	docker build -t gcr.io/google_containers/spark-worker worker
	docker tag gcr.io/google_containers/spark-worker gcr.io/google_containers/spark-worker:$(TAG)
	docker build -t gcr.io/google_containers/spark-master master
	docker tag gcr.io/google_containers/spark-master gcr.io/google_containers/spark-master:$(TAG)
	docker build -t gcr.io/google_containers/spark-driver driver
	docker tag gcr.io/google_containers/spark-driver gcr.io/google_containers/spark-driver:$(TAG)

# This target is useful when you need an unreleased version of Zeppelin
zeppelin-build:
	docker build -t gcr.io/google_containers/zeppelin-build zeppelin-build
	docker tag -f gcr.io/google_containers/zeppelin-build gcr.io/google_containers/zeppelin-build:$(ZEPPELIN_TAG)

zeppelin:
	docker build -t gcr.io/google_containers/zeppelin zeppelin
	docker tag -f gcr.io/google_containers/zeppelin gcr.io/google_containers/zeppelin:$(ZEPPELIN_TAG)

push-spark: spark
	gcloud docker push gcr.io/google_containers/spark-base
	gcloud docker push gcr.io/google_containers/spark-base:$(TAG)
	gcloud docker push gcr.io/google_containers/spark-worker
	gcloud docker push gcr.io/google_containers/spark-worker:$(TAG)
	gcloud docker push gcr.io/google_containers/spark-master
	gcloud docker push gcr.io/google_containers/spark-master:$(TAG)
	gcloud docker push gcr.io/google_containers/spark-driver
	gcloud docker push gcr.io/google_containers/spark-driver:$(TAG)

push-zeppelin: zeppelin
	gcloud docker push gcr.io/google_containers/zeppelin
	gcloud docker push gcr.io/google_containers/zeppelin:$(ZEPPELIN_TAG)

clean:
	docker rmi gcr.io/google_containers/spark-worker:$(TAG) || :
	docker rmi gcr.io/google_containers/spark-worker || :
	docker rmi gcr.io/google_containers/spark-master:$(TAG) || :
	docker rmi gcr.io/google_containers/spark-master || :
	docker rmi gcr.io/google_containers/spark-driver:$(TAG) || :
	docker rmi gcr.io/google_containers/spark-driver || :
	docker rmi gcr.io/google_containers/spark-base:$(TAG) || :
	docker rmi gcr.io/google_containers/spark-base || :
	docker rmi gcr.io/google_containers/zeppelin:$(ZEPPELIN_TAG) || :
	docker rmi gcr.io/google_containers/zeppelin || :
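
For reference, a typical build-and-push cycle with this Makefile might look like the following (a hypothetical session; it assumes docker is available and gcloud is authenticated to push to gcr.io/google_containers):

    make spark zeppelin        # build and tag all five images
    make push                  # push both the floating and versioned tags
    make TAG=1.5.2_v1 spark    # hypothetical override after bumping base/Dockerfile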

View File: examples/spark/images/base/Dockerfile

@@ -1,39 +0,0 @@
FROM java:openjdk-8-jdk
ENV hadoop_ver 2.6.1
ENV spark_ver 1.5.1
# Get Hadoop from US Apache mirror and extract just the native
# libs. (Until we care about running HDFS with these containers, this
# is all we need.)
RUN mkdir -p /opt && \
    cd /opt && \
    curl http://www.us.apache.org/dist/hadoop/common/hadoop-${hadoop_ver}/hadoop-${hadoop_ver}.tar.gz | \
        tar -zx hadoop-${hadoop_ver}/lib/native && \
    ln -s hadoop-${hadoop_ver} hadoop && \
    echo Hadoop ${hadoop_ver} native libraries installed in /opt/hadoop/lib/native

# Get Spark from US Apache mirror.
RUN mkdir -p /opt && \
    cd /opt && \
    curl http://www.us.apache.org/dist/spark/spark-${spark_ver}/spark-${spark_ver}-bin-hadoop2.6.tgz | \
        tar -zx && \
    ln -s spark-${spark_ver}-bin-hadoop2.6 spark && \
    echo Spark ${spark_ver} installed in /opt

# Add the GCS connector.
RUN cd /opt/spark/lib && \
    curl -O https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar

# if numpy is installed on a driver it needs to be installed on all
# workers, so install it everywhere
RUN apt-get update && \
    apt-get install -y python-numpy && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
ADD log4j.properties /opt/spark/conf/log4j.properties
ADD start-common.sh /
ADD core-site.xml /opt/spark/conf/core-site.xml
ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
ENV PATH $PATH:/opt/spark/bin
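
A quick sanity check of the resulting base image (hypothetical commands; the image name matches what the Makefile builds):

    docker build -t gcr.io/google_containers/spark-base base
    # confirm the native Hadoop libs and the Spark install landed where expected
    docker run --rm gcr.io/google_containers/spark-base ls /opt/hadoop/lib/native /opt/spark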

View File: examples/spark/images/base/core-site.xml

@@ -1,19 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.gs.impl</name>
    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
    <description>The FileSystem for gs: (GCS) uris.</description>
  </property>
  <property>
    <name>fs.AbstractFileSystem.gs.impl</name>
    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
    <description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
  </property>
  <property>
    <name>fs.gs.project.id</name>
    <value>NOT_RUNNING_INSIDE_GCE</value>
  </property>
</configuration>
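
With the connector jar on the classpath (see spark-defaults.conf below) and the project id rewritten at container start, gs: URIs can be read directly; a hypothetical session from a driver container, assuming a bucket you can read:

    /opt/spark/bin/pyspark
    >>> sc.textFile("gs://my-bucket/input.txt").count()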

View File: examples/spark/images/base/log4j.properties

@@ -1,12 +0,0 @@
# Set everything to be logged to the console
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

View File: examples/spark/images/base/spark-defaults.conf

@@ -1,5 +0,0 @@
spark.master spark://spark-master:7077
spark.executor.extraClassPath /opt/spark/lib/gcs-connector-latest-hadoop2.jar
spark.driver.extraClassPath /opt/spark/lib/gcs-connector-latest-hadoop2.jar
spark.driver.extraLibraryPath /opt/hadoop/lib/native
spark.app.id KubernetesSpark
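
Because spark.master is baked in here, any shell started inside these images joins the standalone cluster with no extra flags, e.g. (hypothetical session):

    /opt/spark/bin/spark-shell    # picks up spark://spark-master:7077 from spark-defaults.conf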

View File: examples/spark/images/base/start-common.sh

@@ -1,30 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
if [[ -n "${PROJECT_ID}" ]]; then
  sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
fi
# We don't want any of the incoming service variables; we'd rather use
# DNS. But this one interferes directly with Spark.
unset SPARK_MASTER_PORT
# spark.{executor,driver}.extraLibraryPath don't actually seem to
# work, this seems to be the only reliable way to get the native libs
# picked up.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native
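
One way to verify the substitution took effect (a hypothetical check from inside a running container; on GCE the placeholder should be replaced by the project id, off GCE the curl fails and it is left as-is):

    grep -A 1 'fs.gs.project.id' /opt/spark/conf/core-site.xml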

View File: examples/spark/images/driver/Dockerfile

@@ -1,3 +0,0 @@
FROM gcr.io/google_containers/spark-base:latest
ADD start.sh /start.sh
CMD ["/start.sh"]

View File: examples/spark/images/driver/README.md

@@ -1,38 +0,0 @@
<!-- BEGIN MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN STRIP_FOR_RELEASE -->
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<h2>PLEASE NOTE: This document applies to the HEAD of the source tree</h2>
If you are using a released version of Kubernetes, you should
refer to the docs that go with that version.
<!-- TAG RELEASE_LINK, added by the munger automatically -->
<strong>
The latest release of this document can be found
[here](http://releases.k8s.io/release-1.1/examples/spark/images/driver/README.md).
Documentation for other releases can be found at
[releases.k8s.io](http://releases.k8s.io).
</strong>
--
<!-- END STRIP_FOR_RELEASE -->
<!-- END MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/images/driver/README.md?pixel)]()
<!-- END MUNGE: GENERATED_ANALYTICS -->

View File: examples/spark/images/driver/start.sh

@@ -1,22 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
. /start-common.sh
echo "Use kubectl exec spark-driver -it bash to invoke commands"
while true; do
  sleep 100
done
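
The driver pod is meant as an interactive entry point; a hypothetical session (pod name per the echo above):

    kubectl exec spark-driver -it bash
    # inside the pod, the shells pick up /opt/spark/conf/spark-defaults.conf:
    pyspark
    >>> sc.parallelize(range(1000)).count()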

View File: examples/spark/images/master/Dockerfile

@@ -1,7 +0,0 @@
FROM gcr.io/google_containers/spark-base:latest
ADD start.sh /
ADD log4j.properties /opt/spark/conf/log4j.properties
EXPOSE 7077 8080
ENTRYPOINT ["/start.sh"]

View File: examples/spark/images/master/log4j.properties

@@ -1,12 +0,0 @@
# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

View File: examples/spark/images/master/start.sh

@@ -1,22 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
. /start-common.sh
echo "$(hostname -i) spark-master" >> /etc/hosts
# Run spark-class directly so that when it exits (or crashes), the pod restarts.
/opt/spark/bin/spark-class org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
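
To reach the web UI on 8080, one option is a port-forward to the master pod (the pod name here is hypothetical):

    kubectl port-forward spark-master 8080:8080
    # then browse http://localhost:8080 for the standalone master UI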

View File: examples/spark/images/worker/Dockerfile

@@ -1,7 +0,0 @@
FROM gcr.io/google_containers/spark-base:latest
ADD start.sh /
ADD log4j.properties /opt/spark/conf/log4j.properties
EXPOSE 8081
ENTRYPOINT ["/start.sh"]

View File: examples/spark/images/worker/log4j.properties

@@ -1,12 +0,0 @@
# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

View File: examples/spark/images/worker/start.sh

@@ -1,20 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
. /start-common.sh
# Run spark-class directly so that when it exits (or crashes), the pod restarts.
/opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077 --webui-port 8081
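
Workers find the master via DNS (spark-master:7077), so adding capacity is just a matter of running more worker pods; with a replication controller that might look like this (the controller name is hypothetical):

    kubectl scale rc spark-worker-controller --replicas=5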

View File: examples/spark/images/zeppelin-build/Dockerfile

@@ -1,54 +0,0 @@
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is the Zeppelin *build* image. It spits out a /zeppelin.tgz
# alone, which is then copied out by the Makefile and used in the
# actual Zeppelin image.
#
# Based heavily on
# https://github.com/dylanmei/docker-zeppelin/blob/master/Dockerfile
# (which is similar to many others out there), but rebased onto maven
# image.
FROM maven:3.3.3-jdk-8
ENV ZEPPELIN_TAG v0.5.5
ENV SPARK_MINOR 1.5
ENV SPARK_PATCH 1
ENV SPARK_VER ${SPARK_MINOR}.${SPARK_PATCH}
ENV HADOOP_MINOR 2.6
ENV HADOOP_PATCH 1
ENV HADOOP_VER ${HADOOP_MINOR}.${HADOOP_PATCH}
# libfontconfig is a workaround for
# https://github.com/karma-runner/karma/issues/1270, which caused a
# build break similar to
# https://www.mail-archive.com/users@zeppelin.incubator.apache.org/msg01586.html
RUN apt-get update \
    && apt-get install -y net-tools build-essential git wget unzip python python-setuptools python-dev python-numpy libfontconfig

RUN git clone https://github.com/apache/incubator-zeppelin.git --branch ${ZEPPELIN_TAG} /opt/zeppelin

RUN cd /opt/zeppelin && \
    mvn clean package \
        -Pbuild-distr \
        -Pspark-${SPARK_MINOR} -Dspark.version=${SPARK_VER} \
        -Phadoop-${HADOOP_MINOR} -Dhadoop.version=${HADOOP_VER} \
        -Ppyspark \
        -DskipTests && \
    echo "Successfully built Zeppelin"

RUN cd /opt/zeppelin/zeppelin-distribution/target/zeppelin-* && \
    mv zeppelin-* zeppelin && \
    tar cvzf /zeppelin.tgz zeppelin
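
The copy-out step itself isn't part of this image; a minimal sketch of how /zeppelin.tgz could be extracted for use by the zeppelin image (container name is arbitrary):

    docker create --name zb gcr.io/google_containers/zeppelin-build:v0.5.5_v3
    docker cp zb:/zeppelin.tgz zeppelin/zeppelin.tgz
    docker rm zb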

View File: examples/spark/images/zeppelin/.gitignore

@@ -1 +0,0 @@
zeppelin.tgz

View File: examples/spark/images/zeppelin/Dockerfile

@@ -1,33 +0,0 @@
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This image relies on the zeppelin-build image to get the zeppelin
# binaries built, and the Makefile to put it in this directory.
FROM gcr.io/google_containers/spark-base:latest
ENV ZEPPELIN_VER 0.5.5-incubating
RUN mkdir -p /opt && \
    cd /opt && \
    curl http://www.us.apache.org/dist/incubator/zeppelin/${ZEPPELIN_VER}/zeppelin-${ZEPPELIN_VER}-bin-all.tgz | \
        tar -zx && \
    ln -s zeppelin-${ZEPPELIN_VER}-bin-all zeppelin && \
    echo Zeppelin ${ZEPPELIN_VER} installed in /opt
ADD zeppelin-log4j.properties /opt/zeppelin/conf/log4j.properties
ADD zeppelin-env.sh /opt/zeppelin/conf/zeppelin-env.sh
ADD docker-zeppelin.sh /opt/zeppelin/bin/docker-zeppelin.sh
EXPOSE 8080
ENTRYPOINT ["/opt/zeppelin/bin/docker-zeppelin.sh"]
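
For a quick smoke test outside Kubernetes (hypothetical invocation; notebooks that touch Spark still need a reachable spark-master, per zeppelin-env.sh below):

    docker run --rm -p 8080:8080 gcr.io/google_containers/zeppelin:v0.5.5_v3
    # Zeppelin's notebook UI comes up on http://localhost:8080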

View File: examples/spark/images/zeppelin/docker-zeppelin.sh

@@ -1,21 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
export ZEPPELIN_HOME=/opt/zeppelin
export ZEPPELIN_CONF_DIR="${ZEPPELIN_HOME}/conf"
echo "=== Launching Zeppelin under Docker ==="
/opt/zeppelin/bin/zeppelin.sh "${ZEPPELIN_CONF_DIR}"

View File: examples/spark/images/zeppelin/zeppelin-env.sh

@@ -1,26 +0,0 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
export MASTER="spark://spark-master:7077"
export SPARK_HOME=/opt/spark
export ZEPPELIN_JAVA_OPTS="-Dspark.jars=/opt/spark/lib/gcs-connector-latest-hadoop2.jar"
# TODO(zmerlynn): Setting global CLASSPATH *should* be unnecessary,
# but ZEPPELIN_JAVA_OPTS isn't enough here. :(
export CLASSPATH="/opt/spark/lib/gcs-connector-latest-hadoop2.jar"
export ZEPPELIN_NOTEBOOK_DIR="${ZEPPELIN_HOME}/notebook"
export ZEPPELIN_MEM=-Xmx1024m
export ZEPPELIN_PORT=8080
export PYTHONPATH="${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.8.2.1-src.zip"
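
The py4j zip is pinned to the copy shipped with this Spark version, so a Spark bump in the base image must be mirrored here; a hypothetical check:

    ls ${SPARK_HOME}/python/lib/
    # expect py4j-0.8.2.1-src.zip for Spark 1.5.x; update PYTHONPATH above if it differs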

View File: examples/spark/images/zeppelin/zeppelin-log4j.properties

@@ -1,6 +0,0 @@
# Set everything to be logged to the console.
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n