Refactored KubernetesSeedProvider and added unit tests. Updated Docker image and bumped Cassandra version

chrislovecnm
2016-04-28 13:26:45 -06:00
parent 7a725418af
commit e8ce426093
13 changed files with 642 additions and 290 deletions
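
For context on what the refactor touches: Cassandra loads the class named under seed_provider in cassandra.yaml (io.k8s.cassandra.KubernetesSeedProvider, see the cassandra.yaml hunk below) through the org.apache.cassandra.locator.SeedProvider interface, constructing it with the configured parameters map and calling getSeeds() whenever it needs contact points. A minimal sketch of that contract, assuming only the comma-delimited "seeds" fallback parameter and leaving out the Kubernetes endpoints lookup, is shown below; the class name KubernetesSeedProviderSketch is illustrative and is not the code added by this commit.

package io.k8s.cassandra;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.cassandra.locator.SeedProvider;

// Sketch only: the real KubernetesSeedProvider asks the Kubernetes API for the
// endpoints of the cassandra service; here that lookup is omitted and the
// provider just resolves the comma-delimited "seeds" parameter from cassandra.yaml.
public class KubernetesSeedProviderSketch implements SeedProvider {

    private final String fallbackSeeds;

    // Cassandra instantiates the provider with the parameters map configured
    // under seed_provider in cassandra.yaml; "seeds" is assumed to be present.
    public KubernetesSeedProviderSketch(Map<String, String> params) {
        this.fallbackSeeds = params.get("seeds");
    }

    @Override
    public List<InetAddress> getSeeds() {
        List<InetAddress> seeds = new ArrayList<InetAddress>();
        for (String host : fallbackSeeds.split(",")) {
            try {
                seeds.add(InetAddress.getByName(host.trim()));
            } catch (UnknownHostException e) {
                // Skip unresolvable entries; a real provider would log this.
            }
        }
        return seeds;
    }
}
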

Dockerfile

@@ -12,32 +12,33 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-FROM google/debian:wheezy
+FROM google/debian:jessie
COPY cassandra.list /etc/apt/sources.list.d/cassandra.list
+COPY run.sh /run.sh
-RUN gpg --keyserver pgp.mit.edu --recv-keys F758CE318D77295D
-RUN gpg --export --armor F758CE318D77295D | apt-key add -
-RUN gpg --keyserver pgp.mit.edu --recv-keys 2B5C1B00
-RUN gpg --export --armor 2B5C1B00 | apt-key add -
-RUN gpg --keyserver pgp.mit.edu --recv-keys 0353B12C
-RUN gpg --export --armor 0353B12C | apt-key add -
-RUN apt-get update
-RUN apt-get -qq -y install procps cassandra
+RUN gpg --keyserver pgp.mit.edu --recv-keys F758CE318D77295D && \
+    gpg --export --armor F758CE318D77295D | apt-key add - && \
+    gpg --keyserver pgp.mit.edu --recv-keys 2B5C1B00 && \
+    gpg --export --armor 2B5C1B00 | apt-key add - && \
+    gpg --keyserver pgp.mit.edu --recv-keys 0353B12C && \
+    gpg --export --armor 0353B12C | apt-key add - && \
+    apt-get update && \
+    apt-get -qq -y install procps cassandra openjdk-8-jre-headless && \
+    chmod a+rx /run.sh && \
+    mkdir -p /cassandra_data/data && \
+    chown -R cassandra.cassandra /etc/cassandra /cassandra_data && \
+    chmod o+w -R /etc/cassandra /cassandra_data && \
+    rm -rf /var/lib/apt/lists/* && \
+    rm -rf /usr/share/doc/ && \
+    rm -rf /usr/share/doc-base/ && \
+    rm -rf /usr/share/man/ && \
+    rm -rf /tmp/*
COPY cassandra.yaml /etc/cassandra/cassandra.yaml
COPY logback.xml /etc/cassandra/logback.xml
-COPY run.sh /run.sh
COPY kubernetes-cassandra.jar /kubernetes-cassandra.jar
-RUN chmod a+rx /run.sh && \
-    mkdir -p /cassandra_data/data && \
-    chown -R cassandra.cassandra /etc/cassandra /cassandra_data && \
-    chmod o+w -R /etc/cassandra /cassandra_data
VOLUME ["/cassandra_data/data"]
USER cassandra

Makefile

@@ -14,11 +14,11 @@
# build the cassandra image.
-VERSION=v8
+VERSION=v9
all: build
-kubernetes-cassandra.jar: ../java/* ../java/src/io/k8s/cassandra/*.java
+kubernetes-cassandra.jar: ../java/* ../java/src/main/java/io/k8s/cassandra/*.java
cd ../java && mvn package
mv ../java/target/kubernetes-cassandra*.jar kubernetes-cassandra.jar
cd ../java && mvn clean

cassandra.list

@@ -1,3 +1,5 @@
-deb http://www.apache.org/dist/cassandra/debian 21x main
-deb-src http://www.apache.org/dist/cassandra/debian 21x main
+deb http://www.apache.org/dist/cassandra/debian 34x main
+deb-src http://www.apache.org/dist/cassandra/debian 34x main
+# for jre8
+deb http://http.debian.net/debian jessie-backports main

cassandra.yaml

@@ -1,4 +1,4 @@
-# Cassandra storage config YAML
+# Cassandra storage config YAML
# NOTE:
# See http://wiki.apache.org/cassandra/StorageConfiguration for
@@ -20,13 +20,13 @@ cluster_name: 'Test Cluster'
# Specifying initial_token will override this setting on the node's initial start,
# on subsequent starts, this setting will apply even if initial token is set.
#
-# If you already have a cluster with 1 token per node, and wish to migrate to
+# If you already have a cluster with 1 token per node, and wish to migrate to
# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
num_tokens: 256
# initial_token allows you to specify tokens manually. While you can use # it with
-# vnodes (num_tokens > 1, above) -- in which case you should provide a
-# comma-separated list -- it's primarily used when adding nodes # to legacy clusters
+# vnodes (num_tokens > 1, above) -- in which case you should provide a
+# comma-separated list -- it's primarily used when adding nodes # to legacy clusters
# that do not have vnodes enabled.
# initial_token:
@@ -157,7 +157,7 @@ key_cache_save_period: 14400
# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
#
# Default value is 0, to disable row caching.
-row_cache_size_in_mb: 0
+row_cache_size_in_mb: 16
# Duration in seconds after which Cassandra should
# save the row cache. Caches are saved to saved_caches_directory as specified
@@ -204,7 +204,7 @@ counter_cache_save_period: 7200
# well as caches. Experiments show that JEMalloc saves some memory compared to
# the native GCC allocator (i.e., JEMalloc is more
# fragmentation-resistant).
-#
+#
# Supported values are: NativeAllocator, JEMallocAllocator
#
# If you intend to use JEMallocAllocator you have to install JEMalloc as library and
@@ -217,14 +217,14 @@ counter_cache_save_period: 7200
# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
saved_caches_directory: /cassandra_data/saved_caches
-# commitlog_sync may be either "periodic" or "batch."
+# commitlog_sync may be either "periodic" or "batch."
# When in batch mode, Cassandra won't ack writes until the commit log
# has been fsynced to disk. It will wait up to
# commitlog_sync_batch_window_in_ms milliseconds for other writes, before
# performing the sync.
#
# commitlog_sync: batch
-# commitlog_sync_batch_window_in_ms: 50
+# commitlog_sync_batch_window_in_ms: 1.0
#
# the other option is "periodic" where writes may be acked immediately
# and the CommitLog is simply synced every commitlog_sync_period_in_ms
@@ -239,7 +239,7 @@ commitlog_sync_period_in_ms: 10000
# The size of the individual commitlog file segments. A commitlog
# segment may be archived, deleted, or recycled once all the data
# in it (potentially from each columnfamily in the system) has been
-# flushed to sstables.
+# flushed to sstables.
#
# The default size is 32, which is almost always fine, but if you are
# archiving commitlog segments (see commitlog_archiving.properties),
@@ -250,11 +250,11 @@ commitlog_segment_size_in_mb: 32
# any class that implements the SeedProvider interface and has a
# constructor that takes a Map<String, String> of parameters will do.
seed_provider:
-# Addresses of hosts that are deemed contact points.
+# Addresses of hosts that are deemed contact points.
# Cassandra nodes use this list of hosts to find each other and learn
# the topology of the ring. You must change this if you are running
# multiple nodes!
-    - class_name: io.k8s.cassandra.KubernetesSeedProvider
+    - class_name: io.k8s.cassandra.KubernetesSeedProvider
parameters:
# seeds is actually a comma-delimited list of addresses.
# Ex: "<ip1>,<ip2>,<ip3>"
@@ -279,7 +279,7 @@ concurrent_counter_writes: 32
# the smaller of 1/4 of heap or 512MB.
# file_cache_size_in_mb: 512
-# Total permitted memory to use for memtables. Cassandra will stop
+# Total permitted memory to use for memtables. Cassandra will stop
# accepting writes when the limit is exceeded until a flush completes,
# and will trigger a flush based on memtable_cleanup_threshold
# If omitted, Cassandra will set both to 1/4 the size of the heap.
@@ -300,7 +300,7 @@ concurrent_counter_writes: 32
# heap_buffers: on heap nio buffers
# offheap_buffers: off heap (direct) nio buffers
# offheap_objects: native memory, eliminating nio buffer heap overhead
-memtable_allocation_type: heap_buffers
+memtable_allocation_type: offheap_objects
# Total space to use for commitlogs. Since commitlog segments are
# mmapped, and hence use up address space, the default size is 32
@@ -314,11 +314,11 @@ memtable_allocation_type: heap_buffers
# This sets the amount of memtable flush writer threads. These will
# be blocked by disk io, and each one will hold a memtable in memory
-# while blocked.
+# while blocked.
#
# memtable_flush_writers defaults to the smaller of (number of disks,
# number of cores), with a minimum of 2 and a maximum of 8.
-#
+#
# If your data directories are backed by SSD, you should increase this
# to the number of cores.
#memtable_flush_writers: 8
@@ -476,7 +476,7 @@ thrift_framed_transport_size_in_mb: 15
# flushed or streamed locally in a backups/ subdirectory of the
# keyspace data. Removing these links is the operator's
# responsibility.
-incremental_backups: false
+incremental_backups: true
# Whether or not to take a snapshot before each compaction. Be
# careful using this option, since Cassandra won't clean up the
@@ -485,7 +485,7 @@ incremental_backups: false
snapshot_before_compaction: false
# Whether or not a snapshot is taken of the data before keyspace truncation
-# or dropping of column families. The STRONGLY advised default of true
+# or dropping of column families. The STRONGLY advised default of true
# should be used to provide data safety. If you set this flag to false, you will
# lose data on truncation or drop.
auto_snapshot: true
@@ -529,9 +529,10 @@ batch_size_warn_threshold_in_kb: 5
#
# concurrent_compactors defaults to the smaller of (number of disks,
# number of cores), with a minimum of 2 and a maximum of 8.
-#
+#
# If your data directories are backed by SSD, you should increase this
# to the number of cores.
+# TODO: set this based on env??
#concurrent_compactors: 1
# Throttles compaction to the given total throughput across the entire
@@ -544,7 +545,7 @@ compaction_throughput_mb_per_sec: 16
# When compacting, the replacement sstable(s) can be opened before they
# are completely written, and used in place of the prior sstables for
-# any range that has been written. This helps to smoothly transfer reads
+# any range that has been written. This helps to smoothly transfer reads
# between the sstables, reducing page cache churn and keeping hot rows hot
sstable_preemptive_open_interval_in_mb: 50
@@ -582,7 +583,7 @@ request_timeout_in_ms: 10000
# Enable operation timeout information exchange between nodes to accurately
# measure request timeouts. If disabled, replicas will assume that requests
# were forwarded to them instantly by the coordinator, which means that
-# under overload conditions we will waste that much extra time processing
+# under overload conditions we will waste that much extra time processing
# already-timed-out requests.
#
# Warning: before enabling this property make sure ntp is installed
@@ -654,7 +655,7 @@ endpoint_snitch: SimpleSnitch
# controls how often to perform the more expensive part of host score
# calculation
-dynamic_snitch_update_interval_in_ms: 100
+dynamic_snitch_update_interval_in_ms: 100
# controls how often to reset all host scores, allowing a bad host to
# possibly recover
dynamic_snitch_reset_interval_in_ms: 600000
@@ -678,13 +679,15 @@ dynamic_snitch_badness_threshold: 0.1
# client requests to a node with a separate queue for each
# request_scheduler_id. The scheduler is further customized by
# request_scheduler_options as described below.
-request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+request_scheduler: org.apache.cassandra.scheduler.RoundRobinScheduler
+request_scheduler_id: keyspace
# Scheduler Options vary based on the type of scheduler
# NoScheduler - Has no options
# RoundRobin
# - throttle_limit -- The throttle_limit is the number of in-flight
-# requests per client. Requests beyond
+# requests per client. Requests beyond
# that limit are queued up until
# running requests can complete.
# The value of 80 here is twice the number of
@@ -762,3 +765,10 @@ internode_compression: all
# reducing overhead from the TCP protocol itself, at the cost of increasing
# latency if you block for cross-datacenter responses.
inter_dc_tcp_nodelay: false
+disk_access_mode: mmap
+row_cache_class_name: org.apache.cassandra.cache.OHCProvider
+# Not till 3.5
+#enable_user_defined_functions: true
+#enable_scripted_user_defined_functions: true
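
On the "added unit tests" part of the commit message: a rough illustration of how the fallback path of a provider like the sketch above could be exercised (JUnit 4 assumed; the test class and method names are hypothetical and not necessarily how the tests added under ../java are written).

package io.k8s.cassandra;

import static org.junit.Assert.assertEquals;

import java.net.InetAddress;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.Test;

// Illustrative only: verifies that getSeeds() resolves the comma-delimited
// "seeds" parameter from cassandra.yaml when no Kubernetes endpoints are used.
public class KubernetesSeedProviderSketchTest {

    @Test
    public void getSeedsFallsBackToConfiguredList() throws Exception {
        Map<String, String> params = new HashMap<String, String>();
        params.put("seeds", "127.0.0.1,127.0.0.2");

        List<InetAddress> seeds = new KubernetesSeedProviderSketch(params).getSeeds();

        assertEquals(2, seeds.size());
        assertEquals(InetAddress.getByName("127.0.0.1"), seeds.get(0));
    }
}
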