Refactored KubernetesSeedProvider and added unit tests. Updated Docker image and bumped Cassandra version

chrislovecnm
2016-04-28 13:26:45 -06:00
parent 7a725418af
commit e8ce426093
13 changed files with 642 additions and 290 deletions
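
For context on what the refactor touches: Cassandra loads the class named under seed_provider in cassandra.yaml (io.k8s.cassandra.KubernetesSeedProvider, see the cassandra.yaml hunk below) through the org.apache.cassandra.locator.SeedProvider interface, constructing it with the configured parameters map and calling getSeeds() whenever it needs contact points. A minimal sketch of that contract, assuming only the comma-delimited "seeds" fallback parameter and leaving out the Kubernetes endpoints lookup, is shown below; the class name KubernetesSeedProviderSketch is illustrative and is not the code added by this commit.

package io.k8s.cassandra;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.cassandra.locator.SeedProvider;

// Sketch only: the real KubernetesSeedProvider asks the Kubernetes API for the
// endpoints of the cassandra service; here that lookup is omitted and the
// provider just resolves the comma-delimited "seeds" parameter from cassandra.yaml.
public class KubernetesSeedProviderSketch implements SeedProvider {

    private final String fallbackSeeds;

    // Cassandra instantiates the provider with the parameters map configured
    // under seed_provider in cassandra.yaml; "seeds" is assumed to be present.
    public KubernetesSeedProviderSketch(Map<String, String> params) {
        this.fallbackSeeds = params.get("seeds");
    }

    @Override
    public List<InetAddress> getSeeds() {
        List<InetAddress> seeds = new ArrayList<InetAddress>();
        for (String host : fallbackSeeds.split(",")) {
            try {
                seeds.add(InetAddress.getByName(host.trim()));
            } catch (UnknownHostException e) {
                // Skip unresolvable entries; a real provider would log this.
            }
        }
        return seeds;
    }
}
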

Dockerfile

@@ -12,32 +12,33 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-FROM google/debian:wheezy
+FROM google/debian:jessie
COPY cassandra.list /etc/apt/sources.list.d/cassandra.list
+COPY run.sh /run.sh
-RUN gpg --keyserver pgp.mit.edu --recv-keys F758CE318D77295D
-RUN gpg --export --armor F758CE318D77295D | apt-key add -
-RUN gpg --keyserver pgp.mit.edu --recv-keys 2B5C1B00
-RUN gpg --export --armor 2B5C1B00 | apt-key add -
-RUN gpg --keyserver pgp.mit.edu --recv-keys 0353B12C
-RUN gpg --export --armor 0353B12C | apt-key add -
-RUN apt-get update
-RUN apt-get -qq -y install procps cassandra
+RUN gpg --keyserver pgp.mit.edu --recv-keys F758CE318D77295D && \
+    gpg --export --armor F758CE318D77295D | apt-key add - && \
+    gpg --keyserver pgp.mit.edu --recv-keys 2B5C1B00 && \
+    gpg --export --armor 2B5C1B00 | apt-key add - && \
+    gpg --keyserver pgp.mit.edu --recv-keys 0353B12C && \
+    gpg --export --armor 0353B12C | apt-key add - && \
+    apt-get update && \
+    apt-get -qq -y install procps cassandra openjdk-8-jre-headless && \
+    chmod a+rx /run.sh && \
+    mkdir -p /cassandra_data/data && \
+    chown -R cassandra.cassandra /etc/cassandra /cassandra_data && \
+    chmod o+w -R /etc/cassandra /cassandra_data && \
+    rm -rf /var/lib/apt/lists/* && \
+    rm -rf /usr/share/doc/ && \
+    rm -rf /usr/share/doc-base/ && \
+    rm -rf /usr/share/man/ && \
+    rm -rf /tmp/*
COPY cassandra.yaml /etc/cassandra/cassandra.yaml
COPY logback.xml /etc/cassandra/logback.xml
-COPY run.sh /run.sh
COPY kubernetes-cassandra.jar /kubernetes-cassandra.jar
-RUN chmod a+rx /run.sh && \
-    mkdir -p /cassandra_data/data && \
-    chown -R cassandra.cassandra /etc/cassandra /cassandra_data && \
-    chmod o+w -R /etc/cassandra /cassandra_data
VOLUME ["/cassandra_data/data"]
USER cassandra

Makefile

@@ -14,11 +14,11 @@
# build the cassandra image.
-VERSION=v8
+VERSION=v9
all: build
-kubernetes-cassandra.jar: ../java/* ../java/src/io/k8s/cassandra/*.java
+kubernetes-cassandra.jar: ../java/* ../java/src/main/java/io/k8s/cassandra/*.java
cd ../java && mvn package
mv ../java/target/kubernetes-cassandra*.jar kubernetes-cassandra.jar
cd ../java && mvn clean

cassandra.list

@@ -1,3 +1,5 @@
-deb http://www.apache.org/dist/cassandra/debian 21x main
-deb-src http://www.apache.org/dist/cassandra/debian 21x main
+deb http://www.apache.org/dist/cassandra/debian 34x main
+deb-src http://www.apache.org/dist/cassandra/debian 34x main
+# for jre8
+deb http://http.debian.net/debian jessie-backports main

cassandra.yaml

@@ -1,4 +1,4 @@
-# Cassandra storage config YAML
+# Cassandra storage config YAML
# NOTE:
# See http://wiki.apache.org/cassandra/StorageConfiguration for
@@ -20,13 +20,13 @@ cluster_name: 'Test Cluster'
# Specifying initial_token will override this setting on the node's initial start,
# on subsequent starts, this setting will apply even if initial token is set.
#
-# If you already have a cluster with 1 token per node, and wish to migrate to
+# If you already have a cluster with 1 token per node, and wish to migrate to
# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations
num_tokens: 256
# initial_token allows you to specify tokens manually. While you can use # it with
-# vnodes (num_tokens > 1, above) -- in which case you should provide a
-# comma-separated list -- it's primarily used when adding nodes # to legacy clusters
+# vnodes (num_tokens > 1, above) -- in which case you should provide a
+# comma-separated list -- it's primarily used when adding nodes # to legacy clusters
# that do not have vnodes enabled.
# initial_token:
@@ -157,7 +157,7 @@ key_cache_save_period: 14400
# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
#
# Default value is 0, to disable row caching.
-row_cache_size_in_mb: 0
+row_cache_size_in_mb: 16
# Duration in seconds after which Cassandra should
# save the row cache. Caches are saved to saved_caches_directory as specified
@@ -204,7 +204,7 @@ counter_cache_save_period: 7200
# well as caches. Experiments show that JEMalloc saves some memory compared to
# the native GCC allocator (i.e., JEMalloc is more
# fragmentation-resistant).
-#
+#
# Supported values are: NativeAllocator, JEMallocAllocator
#
# If you intend to use JEMallocAllocator you have to install JEMalloc as library and
@@ -217,14 +217,14 @@ counter_cache_save_period: 7200
# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
saved_caches_directory: /cassandra_data/saved_caches
-# commitlog_sync may be either "periodic" or "batch."
+# commitlog_sync may be either "periodic" or "batch."
# When in batch mode, Cassandra won't ack writes until the commit log
# has been fsynced to disk. It will wait up to
# commitlog_sync_batch_window_in_ms milliseconds for other writes, before
# performing the sync.
#
# commitlog_sync: batch
-# commitlog_sync_batch_window_in_ms: 50
+# commitlog_sync_batch_window_in_ms: 1.0
#
# the other option is "periodic" where writes may be acked immediately
# and the CommitLog is simply synced every commitlog_sync_period_in_ms
@@ -239,7 +239,7 @@ commitlog_sync_period_in_ms: 10000
# The size of the individual commitlog file segments. A commitlog
# segment may be archived, deleted, or recycled once all the data
# in it (potentially from each columnfamily in the system) has been
-# flushed to sstables.
+# flushed to sstables.
#
# The default size is 32, which is almost always fine, but if you are
# archiving commitlog segments (see commitlog_archiving.properties),
@@ -250,11 +250,11 @@ commitlog_segment_size_in_mb: 32
# any class that implements the SeedProvider interface and has a
# constructor that takes a Map<String, String> of parameters will do.
seed_provider:
-# Addresses of hosts that are deemed contact points.
+# Addresses of hosts that are deemed contact points.
# Cassandra nodes use this list of hosts to find each other and learn
# the topology of the ring. You must change this if you are running
# multiple nodes!
-    - class_name: io.k8s.cassandra.KubernetesSeedProvider
+    - class_name: io.k8s.cassandra.KubernetesSeedProvider
parameters:
# seeds is actually a comma-delimited list of addresses.
# Ex: "<ip1>,<ip2>,<ip3>"
@@ -279,7 +279,7 @@ concurrent_counter_writes: 32
# the smaller of 1/4 of heap or 512MB.
# file_cache_size_in_mb: 512
-# Total permitted memory to use for memtables. Cassandra will stop
+# Total permitted memory to use for memtables. Cassandra will stop
# accepting writes when the limit is exceeded until a flush completes,
# and will trigger a flush based on memtable_cleanup_threshold
# If omitted, Cassandra will set both to 1/4 the size of the heap.
@@ -300,7 +300,7 @@ concurrent_counter_writes: 32
# heap_buffers: on heap nio buffers
# offheap_buffers: off heap (direct) nio buffers
# offheap_objects: native memory, eliminating nio buffer heap overhead
-memtable_allocation_type: heap_buffers
+memtable_allocation_type: offheap_objects
# Total space to use for commitlogs. Since commitlog segments are
# mmapped, and hence use up address space, the default size is 32
@@ -314,11 +314,11 @@ memtable_allocation_type: heap_buffers
# This sets the amount of memtable flush writer threads. These will
# be blocked by disk io, and each one will hold a memtable in memory
-# while blocked.
+# while blocked.
#
# memtable_flush_writers defaults to the smaller of (number of disks,
# number of cores), with a minimum of 2 and a maximum of 8.
-#
+#
# If your data directories are backed by SSD, you should increase this
# to the number of cores.
#memtable_flush_writers: 8
@@ -476,7 +476,7 @@ thrift_framed_transport_size_in_mb: 15
# flushed or streamed locally in a backups/ subdirectory of the
# keyspace data. Removing these links is the operator's
# responsibility.
-incremental_backups: false
+incremental_backups: true
# Whether or not to take a snapshot before each compaction. Be
# careful using this option, since Cassandra won't clean up the
@@ -485,7 +485,7 @@ incremental_backups: false
snapshot_before_compaction: false
# Whether or not a snapshot is taken of the data before keyspace truncation
-# or dropping of column families. The STRONGLY advised default of true
+# or dropping of column families. The STRONGLY advised default of true
# should be used to provide data safety. If you set this flag to false, you will
# lose data on truncation or drop.
auto_snapshot: true
@@ -529,9 +529,10 @@ batch_size_warn_threshold_in_kb: 5
#
# concurrent_compactors defaults to the smaller of (number of disks,
# number of cores), with a minimum of 2 and a maximum of 8.
-#
+#
# If your data directories are backed by SSD, you should increase this
# to the number of cores.
+# TODO: set this based on env??
#concurrent_compactors: 1
# Throttles compaction to the given total throughput across the entire
@@ -544,7 +545,7 @@ compaction_throughput_mb_per_sec: 16
# When compacting, the replacement sstable(s) can be opened before they
# are completely written, and used in place of the prior sstables for
-# any range that has been written. This helps to smoothly transfer reads
+# any range that has been written. This helps to smoothly transfer reads
# between the sstables, reducing page cache churn and keeping hot rows hot
sstable_preemptive_open_interval_in_mb: 50
@@ -582,7 +583,7 @@ request_timeout_in_ms: 10000
# Enable operation timeout information exchange between nodes to accurately
# measure request timeouts. If disabled, replicas will assume that requests
# were forwarded to them instantly by the coordinator, which means that
-# under overload conditions we will waste that much extra time processing
+# under overload conditions we will waste that much extra time processing
# already-timed-out requests.
#
# Warning: before enabling this property make sure ntp is installed
@@ -654,7 +655,7 @@ endpoint_snitch: SimpleSnitch
# controls how often to perform the more expensive part of host score
# calculation
-dynamic_snitch_update_interval_in_ms: 100
+dynamic_snitch_update_interval_in_ms: 100
# controls how often to reset all host scores, allowing a bad host to
# possibly recover
dynamic_snitch_reset_interval_in_ms: 600000
@@ -678,13 +679,15 @@ dynamic_snitch_badness_threshold: 0.1
# client requests to a node with a separate queue for each
# request_scheduler_id. The scheduler is further customized by
# request_scheduler_options as described below.
-request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+request_scheduler: org.apache.cassandra.scheduler.RoundRobinScheduler
+request_scheduler_id: keyspace
# Scheduler Options vary based on the type of scheduler
# NoScheduler - Has no options
# RoundRobin
# - throttle_limit -- The throttle_limit is the number of in-flight
-# requests per client. Requests beyond
+# requests per client. Requests beyond
# that limit are queued up until
# running requests can complete.
# The value of 80 here is twice the number of
@@ -762,3 +765,10 @@ internode_compression: all
# reducing overhead from the TCP protocol itself, at the cost of increasing
# latency if you block for cross-datacenter responses.
inter_dc_tcp_nodelay: false
+disk_access_mode: mmap
+row_cache_class_name: org.apache.cassandra.cache.OHCProvider
+# Not till 3.5
+#enable_user_defined_functions: true
+#enable_scripted_user_defined_functions: true
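
On the "added unit tests" part of the commit message: a rough illustration of how the fallback path of a provider like the sketch above could be exercised (JUnit 4 assumed; the test class and method names are hypothetical and not necessarily how the tests added under ../java are written).

package io.k8s.cassandra;

import static org.junit.Assert.assertEquals;

import java.net.InetAddress;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.Test;

// Illustrative only: verifies that getSeeds() resolves the comma-delimited
// "seeds" parameter from cassandra.yaml when no Kubernetes endpoints are used.
public class KubernetesSeedProviderSketchTest {

    @Test
    public void getSeedsFallsBackToConfiguredList() throws Exception {
        Map<String, String> params = new HashMap<String, String>();
        params.put("seeds", "127.0.0.1,127.0.0.2");

        List<InetAddress> seeds = new KubernetesSeedProviderSketch(params).getSeeds();

        assertEquals(2, seeds.size());
        assertEquals(InetAddress.getByName("127.0.0.1"), seeds.get(0));
    }
}
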