diff --git a/README.md b/README.md index d77cfc4..50f8bf5 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,62 @@ -# dockerfiles +# Kafka docker builds + +Automated [Kafka](http://kafka.apache.org/) builds for [solsson/kafka](https://hub.docker.com/r/solsson/kafka/) +and related `kafka-` images under https://hub.docker.com/u/solsson/. -Nowadays we're using separate repositories for dockerization projects. +--- + +This repo used to contain misc dockerfiles, but they've moved to separate repositories for dockerization projects. +We've kept the repository name to avoid breaking the automated build of solsson/kafka in Docker Hub. For legacy Dockerfiles from this repo (if you navigated to here from a Docker Hub [solsson](https://hub.docker.com/u/solsson/) image), see https://github.com/solsson/dockerfiles/tree/misc-dockerfiles. -# Kafka docker builds +--- + +Our kafka images are tested in production with https://github.com/Yolean/kubernetes-kafka/. + +You most likely need to mount your own config files, or for `./bin/kafka-server-start.sh` use overrides like: +``` + --override zookeeper.connect=zookeeper:2181 + --override log.dirs=/var/lib/kafka/data/topics + --override log.retention.hours=-1 + --override broker.id=0 + --override advertised.listeners=PLAINTEXT://kafka-0:9092 +``` + +## One image to rule them all + +Official [Kafka distributions](http://kafka.apache.org/downloads) contain startup scripts and config for various services and clients. Thus `./kafka` produces a multi-purpose image for direct use and specialized docker builds. + +We could build specialized images like `kafka-server` but we have two reasons not to: + * Won't be as transparent in Docker Hub because you can't use Automated Build without scripting. + * In reality you'll need to control your own config anyway. 
+ +### Example of downstream image: Kafka Connect + +See ./connect-jmx + +### Example downstream image: Kafka Streams + +TODO + +## Building + +Rudimentary compliance with kubernetes-kafka is tested using a [build-contract](https://github.com/Yolean/build-contract/). + +Build and test using: `docker run -v /var/run/docker.sock:/var/run/docker.sock -v $(pwd)/:/source solsson/build-contract test`. However... while timing issues remain you need some manual intervention: -This repository maintains automated [Kafka](http://kafka.apache.org/) builds for https://hub.docker.com/r/solsson/kafka/ -and related `kafka-` images under https://hub.docker.com/u/solsson/, used with https://github.com/Yolean/kubernetes-kafka/. +```bash +compose='docker-compose -f build-contracts/docker-compose.yml' +$compose up -d zookeeper kafka-0 +$compose logs zookeeper kafka-0 +# can we create topics using the image's provided script? +$compose up test-topic-create +# can a producer send messages using snappy (has issues before with a class missing in the image) +$compose up test-snappy-compression +$compose up test-consume-all +# demo the log/file aggregation image +docker-compose -f build-contracts/docker-compose.files-aggregation.yml up +# demo the JMX->kafka image +docker-compose -f build-contracts/docker-compose.monitoring.yml up +``` diff --git a/build-contracts/docker-compose.files-aggregation.yml b/build-contracts/docker-compose.files-aggregation.yml new file mode 100644 index 0000000..685de9c --- /dev/null +++ b/build-contracts/docker-compose.files-aggregation.yml @@ -0,0 +1,49 @@ +version: '2.0' +services: + + zookeeper: + build: ../kafka + entrypoint: ./bin/zookeeper-server-start.sh + command: + - config/zookeeper.properties + + kafka-0: + build: ../kafka + links: + - zookeeper + entrypoint: ./bin/kafka-server-start.sh + command: + - config/server.properties + - --override + - zookeeper.connect=zookeeper:2181 + - --override + - broker.id=0 + - --override + - 
advertised.listeners=PLAINTEXT://kafka-0:9092 + + connect-files: + build: ../connect-files + labels: + com.yolean.build-target: "" + links: + - kafka-0 + + test-connect-files-real-logs: + build: ../connect-files + links: + - kafka-0 + volumes: + - /var/log:/logs + + test-consume-files: + image: solsson/kafkacat@sha256:1266d140c52cb39bf314b6f22b6d7a01c4c9084781bc779fdfade51214a713a8 + labels: + com.yolean.build-contract: "" + command: + - -b + - kafka-0:9092 + - -t + - files-000 + - -C + - -o + - beginning diff --git a/build-contracts/docker-compose.monitoring.yml b/build-contracts/docker-compose.monitoring.yml new file mode 100644 index 0000000..5656880 --- /dev/null +++ b/build-contracts/docker-compose.monitoring.yml @@ -0,0 +1,77 @@ +version: '2.0' +services: + + zookeeper: + build: ../kafka + entrypoint: ./bin/zookeeper-server-start.sh + command: + - config/zookeeper.properties + + kafka-0: + build: ../kafka + links: + - zookeeper + environment: + - JMX_PORT=5555 + expose: + - '5555' + entrypoint: ./bin/kafka-server-start.sh + command: + - config/server.properties + - --override + - zookeeper.connect=zookeeper:2181 + - --override + - broker.id=0 + - --override + - advertised.listeners=PLAINTEXT://kafka-0:9092 + + prometheus-jmx-exporter: + build: ../prometheus-jmx-exporter + labels: + com.yolean.build-target: "" + links: + - kafka-0 + # patch a config before start, as the image is designed for use with local JMX (same k8s pod) + entrypoint: /bin/bash + command: + - -c + - > + sed -i 's|127.0.0.1|kafka-0|' example_configs/kafka-prometheus-monitoring.yml; + cat example_configs/kafka-prometheus-monitoring.yml; + java -jar jmx_prometheus_httpserver.jar + 5556 example_configs/kafka-prometheus-monitoring.yml + + test-metrics-export: + image: solsson/curl@sha256:8b0927b81d10043e70f3e05e33e36fb9b3b0cbfcbccdb9f04fd53f67a270b874 + labels: + com.yolean.build-contract: "" + command: + - --fail-early + - --retry + - '10' + - --retry-delay + - '3' + - --retry-connrefused + 
- http://prometheus-jmx-exporter:5556/metrics + + connect-jmx: + build: ../connect-jmx + labels: + com.yolean.build-target: "" + links: + - kafka-0 + + # TODO starts too fast, gets % KC_ERROR: Failed to query metadata for topic jmx-test: Local: Broker transport failure + # needs to retry until kafka+topic exists + test-jmx: + image: solsson/kafkacat@sha256:1266d140c52cb39bf314b6f22b6d7a01c4c9084781bc779fdfade51214a713a8 + labels: + com.yolean.build-contract: "" + command: + - -b + - kafka-0:9092 + - -t + - jmx-test + - -C + - -o + - beginning diff --git a/build-contracts/docker-compose.yml b/build-contracts/docker-compose.yml new file mode 100644 index 0000000..e1ec794 --- /dev/null +++ b/build-contracts/docker-compose.yml @@ -0,0 +1,69 @@ +version: '2.0' +services: + + zookeeper: + build: ../kafka + entrypoint: ./bin/zookeeper-server-start.sh + command: + - config/zookeeper.properties + + kafka-0: + build: ../kafka + image: solsson/kafka + labels: + com.yolean.build-target: "" + links: + - zookeeper + entrypoint: ./bin/kafka-server-start.sh + command: + - config/server.properties + - --override + - zookeeper.connect=zookeeper:2181 + - --override + - broker.id=0 + # unlike Kubernetes StatefulSet, compose gives containers a random hostname (leading to redirects to a hex name) + - --override + - advertised.listeners=PLAINTEXT://kafka-0:9092 + + test-topic-create: + build: ../kafka + labels: + com.yolean.build-contract: "" + links: + - kafka-0 + entrypoint: ./bin/kafka-topics.sh + command: + - --zookeeper + - zookeeper:2181 + - --create + - --topic + - test-topic-create + - --partitions + - '1' + - --replication-factor + - '1' + + test-snappy-compression: + image: solsson/kafkacat@sha256:1266d140c52cb39bf314b6f22b6d7a01c4c9084781bc779fdfade51214a713a8 + labels: + com.yolean.build-contract: "" + entrypoint: /bin/sh + command: + - -exc + - sleep 5; echo "Message from $${HOSTNAME} at $$(date)" | kafkacat -z snappy -b kafka-0:9092 -t test1 -P + + # TODO starts too fast, 
gets % KC_ERROR: Failed to query metadata for topic test1: Local: Broker transport failure + # needs to retry until kafka+topic exists + test-consume-all: + image: solsson/kafkacat@sha256:1266d140c52cb39bf314b6f22b6d7a01c4c9084781bc779fdfade51214a713a8 + labels: + com.yolean.build-contract: "" + command: + - -b + - kafka-0:9092 + - -t + - test1 + - -C + - -o + - beginning + - -e diff --git a/connect-files/Dockerfile b/connect-files/Dockerfile new file mode 100644 index 0000000..cc549b8 --- /dev/null +++ b/connect-files/Dockerfile @@ -0,0 +1,12 @@ +FROM solsson/kafka:0.11.0.0 + +COPY worker.properties ./config/ +COPY connect-files.sh ./bin/ + +ENV FILES_LIST_CMD="find /logs/ -name *.log" + +# Set up some sample logs +RUN mkdir /logs/; \ + echo "Mount /logs and/or change FILES_LIST_CMD (currently '$FILES_LIST_CMD') to read real content instead" > /logs/samplefile1.log; + +ENTRYPOINT ["./bin/connect-files.sh"] diff --git a/connect-files/connect-files.sh b/connect-files/connect-files.sh new file mode 100755 index 0000000..cf5007b --- /dev/null +++ b/connect-files/connect-files.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -e + +FILES=$($FILES_LIST_CMD) + +id=0 +connectors="" +for FILE in $FILES; do + ((++id)) + echo "$id: $FILE" + cat <<HERE > ./config/connect-file-source-$id.properties +name=local-file-source-${id} +connector.class=FileStreamSource +tasks.max=1 +file=${FILE} +topic=files-000 +HERE + + connectors="$connectors ./config/connect-file-source-$id.properties" +done + +./bin/connect-standalone.sh ./config/worker.properties $connectors diff --git a/connect-files/worker.properties b/connect-files/worker.properties new file mode 100644 index 0000000..ff76cd3 --- /dev/null +++ b/connect-files/worker.properties @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These are defaults. This file just demonstrates how to override some settings. +bootstrap.servers=kafka-0:9092 + +# The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will +# need to configure these based on the format they want their data in when loaded from or stored into Kafka +key.converter=org.apache.kafka.connect.json.JsonConverter +value.converter=org.apache.kafka.connect.json.JsonConverter +# Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply +# it to +key.converter.schemas.enable=true +value.converter.schemas.enable=true + +# The internal converter used for offsets and config data is configurable and must be specified, but most users will +# always want to use the built-in default. Offset and config data is never visible outside of Kafka Connect in this format. 
+internal.key.converter=org.apache.kafka.connect.json.JsonConverter +internal.value.converter=org.apache.kafka.connect.json.JsonConverter +internal.key.converter.schemas.enable=false +internal.value.converter.schemas.enable=false + +offset.storage.file.filename=/tmp/connect.offsets +# Flush much faster than normal, which is useful for testing/debugging +offset.flush.interval.ms=10000 + +# Set to a list of filesystem paths separated by commas (,) to enable class loading isolation for plugins +# (connectors, converters, transformations). The list should consist of top level directories that include +# any combination of: +# a) directories immediately containing jars with plugins and their dependencies +# b) uber-jars with plugins and their dependencies +# c) directories immediately containing the package directory structure of classes of plugins and their dependencies +# Note: symlinks will be followed to discover dependencies or plugins. +# Examples: +# plugin.path=/usr/local/share/java,/usr/local/share/kafka/plugins,/opt/connectors, +#plugin.path= diff --git a/connect-jmx/Dockerfile b/connect-jmx/Dockerfile new file mode 100644 index 0000000..fc134a3 --- /dev/null +++ b/connect-jmx/Dockerfile @@ -0,0 +1,31 @@ +FROM solsson/kafka:0.11.0.0 + +ENV SRIJITHS_KAFKA_CONNECTORS_VERSION=dc0a7122650e697d3ae97c970a4785bbed949479 + +RUN set -ex; \ + buildDeps='curl ca-certificates'; \ + apt-get update && apt-get install -y $buildDeps --no-install-recommends; \ + \ + MAVEN_VERSION=3.5.0 PATH=$PATH:$(pwd)/maven/bin; \ + mkdir ./maven; \ + curl -SLs https://archive.apache.org/dist/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.tar.gz | tar -xzf - --strip-components=1 -C ./maven; \ + mvn --version; \ + \ + mkdir ./kafka-connectors; \ + cd ./kafka-connectors; \ + curl -SLs https://github.com/srijiths/kafka-connectors/archive/$SRIJITHS_KAFKA_CONNECTORS_VERSION.tar.gz \ + | tar -xzf - --strip-components=1 -C ./; \ + mvn clean install; \ + cd ..; \ + mv 
~/.m2/repository/com/sree/kafka/kafka-connect-jmx/0.0.1/kafka-connect-jmx-0.0.1-jar-with-dependencies.jar ./libs/; \ + rm -rf ./kafka-connectors; \ + rm -rf ./maven ~/.m2; \ + \ + apt-get purge -y --auto-remove $buildDeps; \ + rm -rf /var/lib/apt/lists/*; \ + rm /var/log/dpkg.log /var/log/apt/*.log + +COPY *.properties ./config/ + +ENTRYPOINT ["./bin/connect-standalone.sh"] +CMD ["./config/worker.properties", "./config/connect-jmx.properties"] diff --git a/connect-jmx/connect-jmx.properties b/connect-jmx/connect-jmx.properties new file mode 100644 index 0000000..23b7dd5 --- /dev/null +++ b/connect-jmx/connect-jmx.properties @@ -0,0 +1,19 @@ +# Name of the connector +name=jmx-source +# Connector class to invoke the connector +connector.class=com.sree.kafka.connectors.jmx.JmxConnector +# Maximum number of tasks +tasks.max=1 + +# Kafka topic to push the messages +kafka.topic=jmx-test +# JMX is running for which application. +# If you want JMX metrics for Kafka , then jmx.servicename=kafka +# If you want JMX metrics for Flink , then jmx.servicename=flink etc.. +jmx.servicename=kafka +# If jmx.servicename is kafka , then you have to provide zookeeper.host +# Else zookeeper.host parameter is not required. +zookeeper.host=zookeeper:2181 +# If jmx.servicename is not kafka , then below property is mandatory +# Provide the full JMX URL separated by comma +#jmx.url=54.238.221.37:8080,54.238.237.66:8080 diff --git a/connect-jmx/worker.properties b/connect-jmx/worker.properties new file mode 100644 index 0000000..ff76cd3 --- /dev/null +++ b/connect-jmx/worker.properties @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These are defaults. This file just demonstrates how to override some settings. +bootstrap.servers=kafka-0:9092 + +# The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will +# need to configure these based on the format they want their data in when loaded from or stored into Kafka +key.converter=org.apache.kafka.connect.json.JsonConverter +value.converter=org.apache.kafka.connect.json.JsonConverter +# Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply +# it to +key.converter.schemas.enable=true +value.converter.schemas.enable=true + +# The internal converter used for offsets and config data is configurable and must be specified, but most users will +# always want to use the built-in default. Offset and config data is never visible outside of Kafka Connect in this format. +internal.key.converter=org.apache.kafka.connect.json.JsonConverter +internal.value.converter=org.apache.kafka.connect.json.JsonConverter +internal.key.converter.schemas.enable=false +internal.value.converter.schemas.enable=false + +offset.storage.file.filename=/tmp/connect.offsets +# Flush much faster than normal, which is useful for testing/debugging +offset.flush.interval.ms=10000 + +# Set to a list of filesystem paths separated by commas (,) to enable class loading isolation for plugins +# (connectors, converters, transformations). 
The list should consist of top level directories that include +# any combination of: +# a) directories immediately containing jars with plugins and their dependencies +# b) uber-jars with plugins and their dependencies +# c) directories immediately containing the package directory structure of classes of plugins and their dependencies +# Note: symlinks will be followed to discover dependencies or plugins. +# Examples: +# plugin.path=/usr/local/share/java,/usr/local/share/kafka/plugins,/opt/connectors, +#plugin.path= diff --git a/kafka/Dockerfile b/kafka/Dockerfile index 4d43b54..238a812 100644 --- a/kafka/Dockerfile +++ b/kafka/Dockerfile @@ -1,16 +1,25 @@ -FROM openjdk:8-jre-alpine +# The only assumption we make about this FROM is that it has a JRE in path +FROM solsson/kafka-jre@sha256:7765513cf5fa455a672a06f584058c1c81cc0b3b56cc56b0cfdf1a917a183f26 -ARG kafka_version=0.10.2.0 -ENV kafka_bin_version=2.12-$kafka_version +ENV KAFKA_VERSION=0.11.0.0 SCALA_VERSION=2.11 -RUN apk add --no-cache --update-cache --virtual build-dependencies curl ca-certificates \ - && mkdir -p /opt/kafka \ - && curl -SLs "https://www-eu.apache.org/dist/kafka/$kafka_version/kafka_$kafka_bin_version.tgz" | tar -xzf - --strip-components=1 -C /opt/kafka \ - && apk del build-dependencies \ - && rm -rf /var/cache/apk/* +RUN set -ex; \ + export DEBIAN_FRONTEND=noninteractive; \ + runDeps='netcat-openbsd'; \ + buildDeps='curl ca-certificates'; \ + apt-get update && apt-get install -y $runDeps $buildDeps --no-install-recommends; \ + \ + SCALA_BINARY_VERSION=$(echo $SCALA_VERSION | cut -f 1-2 -d '.'); \ + mkdir -p /opt/kafka; \ + curl -SLs "https://www-eu.apache.org/dist/kafka/$KAFKA_VERSION/kafka_$SCALA_BINARY_VERSION-$KAFKA_VERSION.tgz" | tar -xzf - --strip-components=1 -C /opt/kafka; \ + \ + rm -rf /opt/kafka/site-docs; \ + \ + apt-get purge -y --auto-remove $buildDeps; \ + rm -rf /var/lib/apt/lists/*; \ + rm -rf /var/log/dpkg.log /var/log/alternatives.log /var/log/apt WORKDIR /opt/kafka 
-ENTRYPOINT ["bin/kafka-server-start.sh"] -RUN sed -i 's/zookeeper.connect=localhost:2181/zookeeper.connect=zookeeper:2181/' config/server.properties -CMD ["config/server.properties"] +COPY docker-help.sh /usr/local/bin/docker-help +ENTRYPOINT ["docker-help"] diff --git a/kafka/docker-help.sh b/kafka/docker-help.sh new file mode 100755 index 0000000..8c82eb7 --- /dev/null +++ b/kafka/docker-help.sh @@ -0,0 +1,15 @@ +#!/bin/bash +echo "Hi," +echo "" +echo "This image is basically just the official Kafka distribution," +echo "containing both servers and utils, each with its own help output." +echo "" +echo "Select as entrypoint one of these scripts:" +find ./bin/ -name '*.sh' +echo "" +echo "You might find one of the sample config files useful:" +find ./config/ -name '*.properties' +echo "" +echo "Add more using volumes, or downstream images." +echo "Enjoy Kafka!" +echo "" diff --git a/kafka-prometheus-jmx-exporter/Dockerfile b/prometheus-jmx-exporter/Dockerfile similarity index 100% rename from kafka-prometheus-jmx-exporter/Dockerfile rename to prometheus-jmx-exporter/Dockerfile