- This project fixes several bugs from its original source and has been tested to run stably on Ubuntu 20.04 x86_64.
git clone https://github.com/crazyn2/hadoop-zookeeper-hbase.git
cd hadoop-zookeeper-hbase
chmod +x build-image.sh
./build-image.sh
or
docker pull ctazyn/hadoop-hbase:2.3
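For reference, a build script like this usually just wraps `docker build`; a minimal sketch, assuming the Dockerfile sits at the repository root and reusing the tag above:

```bash
#!/bin/bash
# Build the cluster image from the Dockerfile in the current directory.
# The tag mirrors the published image; change it if you push your own build.
docker build -t ctazyn/hadoop-hbase:2.3 .
```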
- ctazyn/hadoop-hbase:1.0 :ubuntu14.04 + hadoop2 + zookeeper3 + hbase1 + openjdk8
- ctazyn/hadoop-hbase:2.0 :ubuntu18.04 + hadoop2 + zookeeper3 + hbase1 + openjdk8
- ctazyn/hadoop-hbase:2.1 :ubuntu18.04 + hadoop3 + zookeeper3 + hbase1 + openjdk11 (2.1 and later use OpenJDK 11)
- ctazyn/hadoop-hbase:2.2 :ubuntu20.04 + hadoop3 + zookeeper3 + hbase1 + mariadb + hive3 + openjdk11
- ctazyn/hadoop-hbase:2.3 :hadoop3 + zookeeper3 + hbase1 + openjdk11 (mariadb + hive3 are installed only in the hadoop-master container to reduce image disk usage) (recommended)
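To confirm which component versions a pulled image actually ships, you can run throwaway containers against it; a quick sketch (the /usr/local/hadoop path matches the layout used later in this README, and you may need --entrypoint if the image defines one):

```bash
docker run --rm ctazyn/hadoop-hbase:2.3 java -version
docker run --rm ctazyn/hadoop-hbase:2.3 /usr/local/hadoop/bin/hadoop version
```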
sudo docker network create --driver=bridge hadoop
chmod +x ./start-container.sh
./start-container.sh
output:
start hadoop-master container...
start hadoop-slave1 container...
start hadoop-slave2 container...
root@hadoop-master:~#
- starts 3 containers: 1 master and 2 slaves (see the sketch below)
- you will be dropped into the /root directory of the hadoop-master container
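Under the hood, start-container.sh presumably launches the three containers on the hadoop bridge network created above; a minimal sketch, with port mappings and options as illustrative guesses rather than the script's actual contents:

```bash
#!/bin/bash
# Master: publish the NameNode (9870) and ResourceManager (8088) web UIs,
# which are the Hadoop 3 defaults.
docker run -itd --net=hadoop -p 9870:9870 -p 8088:8088 \
  --name hadoop-master --hostname hadoop-master ctazyn/hadoop-hbase:2.3

# Two slaves on the same network.
for i in 1 2; do
  docker run -itd --net=hadoop \
    --name hadoop-slave$i --hostname hadoop-slave$i ctazyn/hadoop-hbase:2.3
done

# Attach to the master, matching the root@hadoop-master prompt above.
docker exec -it hadoop-master bash
```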
./start-hadoop.sh
./run-wordcount.sh
output:
input file1.txt:
Hello Hadoop
input file2.txt:
Hello Docker
wordcount output:
Docker 1
Hadoop 1
Hello 2
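run-wordcount.sh most likely drives the stock MapReduce example jar; a sketch of the equivalent manual steps, assuming the bundled hadoop-mapreduce-examples jar (the exact jar name varies by Hadoop version):

```bash
# Create the two sample inputs and upload them to HDFS.
echo "Hello Hadoop" > file1.txt
echo "Hello Docker" > file2.txt
hdfs dfs -mkdir -p input
hdfs dfs -put file1.txt file2.txt input

# Run the built-in wordcount job and print the result.
hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
  wordcount input output
hdfs dfs -cat output/part-r-00000
```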
repeat steps 1~3 as in section A
sudo ./resize-cluster.sh 5
- specify a parameter > 1: 2, 3, ...
- if no parameter is given, the default is 3
- this script just rebuilds the hadoop image with a different slaves file, which specifies the names of all slave nodes (a sketch follows below)
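A minimal sketch of such a rebuild (the slaves file name and location are assumptions about the repo, not confirmed):

```bash
#!/bin/bash
# Total node count; default to 3 when no argument is given.
N=${1:-3}

# Regenerate the slaves file baked into the image: one slave host per line.
: > config/slaves
for ((i = 1; i < N; i++)); do
  echo "hadoop-slave$i" >> config/slaves
done

# Rebuild the image so the new node list takes effect.
docker build -t ctazyn/hadoop-hbase:2.3 .
```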
sudo ./start-container.sh 5
- use the same parameter as in step 2
repeat steps 5~6 as in section A
/usr/local/hbase/bin/start-hbase.sh
Warning: please wait at least 3 minutes for the application to launch successfully
/usr/local/hbase/bin/hbase shell
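Once the shell is up, a quick smoke test with standard HBase shell commands (the table and column-family names are arbitrary examples):

```
create 'test', 'cf'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'
disable 'test'
drop 'test'
```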
chmod +x stop-docker.sh
./stop-docker.sh
chmod +x start-docker.sh
./start-docker.sh
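These helpers presumably just stop and restart the named containers; a sketch, assuming the three container names from the start-up output above:

```bash
#!/bin/bash
# stop-docker.sh equivalent: stop every cluster container.
docker stop hadoop-master hadoop-slave1 hadoop-slave2

# start-docker.sh equivalent: bring them back and re-attach to the master.
docker start hadoop-master hadoop-slave1 hadoop-slave2
docker exec -it hadoop-master bash
```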
mapred --daemon start historyserver
root@hadoop-master:/usr/local/hadoop/logs# jps
2148 Jps
22 QuorumPeerMain
1832 ResourceManager
248 NameNode
476 SecondaryNameNode
start-yarn.sh
start-dfs.sh
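After these scripts finish you can re-run jps to see the missing daemons, or probe the web UIs; a sketch using the standard Hadoop 3 default ports (19888 for the JobHistoryServer, 8088 for the ResourceManager), run from inside the master container:

```bash
curl -s http://hadoop-master:19888/ > /dev/null && echo "historyserver up"
curl -s http://hadoop-master:8088/cluster > /dev/null && echo "resourcemanager up"
```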
mvn archetype:generate "-DgroupId=com.companyname.bank" "-DartifactId=consumerBanking" "-DarchetypeArtifactId=maven-archetype-quickstart" "-DinteractiveMode=false"
Click the Remote Explorer plugin icon in the extension sidebar on the left, right-click the target container, and choose "Attach to Container". Then wait a while until the VS Code remote server is installed in the container. If the Java Extension Pack plugin is not installed on the remote server, install it manually; it automatically generates settings.json and launch.json inside the docker container.
mvn package
hadoop jar {filename}.jar {mainClassPath}
Example
hadoop jar consumerBanking-1.0-SNAPSHOT.jar com.companyname.bank.App
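Putting the pieces together, a typical build-and-submit flow from the host might look like this (the jar path under target/ follows Maven conventions; copying into /root of the master is an assumption):

```bash
cd consumerBanking
mvn package

# Copy the built jar into the master container and submit it there.
docker cp target/consumerBanking-1.0-SNAPSHOT.jar hadoop-master:/root/
docker exec -it hadoop-master \
  hadoop jar /root/consumerBanking-1.0-SNAPSHOT.jar com.companyname.bank.App
```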
container: ctazyn/hadoop-spark-hbase:latest (Ubuntu 20.04 + hadoop3.3 + spark3)
docker pull ctazyn/hadoop-spark-hbase:latest
Then follow the same steps as above, using the matching shell scripts whose names have "spark" inserted into them.
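As a quick end-to-end check of the Spark installation, something like the following should work (the /usr/local/spark path mirrors the layout of the other components but is an assumption):

```bash
# Run the bundled SparkPi example on YARN.
/usr/local/spark/bin/spark-submit --master yarn \
  --class org.apache.spark.examples.SparkPi \
  /usr/local/spark/examples/jars/spark-examples_*.jar 10
```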