# Apache Kafka

Get to know about Kafka here: https://kafka.apache.org/intro

Setup

# To install zookeeper, run the following command:
sudo apt-get install zookeeperd

# Check the service status, then start it and enable it at boot
sudo systemctl status zookeeper
sudo systemctl start zookeeper
sudo systemctl enable zookeeper

sudo apt-get install net-tools
# Now you can run the following command to check whether zookeeper is running on port 2181.
sudo netstat -tulpen | grep 2181

# Zookeeper cleanup (stops the service and deletes all Zookeeper data)
sudo systemctl stop zookeeper
# vim /etc/zookeeper/conf_example/zoo.cfg
sudo rm -rf /var/lib/zookeeper/

# Kafka: download, unpack, and copy the broker configs
cd /opt/binaries/
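# Releases this old are no longer served from downloads.apache.org; fetch from the Apache archive instead.
wget https://archive.apache.org/dist/kafka/2.4.0/kafka_2.11-2.4.0.tgz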
# tar -C needs the target directory to exist, so create it first
mkdir -p /opt/binaries/kafka
tar xvzf kafka_2.11-2.4.0.tgz -C /opt/binaries/kafka --strip-components=1
cp spark-streaming-playground/docs/conf/kafka/server1.properties /opt/binaries/kafka/config/server1.properties
cp spark-streaming-playground/docs/conf/kafka/server2.properties /opt/binaries/kafka/config/server2.properties
# Add the following to ~/.bashrc (KAFKA_HOME must match the install directory used above)
export KAFKA_HOME="/opt/binaries/kafka"
export PATH="$PATH:${KAFKA_HOME}/bin"

# Start 3 brokers for more parallelism (each command runs in the foreground, so use a separate terminal per broker)
sudo /opt/binaries/kafka/bin/kafka-server-start.sh /opt/binaries/kafka/config/server.properties
sudo /opt/binaries/kafka/bin/kafka-server-start.sh /opt/binaries/kafka/config/server1.properties
sudo /opt/binaries/kafka/bin/kafka-server-start.sh /opt/binaries/kafka/config/server2.properties

# To create a topic
sudo /opt/binaries/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 10 --topic ai_tweets_topic

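# To delete the topic: stop Kafka, remove its log directory, enable topic deletion in the broker config, then run the delete command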
sudo /opt/binaries/kafka/bin/kafka-server-stop.sh
sudo rm -rf /tmp/kafka-logs 
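# Edit the broker config (the brokers above are started from /opt/binaries/kafka/config/server*.properties) and set:
vim /opt/binaries/kafka/config/server.properties
    delete.topic.enable = true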

kafka-topics.sh --delete --zookeeper localhost:2181 --topic ai_tweets_topic

Testing

kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic testing
kafka-console-producer.sh --broker-list localhost:9092 --topic testing

kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic testing --from-beginning

References

  • https://linuxhint.com/install-apache-kafka-ubuntu/

  • https://www.digitalocean.com/community/tutorials/how-to-install-apache-kafka-on-ubuntu-18-04

  • https://github.com/vaquarkhan/Apache-Kafka-poc-and-notes

  • https://www.michael-noll.com/blog/2013/03/13/running-a-multi-broker-apache-kafka-cluster-on-a-single-node/

Test with our code

# Replace {project_root} with the path to your checkout of this project
cd {project_root}/
export PYTHONPATH=$(pwd)/src/:$PYTHONPATH
python src/dataset/tweet_dataset.py --mode=start_tweet_stream
kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic ai_tweets_topic --from-beginning