MySQL
# Start
service mysqld start
systemctl start mysqld    # the unit may be named 'mysql' on some distros
# Stop
service mysqld stop
# Enable MySQL to start on boot
chkconfig mysqld on
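On systemd-based systems the chkconfig equivalent is enable (as above, the unit name may be mysql or mysqld depending on the distro):
systemctl enable mysqld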
Hadoop
start-all.sh
stop-all.sh
start-dfs.sh
stop-dfs.sh
start-yarn.sh
stop-yarn.sh
hadoop-daemon.sh start namenode
hadoop-daemons.sh start datanode
yarn-daemon.sh start resourcemanager
yarn-daemons.sh start nodemanager
mr-jobhistory-daemon.sh start historyserver
hdfs dfsadmin -safemode leave
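Before forcing HDFS out of safe mode it is worth checking its state; these are standard hdfs dfsadmin subcommands:
hdfs dfsadmin -safemode get
hdfs dfsadmin -report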
Hive
nohup /export/server/hive-2.1.0/bin/hive --service metastore &
nohup /export/server/hive-2.1.0/bin/hive --service hiveserver2 &
!connect jdbc:hive2://node03:10000
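The !connect line above is typed inside the beeline shell; an equivalent one-liner from the OS shell (the -n user here is only an assumption):
beeline -u jdbc:hive2://node03:10000 -n root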
schematool -dbType mysql -initSchema
schematool -dbType mysql -upgradeSchema
-- dynamic partitioning
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=100000;
set hive.exec.max.created.files=150000;
-- HDFS datanode transceiver limit (the property really is spelled "xcievers")
set dfs.datanode.max.xcievers=8192;
-- compression
set hive.exec.compress.intermediate=true;
set hive.exec.compress.output=true;
set hive.exec.orc.compression.strategy=COMPRESSION;
-- bucketing and sort-merge bucket map join
set hive.enforce.bucketing=true;
set hive.enforce.sorting=true;
set hive.optimize.bucketmapjoin=true;
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.noconditionaltask=true;
-- parallel stage execution
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=8;
-- split sizes (uncomment to override)
-- set mapred.max.split.size=2147483648;
-- set mapred.min.split.size.per.node=1000000000;
-- set mapred.min.split.size.per.rack=1000000000;
-- vectorization, correlation optimizer, ORC zero-copy reads
set hive.vectorized.execution.enabled=true;
set hive.optimize.correlation=true;
set hive.exec.orc.zerocopy=true;
-- skew handling
set hive.optimize.skewjoin=true;
-- set hive.skewjoin.key=100000;
set hive.optimize.skewjoin.compiletime=true;
set hive.optimize.union.remove=true;
set hive.groupby.skewindata=false;
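A minimal sketch of what the dynamic-partition settings enable; the table and column names (stage.orders, ods.orders, dt) are hypothetical:
hive -e "
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
-- hypothetical tables; the partition column (dt) must be the last column in the SELECT
INSERT OVERWRITE TABLE ods.orders PARTITION (dt)
SELECT order_id, amount, dt FROM stage.orders;
"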
Zookeeper
zkServer.sh start
zookeeper-daemon.sh start
Kafka
Start and stop
kafka-server-start.sh config/server.properties >>/dev/null 2>&1 &
kafka-server-stop.sh
Wrapper start script; remember to make it executable (chmod +x)
#!/bin/bash
KAFKA_HOME=/export/server/kafka_2.12-2.4.1
for number in {1..3}
do
host=node${number}
echo ${host}
/usr/bin/ssh ${host} "cd ${KAFKA_HOME};source /etc/profile;export JMX_PORT=9988;${KAFKA_HOME}/bin/kafka-server-start.sh ${KAFKA_HOME}/config/server.properties >>/dev/null 2>&1 &"
echo "${host} started"
done
Wrapper stop script; remember to make it executable (chmod +x)
#!/bin/bash
KAFKA_HOME=/export/server/kafka_2.12-2.4.1
for number in {1..3}
do
host=node${number}
echo ${host}
/usr/bin/ssh ${host} "cd ${KAFKA_HOME};source /etc/profile;${KAFKA_HOME}/bin/kafka-server-stop.sh"
echo "${host} stopped"
done
Completely delete a Kafka topic and reinitialize it
bin/kafka-topics.sh --zookeeper node1:2181,node2:2181,node3:2181 --delete --topic test_data
# in zkCli.sh: remove the topic's znodes (deleteall; use rmr on older ZooKeeper versions)
deleteall /brokers/topics/test_data
deleteall /config/topics/test_data
deleteall /admin/delete_topics/test_data
rm -rf /export/data/kafka/kafka-logs/*
bin/kafka-topics.sh --zookeeper node1:2181,node2:2181,node3:2181 --create --topic test_data --partitions 3 --replication-factor 2
Create a topic
kafka-topics.sh --zookeeper node3:2181 --create --topic spark_kafka --partitions 3 --replication-factor 1
kafka-topics.sh --zookeeper node3:2181 --list
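To verify the partition and replica layout of a topic, kafka-topics.sh also supports --describe:
kafka-topics.sh --zookeeper node3:2181 --describe --topic spark_kafka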
Start a console producer and consumer
kafka-console-producer.sh --broker-list node3:9092 --topic spark_kafka
kafka-console-consumer.sh --from-beginning --bootstrap-server node3:9092 --topic spark_kafka
kafka-console-consumer.sh --from-beginning --bootstrap-server node3:9092 --topic __consumer_offsets
Spark
Start the Spark Thrift Server
start-thriftserver.sh \
--hiveconf hive.server2.thrift.port=10001 \
--hiveconf hive.server2.thrift.bind.host=node3 \
--master local[*]
Start the Spark HistoryServer (web UI on port 18080)
sbin/start-history-server.sh
Structured Streaming
-- MySQL sink (word-count output table and upsert statement)
CREATE TABLE db_spark.tb_word_count (
id int NOT NULL AUTO_INCREMENT,
word varchar(255) NOT NULL,
count int NOT NULL,
PRIMARY KEY (id),
UNIQUE KEY word (word)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
REPLACE INTO tb_word_count (id, word, count) VALUES (NULL, ?, ?);
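A quick way to see the upsert behaviour of the REPLACE statement from the MySQL client; connection details are placeholders, and running it twice with different counts leaves a single row per word thanks to the UNIQUE KEY on word:
mysql -uroot -p db_spark -e "REPLACE INTO tb_word_count (id, word, count) VALUES (NULL, 'spark', 3);"
mysql -uroot -p db_spark -e "SELECT * FROM tb_word_count WHERE word = 'spark';"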
Spark on YARN Pi test
/export/server/spark/bin/spark-submit \
--master yarn \
--class org.apache.spark.examples.SparkPi \
${SPARK_HOME}/examples/jars/spark-examples_2.11-2.4.5.jar \
10
WordCount on YARN
/export/server/spark/bin/spark-submit \
--master yarn \
--driver-memory 512m \
--executor-memory 512m \
--executor-cores 1 \
--num-executors 2 \
--queue default \
--class cn.test.spark._2SparkWordCount \
/opt/spark-chapter01-1.0-SNAPSHOT.jar
Spark-submit
# Run application locally on 8 cores
/export/server/spark/bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master local[8] \
${SPARK_HOME}/examples/jars/spark-examples_2.11-2.4.5.jar \
100
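# Run on a Spark standalone cluster in client deploy mode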
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master spark://207.184.161.138:7077 \
--executor-memory 20G \
--total-executor-cores 100 \
${SPARK_HOME}/examples/jars/spark-examples_2.11-2.4.5.jar \
1000
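# Run on a Spark standalone cluster in cluster deploy mode with supervise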
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master spark://207.184.161.138:7077 \
--deploy-mode cluster \
--supervise \
--executor-memory 20G \
--total-executor-cores 100 \
/path/to/examples.jar \
1000
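# Run on a YARN cluster in cluster deploy mode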
export HADOOP_CONF_DIR=XXX
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master yarn \
--deploy-mode cluster \
--executor-memory 20G \
--num-executors 50 \
/path/to/examples.jar \
1000
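# Run a Python application on a Spark standalone cluster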
./bin/spark-submit \
--master spark://207.184.161.138:7077 \
examples/src/main/python/pi.py \
1000
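# Run on a Mesos cluster in cluster deploy mode with supervise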
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master mesos://207.184.161.138:7077 \
--deploy-mode cluster \
--supervise \
--executor-memory 20G \
--total-executor-cores 100 \
http://path/to/examples.jar \
1000
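# Run on a Kubernetes cluster in cluster deploy mode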
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master k8s://xx.yy.zz.ww:443 \
--deploy-mode cluster \
--executor-memory 20G \
--num-executors 50 \
http://path/to/examples.jar \
1000
Sqoop data extraction and data validation
export SQOOP_HOME=/export/server/sqoop-1.4.7.bin_hadoop-2.6.0
$SQOOP_HOME/bin/sqoop import \
--connect jdbc:mysql://192.168.88.163:3306/insurance \
--username root \
--password 123456 \
--table dd_table \
--hive-table insurance_ods.dd_table \
--hive-import \
--hive-overwrite \
--fields-terminated-by '\t' \
--delete-target-dir \
-m 1
mysql_log=`$SQOOP_HOME/bin/sqoop eval \
--connect jdbc:mysql://192.168.88.163:3306/insurance \
--username root \
--password 123456 \
--query "select count(1) from dd_table"
`
# sqoop eval prints an ASCII table; field 4 of the pipe-split output holds the count
mysql_cnt=`echo $mysql_log | awk -F'|' '{print $4}' | awk '{print $1}'`
hive_log=`hive -e "select count(1) from insurance_ods.dd_table"`
if [ $mysql_cnt -eq $hive_log ] ; then
echo "MySQL row count=$mysql_cnt, Hive row count=$hive_log: they match"
else
echo "MySQL row count=$mysql_cnt, Hive row count=$hive_log: they do NOT match"
fi
Flink
Flink on YARN
flink/bin/yarn-session.sh -d -jm 1024 -tm 1024 -s 2
flink/bin/flink run /export/server/flink/examples/batch/WordCount.jar \
--input hdfs://node1.test.cn:8020/wordcount/input
/export/server/flink/bin/flink run \
-m yarn-cluster -yjm 1024 -ytm 1024 \
/export/server/flink/examples/batch/WordCount.jar \
--input hdfs://node1.test.cn:8020/wordcount/input
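The detached yarn-session above keeps running until its YARN application is stopped; the application id below is only a placeholder taken from the list output:
yarn application -list
yarn application -kill application_1234567890123_0001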
Other commands
Start Elasticsearch (ES)
cd /export/server/es/elasticsearch-7.6.1/
/export/server/es/elasticsearch-7.6.1/bin/elasticsearch >>/dev/null 2>&1 &
Markdown collapsible code block
<details>
<summary><b>Click to view the full code</b></summary>
<pre><code>
</code></pre>
</details>
Passwordless SSH login
ssh-keygen -t rsa
ssh-copy-id node1
scp /root/.ssh/authorized_keys node2:/root/.ssh
scp /root/.ssh/authorized_keys node3:/root/.ssh
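To confirm passwordless login works, a quick check against the other nodes:
ssh node2 hostname
ssh node3 hostname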