Spark 2.3.4 Standalone Cluster Mode Deployment
- Related Documentation
- Dependent Services
- System Optimization
- Create Directories
- Configure /etc/profile
- Configure $SPARK_HOME/conf/spark-env.sh
- Configure $SPARK_HOME/conf/spark-defaults.conf
- Configure $SPARK_HOME/conf/slaves
- Distribute the Configuration
- Start Spark
- Verify Spark
Related Documentation
- Download archive: https://archive.apache.org/dist/spark/
- Deployment guide: https://spark.apache.org/docs/latest/spark-standalone.html
Dependent Services
- JDK 1.8.0_333
- ZooKeeper 3.5.10: https://blog.csdn.net/weixin_42598916/article/details/135726572?spm=1001.2014.3001.5502
- Hadoop 3.1.1: https://blog.csdn.net/weixin_42598916/article/details/135726131?spm=1001.2014.3001.5502
- Scala 2.13.1
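Before moving on, it may be worth confirming that each dependency is installed and resolvable on every node. A minimal spot check, assuming the binaries are already on the PATH (zkServer.sh in particular may need its full installation path):

# Confirm dependency versions on the current node
java -version        # expect 1.8.0_333
hadoop version       # expect 3.1.1
scala -version       # expect 2.13.1
zkServer.sh status   # expect Mode: leader or follower on the ZooKeeper nodes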
System Optimization
- Apply the following optimizations on every node
# Change the hostname as required
hostname hadoop1

# Disable SELinux
# Set the SELINUX value to disabled
vi /etc/selinux/config
SELINUX=disabled
# Takes effect only after a reboot
# Check the SELinux status
getenforce

# Disable the firewall
systemctl stop firewalld && systemctl disable firewalld && systemctl status firewalld

# Install the Chrony service
yum install chrony -y

# Configure the Chrony service
# Comment out the default NTP server entries
# Add the required NTP server entry
vi /etc/chrony.conf
server hadoop1 iburst

# Start the Chrony service and enable it at boot
systemctl enable chronyd --now

# Check the Chrony service status
chronyc sources -v
210 Number of sources = 1

  .-- Source mode  '^' = server, '=' = peer, '#' = local clock.
 / .- Source state '*' = current synced, '+' = combined , '-' = not combined,
| /   '?' = unreachable, 'x' = time may be in error, '~' = time too variable.
||                                                 .- xxxx [ yyyy ] +/- zzzz
||      Reachability register (octal) -.           |  xxxx = adjusted offset,
||      Log2(Polling interval) --.      |          |  yyyy = measured offset,
||                                \     |          |  zzzz = estimated error.
||                                 |    |           \
MS Name/IP address         Stratum Poll Reach LastRx Last sample
====================================================================================================================
^* hadoop1                       4   6   377    12    -28us[  -45us] +/-   75ms

# Configure passwordless SSH login
# Generate id_rsa.pub on every node
ssh-keygen -t rsa
# Append each node's id_rsa.pub to the authorized_keys file of every node
ssh-copy-id root@hadoop1
ssh-copy-id root@hadoop2
ssh-copy-id root@hadoop3
# Final result
cat /root/.ssh/authorized_keys
# redis-nodes
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDwuKw9LdfDO3Ln+ViNtQEqZtH/RvoFymKkexBXRUK/2XcczKHPv967KHH71L/5vPOQPUXZLZg3TPERlRTIW9MvCh0LmceGAiQHrxczx56RnYh8nESknd2jbHBToGwqgoB8xsB2IQuhze0CqvRs7A0nrbyBvnUpg/DvePTOSSgii4z9kishBCbrCPamQm20drXVDK3gQ9Q+/YJLKa3+mxzI67xfk/jby0A0DD9XKL7fflRgMK0GXEtYsJ04tKc5Bo+w6Zc8gHyryFrKD4wpeoPakqmrdzaTVYI1x5WvrAPrQplxAP8iNfBqRJSHvlDBXVeXgSxz2I4HBshsStkKp root@redis1
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkspWeTwWoWyr6biMnefOYT4kh+7gPAboHAWe7p67IR9pfu+Rkk/vxLFDbi7X6Td9AhIXEZH6fY5BhihBzhRO/VtjE24QqnXdOLDHV1i0rSEYh6GOAbnVl/93lKidQF/2wvnQET31m1iwls3ul6aWw8/pOcxWy6kB+6MRiOExhu+0erE3jBFLcl+e0IJLKp/nLjCof/qWh3hLGVyhgMn/WmGhf7OyUbedXFqAwwS83/M60jSL1nB1lnIOoHrNSdnrN/GJVXmmwJjJAG4g4hbAg2zNind2rz6p4mq5k7iBbDUFghFwKKYsGeV0Onm7SKErFlHCJNFSOgfVNpaUYJ root@redis2
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC+DGKAYw3tbdmv2GDsz3HEOdoKk8JVCEvDFczap2g3DoaqwEBkRag2l9IQ3RZL/WtpKe0f2vZzcm5t3d7e6YhyfEXRn1fjOmynTcykB13xAVrlRfJ6Sayur0OiPzWBktpNj8qaTKjwH+lyHGBwa5duqKiVEglEH2mX5grcOa/mH2Mo+IWsCYeCldKjfdBy2drlAim1fYvJwvtg0uDe8sfDUdDonG4phNOVaWB2u79SxKlGnGewGNuOrifIzkbc0mH9kNgrlw/xdSIqaFA738Yn/4n/kSe3BgceJ0wBowLzorgW2ogyGOdQp6MzBRlg/hxn4EDLJisrC9mSCMOOl root@redis3
# hadoop-nodes
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCvWawSJqu4/Adnu6TjvV8rVDAqTU2CGNaSBOTDjcytHHOaY8UiwUMKvXUJugBmRkyhtWhQPHrVSmOH6+qMnHk5XQcWBmce8qCQqDoz49WwyZH95ciY/ynKR9dzAJwXN5fvJEoKxBhSJLk27SDsgRUX05IAjTN5Wx05GCNC36CRGHr6bwsC5iK+nv1ZllkRPyqoICJcvVVoJFDe+svNwLJS8bEpTUS/3C6w1RdfEgGVK0/NLnmANz6VIu5LAZqOpwFcB8Zed3wgnoHUfDCSXLEUQbcgRxDvba7lcvOqbiNh4Tr6WctSHw0UD9PSK6AXdS0jAAyjZ1J5kbWaI+vmZ root@hadoop1
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCwCqgQWDgw7sSqNer1oONzsFhoCWBmLqdWOQCcC7RYhD6kiVzdAEP7qZwWKRwoe/E++xP0+slgxsIsXGVoObGrlT3n+g/2xsgTCaBT/6sGV7k28UOozh76GlyfJjzavbwWE9Q2yR2mkb3/ILGE6CUNCkqqLuYEDTG4DxNupGhsGSYChAcjclzYFrMxDARiOJ8cahDjVlmGzFWxNhzJ36pFC1Rdyeu4CrtZ8tkuqQagGZqB63bVmvTiOM2fY8Wp8TNv0Zz2XmFmv7IUhpDXlPZdFCviwLYLLoJ9LTG32rO/jY0U78LFdDpsYdebthztNakKMZEhCqVIR+k1VMPtp root@hadoop2
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDHmj5qT64jSc3LCR2EBKB+12C1XxhFlc44X8zdf3mL8gbepG+ndMgBV4eombLg7QjZshCsjhk9d8esofAlrPk5tX/nWWHg3p5jYTh5/6V+iU7VDpWmMVN/87dsjBbmM9P6jTNiwqk4rdSXDKXkmrVygGHnEj95eP35Nq1JKg+GS7RjWWB0+loGQ4eYKO1nj2nYNOlNBi28CKh1uMWf42bDtcfKP3Z4gEOtPBD5rVPiU2Tq6jgtAs/VvaYGv5FHO4MB0lBE1ik8zp/4trfGU5hie/1PzCRAIvsqPEBSzeUs9nhHODj6vZYwgQupK9Qv5jEbQgh6pCGEfFZlfsC03 root@hadoop3

# Configure Oracle JDK
# Download Oracle JDK and place it in the target path
# Append to the /etc/profile file
cat >> /etc/profile << 'EOF'
# Oracle JDK 1.8.0_333
export JAVA_HOME=/data/service/jdk/jdk1.8.0_333
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib/
export PATH=$PATH:$JAVA_HOME/bin
EOF

# Reload the configuration
source /etc/profile

# Check the JDK status
java -version
java version "1.8.0_333"
Java(TM) SE Runtime Environment (build 1.8.0_333-b02)
Java HotSpot(TM) 64-Bit Server VM (build 25.333-b02, mixed mode)

# Configure the HOSTS file
cat >> /etc/hosts << EOF
# redis-nodes
10.10.10.21 redis1
10.10.10.22 redis2
10.10.10.23 redis3
# hadoop-nodes
10.10.10.131 hadoop1
10.10.10.132 hadoop2
10.10.10.133 hadoop3
EOF

# Disable swap
swapoff -a
# Comment out the swap partition mount
vi /etc/fstab

# Configure vm.swappiness
echo "vm.swappiness = 0" >> /etc/sysctl.conf
# Reload the configuration
sysctl -p

# Configure transparent_hugepage
# Takes effect immediately
echo never > /sys/kernel/mm/transparent_hugepage/enabled && echo never > /sys/kernel/mm/transparent_hugepage/defrag
# Persists across reboots
echo "echo never > /sys/kernel/mm/transparent_hugepage/enabled" >> /etc/rc.local && echo "echo never > /sys/kernel/mm/transparent_hugepage/defrag" >> /etc/rc.local

# Configure the maximum number of open files and processes
# On CentOS 6 the file is 90-nproc.conf
# On CentOS 7 the file is 20-nproc.conf
vi /etc/security/limits.d/20-nproc.conf
* - nofile 655350
* - nproc 655350
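After applying the optimizations, a quick cross-node spot check can confirm they took effect. A minimal sketch, assuming passwordless SSH is already working; adjust the node list to your cluster:

# Spot-check SELinux, firewall, swap and THP on every node
for node in redis1 redis2 redis3 hadoop1 hadoop2 hadoop3; do
  echo "--- $node ---"
  ssh $node "getenforce; systemctl is-active firewalld; free -m | grep -i swap; cat /sys/kernel/mm/transparent_hugepage/enabled"
done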
Create Directories
mkdir -p /data/service/spark/{spark_data1,spark_data2,spark_logs,spark_tmp}
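The SPARK_HOME used in the rest of this guide (/data/service/spark/spark-2.3.4-bin-hadoop2.7) implies the prebuilt Hadoop 2.7 package; a minimal sketch of fetching it from the archive path listed above and unpacking it in place:

# Download the prebuilt Spark package and unpack it under /data/service/spark
cd /data/service/spark
wget https://archive.apache.org/dist/spark/spark-2.3.4/spark-2.3.4-bin-hadoop2.7.tgz
tar -zxf spark-2.3.4-bin-hadoop2.7.tgz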
Configure /etc/profile
# Spark 2.3.4
export SPARK_HOME=/data/service/spark/spark-2.3.4-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin:$SPARK_HOME/lib
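As with the JDK entry in the system optimization step, these lines can be appended to /etc/profile and reloaded; a minimal sketch:

# Append the Spark environment variables and reload the profile
cat >> /etc/profile << 'EOF'
# Spark 2.3.4
export SPARK_HOME=/data/service/spark/spark-2.3.4-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin:$SPARK_HOME/lib
EOF
source /etc/profile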
Configure $SPARK_HOME/conf/spark-env.sh
- Rename $SPARK_HOME/conf/spark-env.sh.template to $SPARK_HOME/conf/spark-env.sh
# Paths of related services
export JAVA_HOME=/data/service/jdk/jdk1.8.0_333
export SCALA_HOME=/data/service/scala/scala-2.13.1
export HADOOP_HOME=/data/service/hadoop/hadoop-3.1.1
export HADOOP_CONF_DIR=/data/service/hadoop/hadoop-3.1.1/etc/hadoop

# Spark data paths
export SPARK_LOG_DIR="/data/service/spark/spark_logs"
export SPARK_PID_DIR=/data/service/spark/spark_tmp
export SPARK_LOCAL_DIRS=/data/service/spark/spark_data1,/data/service/spark/spark_data2

# Master settings
export SPARK_MASTER_HOST=redis1
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=8080

# Worker settings
export SPARK_WORKER_CORES=2
export SPARK_WORKER_MEMORY=4g
export SPARK_WORKER_INSTANCES=1

# JVM heap size for all daemons
# It cannot be set for a single daemon individually
export SPARK_DAEMON_MEMORY=512m

# Enable Master HA via ZooKeeper
export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=hadoop1:2181,hadoop2:2181,hadoop3:2181"
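Because spark.deploy.recoveryMode=ZOOKEEPER depends on the quorum listed above, it can help to confirm the ensemble is reachable from the Spark nodes before starting any Master. A minimal sketch using the srvr four-letter command (requires nc, and in ZooKeeper 3.5.x the command must be allowed via 4lw.commands.whitelist):

# Check that each ZooKeeper server in the HA quorum responds
for zk in hadoop1 hadoop2 hadoop3; do
  echo "--- $zk ---"
  echo srvr | nc $zk 2181 | head -n 2
done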
Configure $SPARK_HOME/conf/spark-defaults.conf
- Rename $SPARK_HOME/conf/spark-defaults.conf.template to $SPARK_HOME/conf/spark-defaults.conf
spark.eventLog.enabled              true
spark.eventLog.dir                  hdfs://hdfscluster/spark_history_logs
spark.history.fs.logDirectory       hdfs://hdfscluster/spark_history_logs
spark.history.retainedApplications  5
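The event log directory must exist on HDFS before applications and the HistoryServer can use it; a minimal sketch, assuming the hdfscluster nameservice from the Hadoop deployment referenced above:

# Create the Spark history log directory on HDFS and confirm it is there
hdfs dfs -mkdir -p hdfs://hdfscluster/spark_history_logs
hdfs dfs -ls hdfs://hdfscluster/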
Configure $SPARK_HOME/conf/slaves
redis1
redis2
redis3
Distribute the Configuration
- Distribute /data/service/spark to every node (see the sketch below)
- On the backup Master node, adjust $SPARK_HOME/conf/spark-env.sh:
export SPARK_MASTER_HOST=redis2
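A minimal distribution sketch, assuming passwordless SSH from the primary Master and that rsync is available (scp -r works as well); the node names follow the slaves file above:

# Copy the Spark directory (including conf/) to the other nodes
for node in redis2 redis3; do
  rsync -a /data/service/spark/ $node:/data/service/spark/
done

# On the backup Master, point SPARK_MASTER_HOST at itself
ssh redis2 "sed -i 's/^export SPARK_MASTER_HOST=.*/export SPARK_MASTER_HOST=redis2/' /data/service/spark/spark-2.3.4-bin-hadoop2.7/conf/spark-env.sh"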
Start Spark
- On the primary Master (redis1)
$SPARK_HOME/sbin/start-all.sh
$SPARK_HOME/sbin/start-history-server.sh
- On the backup Master (redis2)
$SPARK_HOME/sbin/start-master.sh
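A quick way to confirm the daemons are up is jps on each node; with this layout, expect Master, Worker and HistoryServer on redis1, a standby Master plus a Worker on redis2, and a Worker on redis3. A minimal sketch:

# List the Spark daemons running on every node
for node in redis1 redis2 redis3; do
  echo "--- $node ---"
  ssh $node "jps | grep -E 'Master|Worker|HistoryServer'"
done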
Verify Spark
- Spark Web UI: http://10.10.10.21:8080
- Spark HistoryServer Web UI: http://10.10.10.21:18080
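To verify the cluster end to end, a job can be submitted against the standalone Masters; a minimal sketch using the bundled SparkPi example (the examples jar name assumes the default layout of the spark-2.3.4-bin-hadoop2.7 package). Listing both Masters in the URL lets the client locate whichever one is currently active:

# Submit the SparkPi example to the standalone cluster
spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master spark://redis1:7077,redis2:7077 \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.3.4.jar 100
# Expect "Pi is roughly 3.14..." in the driver output,
# and the finished application to show up in the HistoryServer UI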