Hadoop 3.1.1 YARN Cluster Deployment

  • Dependencies
  • System Optimization
  • Check ZooKeeper Cluster Status
  • Create Directories
  • Configure /etc/profile
  • Configure $HADOOP_HOME/etc/hadoop/yarn-env.sh
  • Configure $HADOOP_HOME/etc/hadoop/mapred-site.xml
  • Create the NodeManager Exclude File
  • Configure $HADOOP_HOME/etc/hadoop/yarn-site.xml
  • Create $HADOOP_HOME/etc/hadoop/yarn-fairscheduler.xml
  • Storage-Compute Separation
    • 1. Create $HADOOP_HOME/etc/hadoop/nmworkers
    • 2. Adjust $HADOOP_HOME/sbin/start-yarn.sh
    • 3. Adjust $HADOOP_HOME/sbin/stop-yarn.sh
  • Distribute the Configuration
  • Start YARN
  • Verify YARN
  • Common Operations
  • Dynamically Adjust Queue Configuration

Dependencies

  • JDK 1.8.0_333
  • ZooKeeper 3.5.10:https://blog.csdn.net/weixin_42598916/article/details/135726572?spm=1001.2014.3001.5502
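  • A quick sanity check that both dependencies are in place on every node (a minimal sketch; $ZK_HOME is assumed to point at the ZooKeeper installation)
java -version
$ZK_HOME/bin/zkServer.sh status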

System Optimization

  • Perform the following optimizations on every node
# Change the hostname as needed (hostnamectl makes the change persistent)
hostnamectl set-hostname hadoop1

# Disable SELinux
# Set the SELINUX value to disabled
vi /etc/selinux/config
SELINUX=disabled

# A reboot is required for this to take effect
# Check SELinux status
getenforce

# Disable the firewall
systemctl stop firewalld && systemctl disable firewalld && systemctl status firewalld

# Install the Chrony service
yum install chrony -y

# Configure the Chrony service
# Comment out the default NTP servers
# Add the desired NTP server
vi /etc/chrony.conf
server hadoop1 iburst

# Enable the Chrony service, start it now, and have it start on boot
systemctl enable chronyd --now

# Check Chrony synchronization status
chronyc sources -v
210 Number of sources = 1

  .-- Source mode  '^' = server, '=' = peer, '#' = local clock.
 / .- Source state '*' = current synced, '+' = combined , '-' = not combined,
| /   '?' = unreachable, 'x' = time may be in error, '~' = time too variable.
||                                                 .- xxxx [ yyyy ] +/- zzzz
||      Reachability register (octal) -.           |  xxxx = adjusted offset,
||      Log2(Polling interval) --.      |          |  yyyy = measured offset,
||                                \     |          |  zzzz = estimated error.
||                                 |    |           \
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* hadoop1                       4   6   377    12    -28us[  -45us] +/-   75ms

# Configure passwordless SSH login
# Generate an id_rsa.pub on every node
ssh-keygen -t rsa

# On every node, append its id_rsa.pub to the authorized_keys file of all nodes
ssh-copy-id root@hadoop1
ssh-copy-id root@hadoop2
ssh-copy-id root@hadoop3
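# Verify passwordless login from each node, e.g.:
ssh hadoop2 hostname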

# Final result
cat /root/.ssh/authorized_keys
# redis-nodes
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDwuKw9LdfDO3Ln+ViNtQEqZtH/RvoFymKkexBXRUK/2XcczKHPv967KHH71L/5vPOQPUXZLZg3TPERlRTIW9MvCh0LmceGAiQHrxczx56RnYh8nESknd2jbHBToGwqgoB8xsB2IQuhze0CqvRs7A0nrbyBvnUpg/DvePTOSSgii4z9kishBCbrCPamQm20drXVDK3gQ9Q+/YJLKa3+mxzI67xfk/jby0A0DD9XKL7fflRgMK0GXEtYsJ04tKc5Bo+w6Zc8gHyryFrKD4wpeoPakqmrdzaTVYI1x5WvrAPrQplxAP8iNfBqRJSHvlDBXVeXgSxz2I4HBshsStkKp root@redis1
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkspWeTwWoWyr6biMnefOYT4kh+7gPAboHAWe7p67IR9pfu+Rkk/vxLFDbi7X6Td9AhIXEZH6fY5BhihBzhRO/VtjE24QqnXdOLDHV1i0rSEYh6GOAbnVl/93lKidQF/2wvnQET31m1iwls3ul6aWw8/pOcxWy6kB+6MRiOExhu+0erE3jBFLcl+e0IJLKp/nLjCof/qWh3hLGVyhgMn/WmGhf7OyUbedXFqAwwS83/M60jSL1nB1lnIOoHrNSdnrN/GJVXmmwJjJAG4g4hbAg2zNind2rz6p4mq5k7iBbDUFghFwKKYsGeV0Onm7SKErFlHCJNFSOgfVNpaUYJ root@redis2
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC+DGKAYw3tbdmv2GDsz3HEOdoKk8JVCEvDFczap2g3DoaqwEBkRag2l9IQ3RZL/WtpKe0f2vZzcm5t3d7e6YhyfEXRn1fjOmynTcykB13xAVrlRfJ6Sayur0OiPzWBktpNj8qaTKjwH+lyHGBwa5duqKiVEglEH2mX5grcOa/mH2Mo+IWsCYeCldKjfdBy2drlAim1fYvJwvtg0uDe8sfDUdDonG4phNOVaWB2u79SxKlGnGewGNuOrifIzkbc0mH9kNgrlw/xdSIqaFA738Yn/4n/kSe3BgceJ0wBowLzorgW2ogyGOdQp6MzBRlg/hxn4EDLJisrC9mSCMOOl root@redis3

# hadoop-nodes
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCvWawSJqu4/Adnu6TjvV8rVDAqTU2CGNaSBOTDjcytHHOaY8UiwUMKvXUJugBmRkyhtWhQPHrVSmOH6+qMnHk5XQcWBmce8qCQqDoz49WwyZH95ciY/ynKR9dzAJwXN5fvJEoKxBhSJLk27SDsgRUX05IAjTN5Wx05GCNC36CRGHr6bwsC5iK+nv1ZllkRPyqoICJcvVVoJFDe+svNwLJS8bEpTUS/3C6w1RdfEgGVK0/NLnmANz6VIu5LAZqOpwFcB8Zed3wgnoHUfDCSXLEUQbcgRxDvba7lcvOqbiNh4Tr6WctSHw0UD9PSK6AXdS0jAAyjZ1J5kbWaI+vmZ root@hadoop1
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCwCqgQWDgw7sSqNer1oONzsFhoCWBmLqdWOQCcC7RYhD6kiVzdAEP7qZwWKRwoe/E++xP0+slgxsIsXGVoObGrlT3n+g/2xsgTCaBT/6sGV7k28UOozh76GlyfJjzavbwWE9Q2yR2mkb3/ILGE6CUNCkqqLuYEDTG4DxNupGhsGSYChAcjclzYFrMxDARiOJ8cahDjVlmGzFWxNhzJ36pFC1Rdyeu4CrtZ8tkuqQagGZqB63bVmvTiOM2fY8Wp8TNv0Zz2XmFmv7IUhpDXlPZdFCviwLYLLoJ9LTG32rO/jY0U78LFdDpsYdebthztNakKMZEhCqVIR+k1VMPtp root@hadoop2
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDHmj5qT64jSc3LCR2EBKB+12C1XxhFlc44X8zdf3mL8gbepG+ndMgBV4eombLg7QjZshCsjhk9d8esofAlrPk5tX/nWWHg3p5jYTh5/6V+iU7VDpWmMVN/87dsjBbmM9P6jTNiwqk4rdSXDKXkmrVygGHnEj95eP35Nq1JKg+GS7RjWWB0+loGQ4eYKO1nj2nYNOlNBi28CKh1uMWf42bDtcfKP3Z4gEOtPBD5rVPiU2Tq6jgtAs/VvaYGv5FHO4MB0lBE1ik8zp/4trfGU5hie/1PzCRAIvsqPEBSzeUs9nhHODj6vZYwgQupK9Qv5jEbQgh6pCGEfFZlfsC03 root@hadoop3

# Configure Oracle JDK
# Download Oracle JDK and unpack it into the target directory
# Append the JDK settings to /etc/profile (quote EOF so the variables are written literally)
cat >> /etc/profile << 'EOF'

# Oracle JDK 1.8.0_333
export JAVA_HOME=/data/service/jdk/jdk1.8.0_333
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib/
export PATH=$PATH:$JAVA_HOME/bin
EOF

# Reload the configuration
source /etc/profile

# Check the JDK version
java -version
java version "1.8.0_333"
Java(TM) SE Runtime Environment (build 1.8.0_333-b02)
Java HotSpot(TM) 64-Bit Server VM (build 25.333-b02, mixed mode)

# Configure the hosts file
# Append the cluster entries (keep the existing localhost lines)
cat >> /etc/hosts << EOF
# redis-nodes
10.10.10.21 redis1
10.10.10.22 redis2
10.10.10.23 redis3

# hadoop-nodes
10.10.10.131 hadoop1
10.10.10.132 hadoop2
10.10.10.133 hadoop3
EOF

# Disable swap
swapoff -a

# Comment out the swap entry in /etc/fstab so it stays disabled after reboot
vi /etc/fstab
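# For example, to comment out the swap entry in place (assuming it is not already commented):
sed -ri 's/^([^#].*\sswap\s.*)$/#\1/' /etc/fstab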

# Configure vm.swappiness
echo "vm.swappiness = 0" >> /etc/sysctl.conf

# Apply the sysctl change
sysctl -p

# Configure transparent_hugepage (THP)
# Takes effect immediately (lost after reboot)
echo never > /sys/kernel/mm/transparent_hugepage/enabled && echo never > /sys/kernel/mm/transparent_hugepage/defrag

# Persist across reboots
echo "echo never > /sys/kernel/mm/transparent_hugepage/enabled" >> /etc/rc.local && echo "echo never > /sys/kernel/mm/transparent_hugepage/defrag" >> /etc/rc.local

# Configure max open files and max processes
# On CentOS 6 the file is 90-nproc.conf
# On CentOS 7 the file is 20-nproc.conf
vi /etc/security/limits.d/20-nproc.conf
* - nofile 655350
* - nproc 655350

Check ZooKeeper Cluster Status

$ZK_HOME/bin/zkCli.sh -server hadoop1:2181,hadoop2:2181,hadoop3:2181
[zk: hadoop1:2181,hadoop2:2181,hadoop3:2181(CONNECTED) 0] ls /
[admin, brokers, cluster, config, consumers, controller, controller_epoch, hadoop-ha, hbase, isr_change_notification, latest_producer_id_block, log_dir_event_notification, rmstore, spark, yarn-leader-election, zookeeper]

Create Directories

mkdir -p /data/service/hadoop/yarn-app-logs/logs

Configure /etc/profile

# Hadoop 3.1.1
export HADOOP_HOME=/data/service/hadoop/hadoop-3.1.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/lib
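  • After appending the above and running source /etc/profile, a quick sanity check that the Hadoop binaries are on PATH
hadoop version
yarn version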

Configure $HADOOP_HOME/etc/hadoop/yarn-env.sh

export JAVA_HOME="/data/service/jdk/jdk1.8.0_333"
export HADOOP_HOME="/data/service/hadoop/hadoop-3.1.1"
export YARN_RESOURCEMANAGER_OPTS="-Xms1024m -Xmx1024m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=80 -XX:+CMSParallelRemarkEnabled"
export YARN_NODEMANAGER_OPTS="-Xms512m -Xmx512m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=80 -XX:+CMSParallelRemarkEnabled"
export YARN_JOBHISTORY_OPTS="-Xms512m -Xmx512m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=80 -XX:+CMSParallelRemarkEnabled"
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export YARN_JOBHISTORY_USER=root

Configure $HADOOP_HOME/etc/hadoop/mapred-site.xml

<configuration>
<!-- MapReduce settings -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>指定 MapReduce 框架为 YARN</description>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    <description>Environment for the MapReduce ApplicationMaster</description>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    <description>Environment for map tasks</description>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
    <description>Environment for reduce tasks</description>
  </property>
  <property> 
    <name>mapreduce.application.classpath</name>
    <value>
      /data/service/hadoop/hadoop-3.1.1/etc/hadoop,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/common/lib/*,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/common/*,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/hdfs,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/hdfs/lib/*,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/hdfs/*,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/mapreduce/lib/*,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/mapreduce/*,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/yarn,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/yarn/lib/*,
      /data/service/hadoop/hadoop-3.1.1/share/hadoop/yarn/*
    </value>
    <description>Classpath for MapReduce jobs; it can be obtained with the hadoop classpath command</description>
  </property>
<!-- JobHistory settings -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop1:10020</value>
    <description>JobHistory server address</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop1:19888</value>
    <description>JobHistory web UI address</description>
  </property>
</configuration>

Create the NodeManager Exclude File

touch $HADOOP_HOME/etc/hadoop/yarn-exclude.txt
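  • This file is referenced by yarn.resourcemanager.nodes.exclude-path in yarn-site.xml below. A minimal decommissioning sketch (the hostname here is illustrative): add the NodeManager's hostname to the file, then refresh the node list
echo "redis3" >> $HADOOP_HOME/etc/hadoop/yarn-exclude.txt
yarn rmadmin -refreshNodes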

Configure $HADOOP_HOME/etc/hadoop/yarn-site.xml

<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>Enable the MapReduce shuffle service</description>
  </property>
  <property>
    <name>yarn.resourcemanager.nodes.exclude-path</name>
    <value>/data/service/hadoop/hadoop-3.1.1/etc/hadoop/yarn-exclude.txt</value>
    <description>Path to the NodeManager exclude (blacklist) file</description>
  </property>
<!-- ResourceManager HA -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
    <description>Enable ResourceManager HA</description>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarncluster</value>
    <description>YARN cluster ID</description>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
    <description>ResourceManager IDs</description>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>hadoop1</value>
    <description>Hostname of rm1</description>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>hadoop2</value>
    <description>Hostname of rm2</description>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>hadoop1:8088</value>
    <description>Web UI address of rm1</description>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>hadoop2:8088</value>
    <description>Web UI address of rm2</description>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
    <description>ZooKeeper quorum address</description>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
    <description>Enable ResourceManager recovery so state survives restarts and failover</description>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    <description>Store ResourceManager state in ZooKeeper</description>
  </property>
<!-- Log aggregation -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    <description>Enable log aggregation</description>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/yarn-aggregtion-logs</value>
    <description>HDFS directory where aggregated logs are stored</description>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>3600</value>
    <description>Retention time (seconds) for aggregated logs</description>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/data/service/hadoop/yarn-app-logs</value>
    <description>NodeManager local directories for intermediate data</description>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/data/service/hadoop/yarn-app-logs/logs</value>
    <description>NodeManager local container log directories</description>
  </property>
  <property>
    <name>yarn.nodemanager.delete.debug-delay-sec</name>
    <value>0</value>
    <description>Seconds to keep localized files and container logs on the NodeManager after an application finishes (useful for debugging)</description>
  </property>
<!-- Resource management -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>8192</value>
    <description>Total memory (MB) each NodeManager can allocate to containers</description>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>4</value>
    <description>Total vcores each NodeManager can allocate to containers</description>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>true</value>
    <description>Kill containers that exceed their physical memory allocation</description>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>true</value>
    <description>Kill containers that exceed their virtual memory allocation</description>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2</value>
    <description>Maximum virtual memory a task may use per 1 MB of physical memory</description>
  </property>
<!-- Queue management -->
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    <description>Use the FairScheduler (fair-share policy) for queue resource allocation</description>
  </property>
  <property>
    <name>yarn.scheduler.fair.user-as-default-queue</name>
    <value>true</value>
    <description>When no queue is specified, submit to a queue named after the user (instead of the default queue)</description>
  </property>
  <property>
    <name>yarn.scheduler.fair.allocation.file</name>
    <value>/data/service/hadoop/hadoop-3.1.1/etc/hadoop/yarn-fairscheduler.xml</value>
    <description>Path to the fair scheduler allocation file</description>
  </property>
  <property>
    <name>yarn.scheduler.fair.preemption</name>
    <value>false</value>
    <description>Disable preemption</description>
  </property>
<!-- Tuning -->
  <property>
    <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
    <value>95</value>
    <description>Maximum disk utilization percentage allowed by the disk health checker, based on total disk capacity including HDFS usage</description>
  </property>
</configuration>

Create $HADOOP_HOME/etc/hadoop/yarn-fairscheduler.xml

  • If yarn.resourcemanager.scheduler.class in yarn-site.xml does not specify the FairScheduler, this file is not needed
<?xml version="1.0"?>
<allocations>
  <queue name="test_queue">
    <!-- Minimum guaranteed resources for the queue -->
    <minResources>1024 mb,1vcores</minResources>
    <!-- Maximum resources for the queue -->
    <maxResources>2048 mb,2vcores</maxResources>
    <!-- Maximum number of concurrently running applications -->
    <maxRunningApps>5</maxRunningApps>
    <!-- Queue weight -->
    <weight>1.0</weight>
    <!-- Queue scheduling policy -->
    <schedulingPolicy>fair</schedulingPolicy>
    <!-- Users allowed to submit to the queue -->
    <aclSubmitApps>*</aclSubmitApps>
    <!-- Users allowed to administer the queue -->
    <aclAdministerApps>*</aclAdministerApps>
  </queue>
  <queue name="hbase">
    <minResources>2048mb,2vcores</minResources>
    <maxResources>4096mb,4vcores</maxResources>
    <maxRunningApps>10</maxRunningApps>
    <schedulingMode>fair</schedulingMode>
    <weight>2.0</weight>
    <aclSubmitApps>*</aclSubmitApps>
    <aclAdministerApps>hbase</aclAdministerApps>
  </queue>
</allocations>

Storage-Compute Separation

  • By default, HDFS and YARN use the same file, $HADOOP_HOME/etc/hadoop/workers, to decide which nodes run their worker daemons
  • Without adjustment, DataNodes and NodeManagers always land on the same nodes: either both are deployed on a node, or neither is
  • With the adjustments below, the DataNode and NodeManager node lists can be managed independently without affecting each other

1. Create $HADOOP_HOME/etc/hadoop/nmworkers

redis1
redis2
redis3

2. Adjust $HADOOP_HOME/sbin/start-yarn.sh

echo "Starting nodemanagers"
hadoop_uservar_su yarn nodemanager "${HADOOP_YARN_HOME}/bin/yarn" \
    --config "${HADOOP_CONF_DIR}" \
    --workers \
    --hosts "/data/service/hadoop/hadoop-3.1.1/etc/hadoop/nmworkers" \
    --daemon start \
    nodemanager
(( HADOOP_JUMBO_RETCOUNTER=HADOOP_JUMBO_RETCOUNTER + $? ))

3. Adjust $HADOOP_HOME/sbin/stop-yarn.sh

echo "Stopping nodemanagers"
hadoop_uservar_su yarn nodemanager "${HADOOP_YARN_HOME}/bin/yarn" \
    --config "${HADOOP_CONF_DIR}" \
    --workers \
    --hosts "/data/service/hadoop/hadoop-3.1.1/etc/hadoop/nmworkers" \
    --daemon stop \
    nodemanager

Distribute the Configuration

  • Distribute /data/service/hadoop to all nodes, e.g. as sketched below
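  • A minimal distribution sketch from hadoop1 (assuming rsync is installed and passwordless SSH as root is already set up)
for host in hadoop2 hadoop3; do
  rsync -a /data/service/hadoop/ root@${host}:/data/service/hadoop/
done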

Start YARN

  • RM-Master
$HADOOP_HOME/sbin/start-yarn.sh
  • RM-HistoryServer
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
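  • mr-jobhistory-daemon.sh is deprecated in Hadoop 3; the equivalent command is
mapred --daemon start historyserver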

Verify YARN

  • YARN Web UI: 10.10.10.131:8088
  • YARN Job History Web UI: 10.10.10.131:19888
  • Check from the command line
yarn rmadmin -getAllServiceState
hadoop1:8033                                       standby   
hadoop2:8033                                       active

yarn node -list
2022-11-16 14:52:41,372 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
Total Nodes:3
         Node-Id       Node-State Node-Http-Address Number-of-Running-Containers
    redis3:37413          RUNNING       redis3:8042                            0
    redis1:36337          RUNNING       redis1:8042                            0
    redis2:45012          RUNNING       redis2:8042                            0

yarn queue -status default
2022-11-16 13:47:14,829 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
Queue Information : 
Queue Name : root.default
  State : RUNNING
  Capacity : .0%
  Current Capacity : .0%
  Maximum Capacity : -100.0%
  Default Node Label expression : <DEFAULT_PARTITION>
  Accessible Node Labels : 

yarn queue -status test_queue
2022-11-16 13:47:21,889 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
Queue Information : 
Queue Name : root.test_queue
  State : RUNNING
  Capacity : .0%
  Current Capacity : .0%
  Maximum Capacity : -100.0%
  Default Node Label expression : <DEFAULT_PARTITION>
  Accessible Node Labels : 

yarn queue -status hbase
2022-11-16 13:47:27,761 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
Queue Information : 
Queue Name : root.hbase
  State : RUNNING
  Capacity : .0%
  Current Capacity : .0%
  Maximum Capacity : -100.0%
  Default Node Label expression : <DEFAULT_PARTITION>
  Accessible Node Labels :

Common Operations

# Create a test directory
hdfs dfs -mkdir -p hdfs://hdfscluster/test/input

# Upload a test text file
hdfs dfs -put /data/service/hadoop/TheCountofMonteCristo.txt hdfs://hdfscluster/test/input/

# Submit a YARN job
# Do not create the output directory in advance; the job fails if it already exists
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.1.jar wordcount hdfs://hdfscluster/test/input/TheCountofMonteCristo.txt hdfs://hdfscluster/test/output/
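# When re-running the job, remove the previous output directory first, for example:
hdfs dfs -rm -r hdfs://hdfscluster/test/output/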

# Submit a job to a specific queue
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.1.jar wordcount -Dmapreduce.job.queuename=test1 hdfs://hdfscluster/test/input/TheCountofMonteCristo.txt hdfs://hdfscluster/test/output/

# List all running applications
yarn application -list
Total number of applications (application-types: [], states: [SUBMITTED, ACCEPTED, RUNNING] and tags: []):1
                Application-Id      Application-Name        Application-Type          User           Queue                   State             Final-State             Progress                        Tracking-URL
application_1684895892069_0003            word count               MAPREDUCE          root       root.root                ACCEPTED               UNDEFINED                   0%                                 N/A

# Kill a specific application
yarn application -kill <application id>

# Show the detailed status of an application
yarn application -status <application id>
Application Report :
        Application-Id : application_1684895892069_0003
        Application-Name : word count
        Application-Type : MAPREDUCE
        User : root
        Queue : root.root
        Application Priority : 0
        Start-Time : 1684896396433
        Finish-Time : 0
        Progress : 50%
        State : RUNNING
        Final-State : UNDEFINED
        Tracking-URL :  http://redis1:45887
        RPC Port : 34787
        AM Host : redis1
        Aggregate Resource Allocation : 57139 MB-seconds, 33 vcore-seconds
        Aggregate Resource Preempted : 0 MB-seconds, 0 vcore-seconds
        Log Aggregation Status : NOT_START
        Diagnostics :
        Unmanaged Application : false
        Application Node Label Expression : <Not set>
        AM container Node Label Expression : <DEFAULT_PARTITION>
        TimeoutType : LIFETIME  ExpiryTime : UNLIMITED  RemainingTime : -1seconds

# List applications of a specific type
yarn application -list -appTypes <application type>
Total number of applications (application-types: [MAPREDUCE], states: [SUBMITTED, ACCEPTED, RUNNING] and tags: []):1
                Application-Id      Application-Name        Application-Type          User           Queue                   State             Final-State             Progress                        Tracking-URL
application_1684895892069_0004            word count               MAPREDUCE          root       root.root                ACCEPTED               UNDEFINED                   0%                                 N/A

# Fetch the logs of an application
yarn logs -applicationId <application id> >> app.log

Dynamically Adjust Queue Configuration

# Check whether queue test1 exists
yarn queue -status test1
Cannot get queue from RM by queueName = test1, please check.

# Add the test1 queue to yarn-fairscheduler.xml
vi yarn-fairscheduler.xml
  <queue name="test1">
    <minResources>2048mb,2vcores</minResources>
    <maxResources>4096mb,4vcores</maxResources>
    <maxRunningApps>10</maxRunningApps>
    <schedulingMode>fair</schedulingMode>
    <weight>2.0</weight>
    <aclSubmitApps>*</aclSubmitApps>
    <aclAdministerApps>hbase</aclAdministerApps>
  </queue>

# Sync yarn-fairscheduler.xml to all ResourceManagers
scp yarn-fairscheduler.xml root@hadoop2:/data/service/hadoop/hadoop-3.1.1/etc/hadoop/

# Refresh the queue configuration
# The queues may sometimes reload automatically, but to be safe, refresh manually after changing the configuration
yarn rmadmin -refreshQueues

# Re-check the status of queue test1
yarn queue -status test1
Queue Information :
Queue Name : root.test1
        State : RUNNING
        Capacity : .0%
        Current Capacity : .0%
        Maximum Capacity : -100.0%
        Default Node Label expression : <DEFAULT_PARTITION>
        Accessible Node Labels :