Hadoop 3.1.1 YARN Cluster Deployment
- Dependencies
- System optimization
- Check ZooKeeper cluster status
- Create directories
- Configure /etc/profile
- Configure $HADOOP_HOME/etc/hadoop/yarn-env.sh
- Configure $HADOOP_HOME/etc/hadoop/mapred-site.xml
- Create the NodeManager exclude file
- Configure $HADOOP_HOME/etc/hadoop/yarn-site.xml
- Create $HADOOP_HOME/etc/hadoop/yarn-fairscheduler.xml
- Storage/compute separation configuration
- 1. Create $HADOOP_HOME/etc/hadoop/nmworkers
- 2. Adjust $HADOOP_HOME/sbin/start-yarn.sh
- 3. Adjust $HADOOP_HOME/sbin/stop-yarn.sh
- Distribute the configuration
- Start YARN
- Verify YARN
- Common operations
- Dynamically adjust queue configuration
Dependencies
- JDK 1.8.0_333
- ZooKeeper 3.5.10:https://blog.csdn.net/weixin_42598916/article/details/135726572?spm=1001.2014.3001.5502
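Before starting, it may help to confirm that both dependencies are actually available on every node; a minimal check, assuming the JDK and ZooKeeper install paths used elsewhere in this guide:

```bash
# Verify the JDK on the current node
java -version                      # expect 1.8.0_333

# Verify the local ZooKeeper instance (run on each ZooKeeper node)
$ZK_HOME/bin/zkServer.sh status    # expect Mode: leader or follower
```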
System optimization
- Apply the following optimizations on every node
```bash
# Change the hostname as needed
hostname hadoop1

# Disable SELinux
# Set the SELINUX value to disabled
vi /etc/selinux/config
SELINUX=disabled
# Only takes effect after a reboot
# Check the SELinux status
getenforce

# Disable the firewall
systemctl stop firewalld && systemctl disable firewalld && systemctl status firewalld

# Install the Chrony service
yum install chrony -y

# Configure the Chrony service
# Comment out the default NTP servers
# Add the NTP server you want to use
vi /etc/chrony.conf
server hadoop1 iburst

# Restart Chrony and enable it at boot
systemctl enable chronyd --now

# Check the Chrony status
chronyc sources -v
210 Number of sources = 1

  .-- Source mode  '^' = server, '=' = peer, '#' = local clock.
 / .- Source state '*' = current synced, '+' = combined , '-' = not combined,
| /   '?' = unreachable, 'x' = time may be in error, '~' = time too variable.
||                                                 .- xxxx [ yyyy ] +/- zzzz
||      Reachability register (octal) -.           |  xxxx = adjusted offset,
||      Log2(Polling interval) --.      |          |  yyyy = measured offset,
||                                \     |          |  zzzz = estimated error.
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* hadoop1                       4   6   377    12    -28us[  -45us] +/-   75ms

# Set up passwordless SSH
# Generate id_rsa.pub on every node
ssh-keygen -t rsa
# Append every node's id_rsa.pub to the authorized_keys file on all nodes
cat ~/.ssh/id_rsa.pub | ssh root@hadoop1 'cat >> /root/.ssh/authorized_keys'
cat ~/.ssh/id_rsa.pub | ssh root@hadoop2 'cat >> /root/.ssh/authorized_keys'
cat ~/.ssh/id_rsa.pub | ssh root@hadoop3 'cat >> /root/.ssh/authorized_keys'
# Final result (keys shortened here)
cat /root/.ssh/authorized_keys
# redis-nodes
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDwuKw9... root@redis1
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkspWe... root@redis2
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC+DGKA... root@redis3
# hadoop-nodes
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCvWawS... root@hadoop1
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCwCqgQ... root@hadoop2
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDHmj5q... root@hadoop3

# Configure the Oracle JDK
# Download the Oracle JDK and place it in the target directory
# Append to /etc/profile
cat >> /etc/profile << 'EOF'
# Oracle JDK 1.8.0_333
export JAVA_HOME=/data/service/jdk/jdk1.8.0_333
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin
EOF
# Reload the configuration
source /etc/profile
# Check the JDK
java -version
java version "1.8.0_333"
Java(TM) SE Runtime Environment (build 1.8.0_333-b02)
Java HotSpot(TM) 64-Bit Server VM (build 25.333-b02, mixed mode)

# Configure the hosts file
cat >> /etc/hosts << EOF
# redis-nodes
10.10.10.21 redis1
10.10.10.22 redis2
10.10.10.23 redis3
# hadoop-nodes
10.10.10.131 hadoop1
10.10.10.132 hadoop2
10.10.10.133 hadoop3
EOF

# Disable swap
swapoff -a
# Comment out the swap mount in /etc/fstab
vi /etc/fstab

# Configure vm.swappiness
echo "vm.swappiness = 0" >> /etc/sysctl.conf
# Apply the change
sysctl -p

# Disable transparent hugepages
# Effective immediately
echo never > /sys/kernel/mm/transparent_hugepage/enabled && echo never > /sys/kernel/mm/transparent_hugepage/defrag
# Persist across reboots
echo "echo never > /sys/kernel/mm/transparent_hugepage/enabled" >> /etc/rc.local && echo "echo never > /sys/kernel/mm/transparent_hugepage/defrag" >> /etc/rc.local

# Raise the open-file and process limits
# On CentOS 6 the file is 90-nproc.conf
# On CentOS 7 the file is 20-nproc.conf
vi /etc/security/limits.d/20-nproc.conf
*    -    nofile    655350
*    -    nproc     655350
```
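A quick way to spot-check that these settings actually took effect on every node is a small SSH loop; this is only a sketch, assuming the three hadoop hostnames above and passwordless root SSH:

```bash
# Spot-check SELinux, firewall, swap and transparent hugepages on each node
for host in hadoop1 hadoop2 hadoop3; do
  echo "===== $host ====="
  ssh root@"$host" 'getenforce; systemctl is-active firewalld; swapon --show; cat /sys/kernel/mm/transparent_hugepage/enabled'
done
```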
Check ZooKeeper cluster status
```bash
$ZK_HOME/bin/zkCli.sh -server hadoop1:2181,hadoop2:2181,hadoop3:2181

[zk: hadoop1:2181,hadoop2:2181,hadoop3:2181(CONNECTED) 0] ls /
[admin, brokers, cluster, config, consumers, controller, controller_epoch, hadoop-ha, hbase, isr_change_notification, latest_producer_id_block, log_dir_event_notification, rmstore, spark, yarn-leader-election, zookeeper]
```
Create directories
```bash
mkdir -p /data/service/hadoop/yarn-app-logs/logs
```
Configure /etc/profile
```bash
# Hadoop 3.1.1
export HADOOP_HOME=/data/service/hadoop/hadoop-3.1.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/lib
```
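After reloading the profile, a quick sanity check (assuming the install path above):

```bash
source /etc/profile
hadoop version    # expect "Hadoop 3.1.1"
```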
Configure $HADOOP_HOME/etc/hadoop/yarn-env.sh
```bash
export JAVA_HOME="/data/service/jdk/jdk1.8.0_333"
export HADOOP_HOME="/data/service/hadoop/hadoop-3.1.1"

export YARN_RESOURCEMANAGER_OPTS="-Xms1024m -Xmx1024m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=80 -XX:+CMSParallelRemarkEnabled"
export YARN_NODEMANAGER_OPTS="-Xms512m -Xmx512m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=80 -XX:+CMSParallelRemarkEnabled"
export YARN_JOBHISTORY_OPTS="-Xms512m -Xmx512m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=80 -XX:+CMSParallelRemarkEnabled"

export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export YARN_JOBHISTORY_USER=root
```
Configure $HADOOP_HOME/etc/hadoop/mapred-site.xml
```xml
<configuration>
    <!-- MapReduce -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
        <description>Run MapReduce on the YARN framework</description>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
        <description>Environment for the MapReduce ApplicationMaster</description>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
        <description>Environment for map tasks</description>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
        <description>Environment for reduce tasks</description>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /data/service/hadoop/hadoop-3.1.1/etc/hadoop,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/common/lib/*,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/common/*,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/hdfs,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/hdfs/lib/*,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/hdfs/*,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/mapreduce/lib/*,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/mapreduce/*,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/yarn,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/yarn/lib/*,
            /data/service/hadoop/hadoop-3.1.1/share/hadoop/yarn/*
        </value>
        <description>Classpath for MapReduce jobs; it can be generated with the hadoop classpath command</description>
    </property>
    <!-- JobHistory -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop1:10020</value>
        <description>JobHistory server address</description>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop1:19888</value>
        <description>JobHistory web UI address</description>
    </property>
</configuration>
```
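The long mapreduce.application.classpath value above does not need to be typed by hand; as the description notes, it can be taken from the hadoop classpath command:

```bash
# Print the classpath resolved by the local Hadoop installation;
# paste its entries (comma-separated) into mapreduce.application.classpath
hadoop classpath
```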
Create the NodeManager exclude file
```bash
touch $HADOOP_HOME/etc/hadoop/yarn-exclude.txt
```
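This is the file referenced by yarn.resourcemanager.nodes.exclude-path in yarn-site.xml below. For reference, a NodeManager can later be drained from the cluster roughly like this (redis3 is only an illustrative hostname):

```bash
# Add the node to the exclude list and tell the ResourceManager to re-read it
echo "redis3" >> $HADOOP_HOME/etc/hadoop/yarn-exclude.txt
yarn rmadmin -refreshNodes
```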
Configure $HADOOP_HOME/etc/hadoop/yarn-site.xml
```xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
        <description>Enable the MapReduce shuffle service</description>
    </property>
    <property>
        <name>yarn.resourcemanager.nodes.exclude-path</name>
        <value>/data/service/hadoop/hadoop-3.1.1/etc/hadoop/yarn-exclude.txt</value>
        <description>Path of the NodeManager exclude (blacklist) file</description>
    </property>
    <!-- ResourceManager HA -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
        <description>Enable ResourceManager HA</description>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yarncluster</value>
        <description>YARN cluster ID</description>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
        <description>Logical IDs of the ResourceManagers</description>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>hadoop1</value>
        <description>Hostname of rm1</description>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>hadoop2</value>
        <description>Hostname of rm2</description>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>hadoop1:8088</value>
        <description>Web UI address of rm1</description>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>hadoop2:8088</value>
        <description>Web UI address of rm2</description>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
        <description>ZooKeeper quorum address</description>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
        <description>Enable ResourceManager state recovery</description>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
        <description>Store ResourceManager state in ZooKeeper</description>
    </property>
    <!-- Log aggregation -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
        <description>Enable log aggregation</description>
    </property>
    <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/yarn-aggregation-logs</value>
        <description>HDFS path for aggregated logs</description>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>3600</value>
        <description>Retention time for aggregated logs</description>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/data/service/hadoop/yarn-app-logs</value>
        <description>NodeManager local (intermediate) data directory</description>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/data/service/hadoop/yarn-app-logs/logs</value>
        <description>NodeManager local container log directory</description>
    </property>
    <property>
        <name>yarn.nodemanager.delete.debug-delay-sec</name>
        <value>0</value>
        <description>Delay, in seconds, before the NodeManager deletes local logs and data after an application finishes</description>
    </property>
    <!-- Resource management -->
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>8192</value>
        <description>Total memory each NodeManager may use</description>
    </property>
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>4</value>
        <description>Total vcores each NodeManager may use</description>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>true</value>
        <description>Kill a container if its physical memory usage exceeds its allocation</description>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>true</value>
        <description>Kill a container if its virtual memory usage exceeds its allocation</description>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-pmem-ratio</name>
        <value>2</value>
        <description>Maximum virtual memory a task may use per 1 MB of physical memory</description>
    </property>
    <!-- Queue management -->
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
        <description>Use the FairScheduler (fair-sharing policy) for queue resource allocation</description>
    </property>
    <property>
        <name>yarn.scheduler.fair.user-as-default-queue</name>
        <value>true</value>
        <description>If no queue is specified, use the submitting user's name as the queue (jobs submitted by root land in root.root)</description>
    </property>
    <property>
        <name>yarn.scheduler.fair.allocation.file</name>
        <value>/data/service/hadoop/hadoop-3.1.1/etc/hadoop/yarn-fairscheduler.xml</value>
        <description>Path of the queue allocation file</description>
    </property>
    <property>
        <name>yarn.scheduler.fair.preemption</name>
        <value>false</value>
        <description>Disable preemption</description>
    </property>
    <!-- Tuning -->
    <property>
        <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
        <value>95</value>
        <description>Maximum disk utilization percentage allowed by the disk health checker, based on total disk capacity and HDFS usage</description>
    </property>
</configuration>
```
Create $HADOOP_HOME/etc/hadoop/yarn-fairscheduler.xml
- If yarn.resourcemanager.scheduler.class in yarn-site.xml does not point to the FairScheduler, this file is not needed
```xml
<?xml version="1.0"?>
<allocations>
    <queue name="test_queue">
        <!-- Minimum guaranteed queue resources -->
        <minResources>1024 mb, 1 vcores</minResources>
        <!-- Maximum queue resources -->
        <maxResources>2048 mb, 2 vcores</maxResources>
        <!-- Maximum number of concurrently running applications -->
        <maxRunningApps>5</maxRunningApps>
        <!-- Queue weight -->
        <weight>1.0</weight>
        <!-- Scheduling policy for the queue -->
        <schedulingPolicy>fair</schedulingPolicy>
        <!-- Users allowed to submit to the queue -->
        <aclSubmitApps>*</aclSubmitApps>
        <!-- Users allowed to administer the queue -->
        <aclAdministerApps>*</aclAdministerApps>
    </queue>
    <queue name="hbase">
        <minResources>2048 mb, 2 vcores</minResources>
        <maxResources>4096 mb, 4 vcores</maxResources>
        <maxRunningApps>10</maxRunningApps>
        <schedulingPolicy>fair</schedulingPolicy>
        <weight>2.0</weight>
        <aclSubmitApps>*</aclSubmitApps>
        <aclAdministerApps>hbase</aclAdministerApps>
    </queue>
</allocations>
```
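Since a malformed allocation file will prevent the FairScheduler from loading queue changes, it can be worth validating the XML before distributing it; a simple check, assuming xmllint (from libxml2) is installed:

```bash
# Exits non-zero and prints the parse error if the file is not well-formed XML
xmllint --noout $HADOOP_HOME/etc/hadoop/yarn-fairscheduler.xml
```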
Storage/compute separation configuration
- By default, hadoop-HDFS and hadoop-YARN control their worker nodes with the same file, $HADOOP_HOME/etc/hadoop/workers
- Without any adjustment, the DataNodes and NodeManagers always live on the same nodes: a node either runs both or runs neither
- With the adjustments below, the nodes hosting DataNodes and NodeManagers can be chosen independently, without affecting each other
1. Create $HADOOP_HOME/etc/hadoop/nmworkers
```text
redis1
redis2
redis3
```
2. Adjust $HADOOP_HOME/sbin/start-yarn.sh
echo "Starting nodemanagers" hadoop_uservar_su yarn nodemanager "${HADOOP_YARN_HOME}/bin/yarn" --config "${HADOOP_CONF_DIR}" --workers --hosts "/data/service/hadoop/hadoop-3.1.1/etc/hadoop/nmworkers" --daemon start nodemanager (( HADOOP_JUMBO_RETCOUNTER=HADOOP_JUMBO_RETCOUNTER + $? ))
3. Adjust $HADOOP_HOME/sbin/stop-yarn.sh
echo "Stopping nodemanagers" hadoop_uservar_su yarn nodemanager "${HADOOP_YARN_HOME}/bin/yarn" --config "${HADOOP_CONF_DIR}" --workers --hosts "/data/service/hadoop/hadoop-3.1.1/etc/hadoop/nmworkers" --daemon stop nodemanager
Distribute the configuration
- Distribute /data/service/hadoop to all nodes, as sketched below
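A minimal way to do this, assuming passwordless SSH to the hostnames used above (adjust the node list to your own cluster):

```bash
# Copy the whole Hadoop directory to every other node
for host in hadoop2 hadoop3 redis1 redis2 redis3; do
  rsync -a /data/service/hadoop/ root@"$host":/data/service/hadoop/
done
```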
Start YARN
- RM-Master
```bash
$HADOOP_HOME/sbin/start-yarn.sh
```
- RM-HistoryServer
```bash
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
```
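After both commands return, the daemons can be checked with jps on each node; roughly what to expect with the layout used in this guide (process IDs will differ):

```bash
# On hadoop1 / hadoop2 (ResourceManager nodes; hadoop1 also runs the JobHistoryServer)
jps
# ... ResourceManager
# ... JobHistoryServer

# On the NodeManager nodes listed in nmworkers (redis1 / redis2 / redis3)
jps
# ... NodeManager
```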
Verify YARN
- YARN Web UI:10.10.10.131:8088
- YARN Job History Web UI:10.10.10.131:19888
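Besides the browser, the ResourceManager REST API gives a quick check from the shell; for example (requests sent to the standby RM are redirected to the active one):

```bash
# Cluster summary from the ResourceManager REST API
curl -s http://10.10.10.131:8088/ws/v1/cluster/info
# Per-node report
curl -s http://10.10.10.131:8088/ws/v1/cluster/nodes
```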
- Check from the command line
```bash
yarn rmadmin -getAllServiceState
hadoop1:8033                                       standby
hadoop2:8033                                       active

yarn node -list
2022-11-16 14:52:41,372 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
Total Nodes:3
         Node-Id         Node-State    Node-Http-Address    Number-of-Running-Containers
    redis3:37413            RUNNING          redis3:8042                               0
    redis1:36337            RUNNING          redis1:8042                               0
    redis2:45012            RUNNING          redis2:8042                               0

yarn queue -status default
2022-11-16 13:47:14,829 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
Queue Information :
Queue Name : root.default
        State : RUNNING
        Capacity : .0%
        Current Capacity : .0%
        Maximum Capacity : -100.0%
        Default Node Label expression : <DEFAULT_PARTITION>
        Accessible Node Labels :

yarn queue -status test_queue
2022-11-16 13:47:21,889 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
Queue Information :
Queue Name : root.test_queue
        State : RUNNING
        Capacity : .0%
        Current Capacity : .0%
        Maximum Capacity : -100.0%
        Default Node Label expression : <DEFAULT_PARTITION>
        Accessible Node Labels :

yarn queue -status hbase
2022-11-16 13:47:27,761 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
Queue Information :
Queue Name : root.hbase
        State : RUNNING
        Capacity : .0%
        Current Capacity : .0%
        Maximum Capacity : -100.0%
        Default Node Label expression : <DEFAULT_PARTITION>
        Accessible Node Labels :
```
Common operations
```bash
# Create a test directory
hdfs dfs -mkdir -p hdfs://hdfscluster/test/input

# Upload a test text file
hdfs dfs -put /data/service/hadoop/TheCountofMonteCristo.txt hdfs://hdfscluster/test/input/

# Submit a YARN job
# Do not create the output path beforehand, otherwise the job fails because the directory already exists
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.1.jar wordcount hdfs://hdfscluster/test/input/TheCountofMonteCristo.txt hdfs://hdfscluster/test/output/

# Submit a job to a specific queue
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.1.jar wordcount -Dmapreduce.job.queuename=test1 hdfs://hdfscluster/test/input/TheCountofMonteCristo.txt hdfs://hdfscluster/test/output/

# List all running applications
yarn application -list
Total number of applications (application-types: [], states: [SUBMITTED, ACCEPTED, RUNNING] and tags: []):1
                Application-Id      Application-Name    Application-Type      User       Queue       State    Final-State    Progress    Tracking-URL
application_1684895892069_0003            word count           MAPREDUCE      root   root.root    ACCEPTED      UNDEFINED          0%             N/A

# Kill an application
yarn application -kill <application id>

# Show the detailed status of an application
yarn application -status <application id>
Application Report :
        Application-Id : application_1684895892069_0003
        Application-Name : word count
        Application-Type : MAPREDUCE
        User : root
        Queue : root.root
        Application Priority : 0
        Start-Time : 1684896396433
        Finish-Time : 0
        Progress : 50%
        State : RUNNING
        Final-State : UNDEFINED
        Tracking-URL : http://redis1:45887
        RPC Port : 34787
        AM Host : redis1
        Aggregate Resource Allocation : 57139 MB-seconds, 33 vcore-seconds
        Aggregate Resource Preempted : 0 MB-seconds, 0 vcore-seconds
        Log Aggregation Status : NOT_START
        Diagnostics :
        Unmanaged Application : false
        Application Node Label Expression : <Not set>
        AM container Node Label Expression : <DEFAULT_PARTITION>
        TimeoutType : LIFETIME  ExpiryTime : UNLIMITED  RemainingTime : -1seconds

# List applications of a specific type
yarn application -list -appTypes <application type>
Total number of applications (application-types: [MAPREDUCE], states: [SUBMITTED, ACCEPTED, RUNNING] and tags: []):1
                Application-Id      Application-Name    Application-Type      User       Queue       State    Final-State    Progress    Tracking-URL
application_1684895892069_0004            word count           MAPREDUCE      root   root.root    ACCEPTED      UNDEFINED          0%             N/A

# Save the logs of an application to a file
yarn logs -applicationId <application id> >> app.log
```
Dynamically adjust queue configuration
```bash
# Check whether queue test1 exists
yarn queue -status test1
Cannot get queue from RM by queueName = test1, please check.

# Add the test1 queue to yarn-fairscheduler.xml
vi yarn-fairscheduler.xml
<queue name="test1">
    <minResources>2048 mb, 2 vcores</minResources>
    <maxResources>4096 mb, 4 vcores</maxResources>
    <maxRunningApps>10</maxRunningApps>
    <schedulingPolicy>fair</schedulingPolicy>
    <weight>2.0</weight>
    <aclSubmitApps>*</aclSubmitApps>
    <aclAdministerApps>hbase</aclAdministerApps>
</queue>

# Sync yarn-fairscheduler.xml to every ResourceManager node
scp yarn-fairscheduler.xml root@hadoop2:/data/service/hadoop/hadoop-3.1.1/etc/hadoop/

# Refresh the queue configuration
# The scheduler sometimes reloads the file on its own, but to be safe, refresh manually after changing it
yarn rmadmin -refreshQueues

# Re-check queue test1
yarn queue -status test1
Queue Information :
Queue Name : root.test1
        State : RUNNING
        Capacity : .0%
        Current Capacity : .0%
        Maximum Capacity : -100.0%
        Default Node Label expression : <DEFAULT_PARTITION>
        Accessible Node Labels :
```
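Once the new queue is live, an already-running application can also be moved into it; a small example (the application ID is illustrative):

```bash
# Move a running application into the new queue
yarn application -movetoqueue application_1684895892069_0003 -queue test1
```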