一、分布式集群的文件配置
涉及$HADOOP_HOME/etc/hadoop路径下的5个文件
workers、core-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml
首先修改workers
进入$HADOOP_HOME/etc/hadoop
vim workers
编辑该文件,填入集群中作为数据节点(dn)的主机名。注意:每行只写一个主机名。把本机主机名写进来时,本机会同时作为数据节点(dn)和名称节点(nn);如果某台主机只想做名称节点(nn),就不要把它的主机名写进来。

之后修改core-site.xml
<configuration>
  <!-- Default filesystem URI; it points at the NameNode on hadoop100, port 8020 -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop100:8020</value>
  </property>

  <!-- Base directory under which Hadoop keeps its local data on each node -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/module/hadoop-3.3.3/data</value>
    <description>A base for other temporary directories.</description>
  </property>
</configuration>
修改hdfs-site.xml
<configuration>
  <!-- NameNode (nn) web UI address -->
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop100:9870</value>
  </property>

  <!-- Secondary NameNode (2nn) web UI address -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop103:9868</value>
  </property>

  <!-- Number of replicas kept for each HDFS block -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
修改mapred-site.xml
<configuration>
  <!-- Run MapReduce jobs on YARN -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- JobHistory server address (host:port) -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop100:10020</value>
  </property>

  <!-- JobHistory server web UI address (host:port) -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop100:19888</value>
  </property>
</configuration>
修改yarn-site.xml
<configuration>
  <!-- Site specific YARN configuration properties -->

  <!-- Host that runs the ResourceManager -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop102</value>
  </property>

  <!-- Auxiliary service on the NodeManagers that provides the MapReduce shuffle -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <!-- Enable the log aggregation feature -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>

  <!-- URL of the log server; points at the JobHistory web UI on hadoop100:19888 -->
  <property>
    <name>yarn.log.server.url</name>
    <value>http://hadoop100:19888/jobhistory/logs</value>
  </property>

  <!-- Keep aggregated logs for 7 days (604800 seconds).
       The key must be spelled with dots only; a misspelled key is silently ignored. -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
</configuration>
然后将配置分发到其他机器中
# Change into the etc directory first, then sync the hadoop/ config directory to the other hosts
cd /opt/module/hadoop-3.3.3/etc && xsync hadoop/
分发脚本