程序员的资源宝库

网站首页 > gitee 正文

基于Flink+Hadoop+Kafka+Flume的HiBench测试框架相关

sanyeah 2024-03-29 17:07:16 gitee 6 ℃ 0 评论

一、环境变量配置

 1 #vim ~/.bashrc
 2 export STREAM_HOME=/home/stream
 3 export JAVA_HOME=$STREAM_HOME/jdk1.8.0_271
 4 export CLASSPATH=.:$JAVA_HOME/lib/
 5 export HADOOP_HOME=$STREAM_HOME/hadoop-2.8.3
 6 export PATH=$HADOOP_HOME/bin:$JAVA_HOME/bin:$PATH
 7 export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
 8 export HADOOP_CLASSPATH=$(hadoop classpath)
 9 export FLINK_HOME=$STREAM_HOME/flink-1.10-SNAPSHOT
10 export PATH=$PATH:$FLINK_HOME/bin
11 #source ~/.bashrc
1 #vim /etc/hosts
2 215.9.51.171    master
3 215.9.51.169    slave1
4 215.9.51.170    slave2

二、Hadoop配置

2.1 core-site.xml

 1 <configuration>
 2   <property>
 3     <name>hadoop.tmp.dir</name>
 4     <value>/tmp/hadoop/tmp</value>
 5   </property>
 6   <property>
 7     <name>io.file.buffer.size</name>
 8     <value>131072</value>
 9   </property>
10   <property>
11     <name>fs.defaultFS</name>
12     <value>hdfs://master:9000</value>
13   </property>
14   <property>
15     <name>hadoop.proxyuser.root.hosts</name>
16     <value>*</value>
17   </property>
18   <property>
19     <name>hadoop.proxyuser.root.groups</name>
20     <value>*</value>
21   </property>
22 </configuration>

2.2 hdfs-site.xml

 1 <configuration>
 2   <property>
 3     <name>dfs.replication</name>
 4     <value>2</value>
 5   </property>
 6   <property>
 7     <name>dfs.namenode.name.dir</name>
 8     <value>/tmp/hadoop/name</value>
 9   </property>
10   <property>
11     <name>dfs.datanode.data.dir</name>
12     <value>/tmp/hadoop/data</value>
13   </property>
14   <property>
15     <name>dfs.permissions</name>
16     <value>false</value>
17   </property>
18   <property>
19     <name>dfs.webhdfs.enabled</name>
20     <value>true</value>
21   </property>
22 </configuration>

2.3 mapred-site.xml

 1 <configuration>
 2   <property>
 3     <name>mapreduce.framework.name</name>
 4     <value>yarn</value>
 5   </property>
 6   <property>
 7     <name>mapreduce.reduce.memory.mb</name>
 8     <value>2048</value>
 9   </property>
10   <property>
11     <name>mapreduce.map.memory.mb</name>
12     <value>2048</value>
13   </property>
14 </configuration>

2.4 yarn-site.xml

 1 <configuration>
 2   <property>
 3     <name>yarn.resourcemanager.address</name>
 4     <value>master:18040</value>
 5   </property>
 6   <property>
 7     <name>yarn.resourcemanager.scheduler.address</name>
 8     <value>master:18030</value>
 9   </property>
10   <property>
11     <name>yarn.resourcemanager.webapp.address</name>
12     <value>master:18088</value>
13   </property>
14   <property>
15     <name>yarn.resourcemanager.resource-tracker.address</name>
16     <value>master:18025</value>
17   </property>
18   <property>
19     <name>yarn.resourcemanager.admin.address</name>
20     <value>master:18141</value>
21    </property>
22   <property>
23     <name>yarn.nodemanager.aux-services</name>
24     <value>mapreduce_shuffle</value>
25   </property>
26   <property>
27     <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
28     <value>org.apache.hadoop.mapred.ShuffleHandler</value>
29   </property>
30   <property>
31     <name>yarn.nodemanager.resource.memory-mb</name>
32     <value>4096</value>
33   </property>
34   <property>
35     <name>yarn.scheduler.minimum-allocation-mb</name>
36     <value>2048</value>
37   </property>
38   <property>
39     <name>yarn.scheduler.maximum-allocation-mb</name>
40     <value>4096</value>
41   </property>
42 </configuration>

2.5 拷贝分发

scp -r hadoop-2.8.3 root@slave_host:/home/stream

2.6 启动

1 #mkdir -p /tmp/hadoop/{name,data,tmp}
2 ./bin/hdfs namenode -format
3 #Web UI:master_host:50070
4 ./sbin/start-dfs.sh
5 #Web UI:master_host:18088
6 ./sbin/start-yarn.sh

三、Flink相关配置

3.1 flink-conf.yaml

 1 jobmanager.rpc.address: master
 2 jobmanager.rpc.port: 6123
 3 jobmanager.heap.size: 1024m
 4 
 5 taskmanager.tmp.dirs: /home/stream/flink-1.10-SNAPSHOT/tmp
 6 taskmanager.heap.size: 1024m
 7 taskmanager.numberOfTaskSlots: 2
 8 parallelism.default: 1
 9 taskmanager.memory.preallocate: false
10 
11 state.backend.fs.checkpoints.dir: hdfs://master:9000/flink-checkpoints
12 fs.hdfs.hadoopconf: /home/stream/hadoop-2.8.3/etc/hadoop
13 fs.hdfs.hdfssite: /home/stream/hadoop-2.8.3/etc/hadoop/hdfs-site.xml
14 
15 rest.port: 8081
16 rest.address: 0.0.0.0
17 
18 taskmanager.memory.network.fraction: 0.1
19 taskmanager.memory.network.min: 64mb
20 taskmanager.memory.network.max: 1gb
21 
22 metrics.reporters: prom
23 metrics.reporter.prom.class: org.apache.flink.metrics.prometheus.PrometheusReporter
24 metrics.reporter.prom.port: 9250-9260

3.2 masters

master:8081

3.3 slaves

master
slave1
slave2

3.4 lib目录下补充jar包

1 flink-metrics-prometheus_2.11-1.7.2.jar
2 flink-shaded-hadoop-2-uber-2.8.3-10.0.jar

3.5 拷贝分发

scp -r flink-1.10-SNAPSHOT root@slave_host:/home/stream

四、Flume启动

./bin/flume-ng agent --conf conf --conf-file job/hibench.conf -name a2 -Dflume.root.logger=INFO,console -Dflume.monitoring.type=http -Dflume.monitoring.port=9888 &

五、HiBench启动

1 #deploy
2 ./bin/workloads/streaming/identity/prepare/genSeedDataset.sh
3 nohup ./bin/workloads/streaming/identity/prepare/dataGen.sh &
4 ./bin/workloads/streaming/identity/flink/run.sh
5 #destroy
6 yarn application -kill application_id

 

Tags:

本文暂时没有评论,来添加一个吧(●'◡'●)

欢迎 发表评论:

最近发表
标签列表