Hadoop 2.0 YARN(Cloudera CDH 4.4.0)WordCount 实例

太过爱你忘了你带给我的痛 2022-09-18 09:55 149阅读 0赞

SouthEast

其他都没啥,完整代码和所需的 jar 都在这里了。

  1. import java.io.IOException;
  2. import java.util.StringTokenizer;
  3. import org.apache.hadoop.conf.Configuration;
  4. import org.apache.hadoop.fs.Path;
  5. import org.apache.hadoop.io.IntWritable;
  6. import org.apache.hadoop.io.Text;
  7. import org.apache.hadoop.mapreduce.Job;
  8. import org.apache.hadoop.mapreduce.Mapper;
  9. import org.apache.hadoop.mapreduce.Reducer;
  10. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  11. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  12. import org.apache.hadoop.util.GenericOptionsParser;
  13. public class WordCount {
  14. public static class TokenizerMapper
  15. extends Mapper<Object, Text, Text, IntWritable>{
  16. private final static IntWritable one = new IntWritable(1);
  17. private Text word = new Text();
  18. public void map(Object key, Text value, Context context
  19. ) throws IOException, InterruptedException {
  20. StringTokenizer itr = new StringTokenizer(value.toString());
  21. while (itr.hasMoreTokens()) {
  22. word.set(itr.nextToken());
  23. context.write(word, one);
  24. }
  25. }
  26. }
  27. public static class IntSumReducer
  28. extends Reducer<Text,IntWritable,Text,IntWritable> {
  29. private IntWritable result = new IntWritable();
  30. public void reduce(Text key, Iterable<IntWritable> values,
  31. Context context
  32. ) throws IOException, InterruptedException {
  33. int sum = 0;
  34. for (IntWritable val : values) {
  35. sum += val.get();
  36. }
  37. result.set(sum);
  38. context.write(key, result);
  39. }
  40. }
  41. public static void main(String[] args) throws Exception {
  42. Configuration conf = new Configuration();
  43. String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  44. if (otherArgs.length != 2) {
  45. System.err.println("Usage: wordcount <in> <out>");
  46. System.exit(2);
  47. }
  48. Job job = new Job(conf, "word count");
  49. job.setJarByClass(WordCount.class);
  50. job.setMapperClass(TokenizerMapper.class);
  51. job.setCombinerClass(IntSumReducer.class);
  52. job.setReducerClass(IntSumReducer.class);
  53. job.setOutputKeyClass(Text.class);
  54. job.setOutputValueClass(IntWritable.class);
  55. FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  56. FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  57. System.exit(job.waitForCompletion(true) ? 0 : 1);
  58. }
  59. }

导出为Jar包,上传(发现这个版本居然不知道怎么在Eclipse里直接跑,只好上传了)

SouthEast 1

./hadoop jar ../etc/hadoop/WordCount.jar /user/hadoop/input /user/hadoop/output

input里是yard02的etc/profile文件

结果:

  1. != 1
  2. "$BASH" 2
  3. "$PS1" 1
  4. "/bin/sh" 1
  5. "`id 1
  6. # 6
  7. $i 2
  8. && 1
  9. ' 3
  10. (bash(1), 1
  11. (sh(1)) 1
  12. -d 1
  13. -eq 1
  14. -f 1
  15. -r 1
  16. -u`" 1
  17. . 2
  18. ...). 1
  19. .profile 1
  20. /etc/bash.bashrc 2
  21. /etc/login.defs. 1
  22. /etc/profile.d 1
  23. /etc/profile.d/*.sh; 1
  24. /etc/profile: 1
  25. 0 1
  26. Bourne 2
  27. CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH 1
  28. JAVA_HOME=/usr/local/jdk1.7.0_21 1
  29. JRE_HOME=/usr/local/jdk1.7.0_21/jre 1
  30. PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH 1
  31. PS1. 1
  32. PS1='# 1
  33. PS1='$ 1
  34. PS1='\h:\w\$ 1
  35. See 1
  36. The 2
  37. [ 7
  38. ] 1
  39. ]; 6
  40. already 1
  41. and 2
  42. ash(1), 1
  43. bash.bashrc 1
  44. by 1
  45. compatible 1
  46. default 2
  47. do 1
  48. done 1
  49. else 2
  50. export 4
  51. fi 6
  52. file 2
  53. for 2
  54. handled 1
  55. i 2
  56. if 6
  57. in 1
  58. is 1
  59. ksh(1), 1
  60. now 1
  61. pam_umask(8) 1
  62. pam_umask. 1
  63. sets 1
  64. shell 1
  65. shells 1
  66. system-wide 1
  67. the 2
  68. then 6
  69. umask 1
  70. unset 1

发表评论

表情:
评论列表 (有 0 条评论,149人围观)

还没有评论,来说两句吧...

相关阅读

    相关 Hadoop Yarn

    一、Yarn架构 ![yarn-architecture][] Yarn就是将JobTracker的职责进行拆分,将资源管理和任务调度监控拆分成独立的进程:一个全局的资