Hadoop2.0 YARN Cloudera 4.4.0 WordCount实例-蒲公英云

Hadoop2.0 YARN Cloudera 4.4.0 WordCount实例

SouthEast

其他没什么特别的，所需的 jar 包都在这里了。

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {
  public static class TokenizerMapper 
       extends Mapper<Object, Text, Text, IntWritable>{
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();
    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }
  public static class IntSumReducer 
       extends Reducer<Text,IntWritable,Text,IntWritable> {
    private IntWritable result = new IntWritable();
    public void reduce(Text key, Iterable<IntWritable> values, 
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

导出为Jar包，上传（发现这个版本居然不知道怎么在Eclipse里直接跑，只好上传了）

SouthEast 1

./hadoop jar ../etc/hadoop/WordCount.jar /user/hadoop/input /user/hadoop/output

input 目录里放的是 yard02 上的 /etc/profile 文件

结果：

!=    1
"$BASH"    2
"$PS1"    1
"/bin/sh"    1
"`id    1
#    6
$i    2
&&    1
'    3
(bash(1),    1
(sh(1))    1
-d    1
-eq    1
-f    1
-r    1
-u`"    1
.    2
...).    1
.profile    1
/etc/bash.bashrc    2
/etc/login.defs.    1
/etc/profile.d    1
/etc/profile.d/*.sh;    1
/etc/profile:    1
0    1
Bourne    2
CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH    1
JAVA_HOME=/usr/local/jdk1.7.0_21    1
JRE_HOME=/usr/local/jdk1.7.0_21/jre    1
PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH    1
PS1.    1
PS1='#    1
PS1='$    1
PS1='\h:\w\$    1
See    1
The    2
[    7
]    1
];    6
already    1
and    2
ash(1),    1
bash.bashrc    1
by    1
compatible    1
default    2
do    1
done    1
else    2
export    4
fi    6
file    2
for    2
handled    1
i    2
if    6
in    1
is    1
ksh(1),    1
now    1
pam_umask(8)    1
pam_umask.    1
sets    1
shell    1
shells    1
system-wide    1
the    2
then    6
umask    1
unset    1