package testMap;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool {

    public static class Map extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            int count = 0;
            Text aa = new Text();
            aa.set("AA"); // leftover debug variable, not used below
            while (itr.hasMoreTokens()) {
                count++;
                word.set(itr.nextToken());
                System.out.println("word=" + word.toString() + ",count=" + count);
                context.write(word, one);
            }
        }
    }
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                System.out.println("val=" + val.get());
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
    public int run(String[] args) throws Exception {
        boolean useJobTracker = true;
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        if (useJobTracker) {
            conf.set("mapred.job.tracker", "localhost:9001");
        } else {
            conf.set("mapred.job.tracker", "local");
        }

        FileSystem hdfs = FileSystem.get(conf);

        Job job = new Job(conf, "WordCount");
        job.setJarByClass(WordCount.class);
        job.setJobName("WordCount");
        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Delete the output path first if it already exists.
        Path dst_path = new Path(args[1]);
        if (hdfs.exists(dst_path)) {
            hdfs.delete(dst_path, true);
            System.out.println("Output path already exists; deleting it.");
        } else {
            System.out.println("Output path does not exist.");
        }
        FileOutputFormat.setOutputPath(job, dst_path);

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1; // 0 if the job succeeded, 1 if it failed
    }
    public static void main(String[] args) throws Exception {
        int ret = ToolRunner.run(new WordCount(), args);
        System.out.println("new WordCount");
        System.exit(ret);
    }
}
Here is the error message:

13/12/23 03:58:24 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/12/23 03:58:25 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
13/12/23 03:58:25 INFO input.FileInputFormat: Total input paths to process : 3
13/12/23 03:58:25 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
13/12/23 03:58:25 WARN snappy.LoadSnappy: Snappy native library not loaded
13/12/23 03:58:25 INFO mapred.JobClient: Running job: job_201312222115_0016
13/12/23 03:58:26 INFO mapred.JobClient: map 0% reduce 0%
13/12/23 03:58:55 INFO mapred.JobClient: Task Id : attempt_201312222115_0016_m_000000_0, Status : FAILED
java.lang.RuntimeException: java.lang.ClassNotFoundException: testMap.WordCount$Map
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:867)
    at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:719)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Unknown Source)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.ClassNotFoundException: testMap.WordCount$Map
    at java.net.URLClassLoader$1.run(Unknown Source)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(Unknown Source)
    at java.lang.ClassLoader.loadClass(Unknown Source)
    at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
    at java.lang.ClassLoader.loadClass(Unknown Source)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Unknown Source)
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:820)
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:865)
    ... 8 more
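From the "No job jar file set" warning, it looks like the job is submitted without any jar containing testMap.WordCount$Map, which would match the ClassNotFoundException on the task side. If I understand correctly, when submitting from an IDE instead of running with hadoop jar, one workaround is to point mapred.jar (the property behind JobConf#setJar) at an exported jar before constructing the Job, roughly like this (the jar path below is just a made-up example, not my real setup):

    // Hypothetical path to a jar exported from the IDE that contains testMap.WordCount;
    // must be set on conf before new Job(conf, ...) so the tasks can load the classes.
    conf.set("mapred.jar", "C:/workspace/testMap/wordcount.jar");

But I am not sure whether this is the right fix.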
Could any experts please advise? Thank you!! (I wrote this with windoop.)