Thanks a lot, jazz!!
But now I have another question. I've added a static one-dimensional array, global.
global[0] is meant to hold sum1 after map finishes its computation, and global[1] holds z; these should then be handed over to reduce and printed out.
But the printed values all come out null...
It feels like this array, just like before, isn't making it through to reduce. I know how to pass parameters now, but I've tried for a long time with the array and still can't work it out. Could you give me another hint?
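For an array, the closest thing I can find in the Configuration API is setStrings/getStrings (if I'm reading the javadoc right); a minimal sketch of that direction (the key name "wordcount.global" is just my placeholder):
Code:
// Sketch: passing an array of values through the Configuration.
// This can only carry values already known in the driver, before the
// job is submitted.
Configuration conf = new Configuration();
conf.setStrings("wordcount.global", "1.5", "2.5");      // in main()

// ...later, in setup() of the Mapper or Reducer:
String[] g = context.getConfiguration().getStrings("wordcount.global");
double g0 = Double.parseDouble(g[0]);

But sum1 and z only exist inside map(), so I don't see how they could get into the Configuration in time.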
Here is the code:
Code:
package wordcount;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {

    static int a, b;  // job parameters read from the Configuration in setup()
    // Static array meant to carry sum1 and z from map() over to reduce();
    // these are the values that print as null on the reduce side.
    static Double[] global = new Double[2];
    public static class TokenizerMapper extends Mapper<Text, Text, Text, Text> {

        @Override
        public void setup(Context context) {
            // The -D options arrive here through the Configuration.
            a = context.getConfiguration().getInt("wordcount.case.sensitive", 0);
            b = context.getConfiguration().getInt("wordcount.case.sensitive2", 0);
        }

        @Override
        public void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] values = value.toString().split(",");
            double x = Double.valueOf(values[0]);
            double y = Double.valueOf(values[1]);
            double sum1 = x + y;
            double z = y * a;
            // Stash the results in the static array for reduce() to read...
            global[0] = sum1;
            global[1] = z;
            // ...and also emit them through the normal key/value channel.
            String result = sum1 + "," + z;
            context.write(key, new Text(result));
        }
    }
    public static class IntSumReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        public void setup(Context context) {
            a = context.getConfiguration().getInt("wordcount.case.sensitive", 0);
            b = context.getConfiguration().getInt("wordcount.case.sensitive2", 0);
        }

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text val : values) {
                String[] revalues = val.toString().split(",");
                double m = Double.valueOf(revalues[0]);
                double n = Double.valueOf(revalues[1]);
                double sum2 = m + n * 2;
                double o = m * b;
                // global[0] and global[1] are the fields that print as null here.
                String f_result = sum2 + "," + o + "," + global[0] + "," + global[1];
                context.write(key, new Text(f_result));
            }
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // GenericOptionsParser pulls the -D options off the command line into
        // conf; what remains should be the <in> and <out> paths.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        /*
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        */
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
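To check whether map and reduce even share a JVM, I was thinking of logging the task's JVM name in both setup() methods, something like this (my own diagnostic idea, not from any example):
Code:
@Override
public void setup(Context context) {
    // Prints "pid@hostname" for the JVM running this task; if the mapper
    // and reducer lines differ, each task has its own copy of WordCount.global.
    System.err.println("task JVM: "
            + java.lang.management.ManagementFactory.getRuntimeMXBean().getName());
}

(If I understand the logging right, the stderr lines should show up in each task's log in the web UI.)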
The command I used:
Code:
/opt/hadoop/bin/hadoop jar /home/n4540/wordcount.jar -Dwordcount.case.sensitive=2 -Dwordcount.case.sensitive2=3 test2 output
The input file is:
1<tab>0.1,0.3
2<tab>0.5,0.2
And the result that comes out:
Code:
1 1.6,1.2000000000000002,null,null
2 1.5,2.0999999999999996,null,null
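Tracing the arithmetic by hand (a=2 from wordcount.case.sensitive, b=3 from wordcount.case.sensitive2), the computed fields all check out; it's exactly the two fields read from global that are null:
Code:
key 1: map:    x=0.1, y=0.3  ->  sum1 = 0.4,  z = 0.3*2 = 0.6
       reduce: m=0.4, n=0.6  ->  sum2 = 0.4 + 0.6*2 = 1.6,  o = 0.4*3 = 1.2
key 2: map:    x=0.5, y=0.2  ->  sum1 = 0.7,  z = 0.2*2 = 0.4
       reduce: m=0.7, n=0.4  ->  sum2 = 0.7 + 0.4*2 = 1.5,  o = 0.7*3 = 2.1

So sum2 and o match the output (apart from floating-point noise); only global[0] and global[1] come back null.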