Gunther Oracle APEX und Datenbank Wiki

Die Oracle NoSQL per MapReduce mit Hadoop verwenden

Mit der Hadoop-kompatiblen InputFormat-Klasse „oracle.kv.hadoop.KVInputFormat“ lässt sich die Oracle NoSQL Datenbank einfach mit Hadoop verwenden.

Die einzige Schwierigkeit besteht darin, Hadoop den richtigen Klassenpfad zu übergeben. Als letzte Lösung hilft oft nur, die Library „kvclient.jar“ in die eigene Jar-Datei zu integrieren.

Leider ist es mir nicht gelungen, den Klassenpfad mit „-libjars $KVHOME/lib/kvclient.jar“ beim Aufruf des Jobs zu übergeben. In keiner Kombination der Parameter wurde die Klasse gefunden.

Folgender Fehler beim Mapper Task:

Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class oracle.kv.hadoop.KVInputFormat not found at

Daher habe ich die kvclient.jar zusammen mit meinen Klassen in eine Jar-Datei verpackt.

Beispiel:

Mapper Klasse:

package gpi.hadoop;
 
import java.io.IOException;
 
import java.util.StringTokenizer;
 
import oracle.kv.Key;
 
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
 
// input Key - input Value - output Key - output Value
public class PWDStoreMapper extends Mapper<Text, Text, Text, IntWritable> {
 
 
    static IntWritable oneValue = new IntWritable(1);
 
    @Override
    // input Key - input Value - output Value
    public void map(Text KVKey, Text valueArg, Context context) throws IOException, InterruptedException {
 
        // read on key of the store
        String keyName;
 
        Key key = Key.fromString(KVKey.toString());
 
       // Convert back to canonical format
        keyName = new StringBuffer(Key.createKey(key.getMajorPath()).toString()).toString();                
 
       context.write(new Text(keyName), oneValue);      
 
    }
}

Reducer Klasse:

package gpi.hadoop;
 
import java.io.IOException;
 
import java.util.Iterator;
 
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
 
 
public class PWDStoreReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
 
    private IntWritable totalWordCount = new IntWritable();
 
    @Override
    public void reduce(Text KVKey, Iterable<IntWritable> counts, Context context) throws IOException,
                                                                                        InterruptedException {
        int keycount = 0;
        for (IntWritable count : counts) {
            keycount += 1;
        }
        context.write(KVKey, new IntWritable(keycount));
    }
}

Job-Definition:

package gpi.hadoop;
 
import oracle.kv.hadoop.KVInputFormat;
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
 
public class PWDStoreHadoop  {
 
 
    public static void main(String[] args) throws Exception {
 
        Configuration conf = new Configuration();
 
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
 
        // create a new Configuration
        Job job = Job.getInstance(conf);
        job.setJobName("PWDHadoopStoreReading");
 
        //main driver Class
        job.setJarByClass(PWDStoreHadoopT.class);
 
        // set the Input Format Classe
        job.setInputFormatClass(KVInputFormat.class);
        KVInputFormat.setKVStoreName("kvstore");
        //Parameter for the Input Format Class
        String [] kvhostList = {"bigdatalite:5000"};
        KVInputFormat.setKVHelperHosts(kvhostList);
 
        //set the Mapper
        job.setMapperClass(PWDStoreMapper.class);
 
        // Reducer
        job.setReducerClass(PWDStoreReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
 
        //set Output Class
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
 
        // Execute job and return status
        job.submit();
    }
}

Die Oracle NoSQL per MapReduce mit Hadoop verwenden

Beispiel:

Quellen