Friday 4 January 2019

Apache Pig as a Client Running Against a Remote Hadoop Cluster


The intent is to use the Apache Pig APIs to execute a Pig script from a client application on a remote Hadoop cluster.

PIG Version : pig-0.12.0
Apache Hadoop Version : Hadoop 1.2.1

import java.io.IOException;
import java.util.Properties;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;



public class PigClient {

    public static void main (final String[] args) {
        PigServer pig;
        try {

            final Properties properties = new Properties ();
            properties.put ("fs.default.name", "hdfs://x.x.x.x:9000");
            pig = new PigServer (ExecType.MAPREDUCE, properties);
            System.out.println ("-----------Connected to Hadoop Server-------");
            pig.registerScript ("E:\\wordcount.pig");
        } catch (final ExecException e) {
            e.printStackTrace ();
        } catch (final IOException e) {
            e.printStackTrace ();
        }
    }
}