# Download and unpack Apache Pig 0.17.0 under /opt.
cd /opt
sudo wget https://downloads.apache.org/pig/latest/pig-0.17.0.tar.gz
sudo tar -xvzf pig-0.17.0.tar.gz
# The archive unpacks into /opt/pig-0.17.0, so link it to /opt/pig; otherwise
# PIG_HOME below would point at a directory that does not exist.
sudo ln -sfn /opt/pig-0.17.0 /opt/pig
export PIG_HOME=/opt/pig
export PATH="$PATH:$PIG_HOME/bin"
# Hadoop keeps its configuration in etc/hadoop (lowercase); paths are
# case-sensitive, so "etc/Hadoop" would not resolve.
# HADOOP_HOME must already be set when this line runs.
export PIG_CLASSPATH="$HADOOP_HOME/etc/hadoop"
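# Note:
# • HADOOP_HOME must already be set before the PIG_CLASSPATH export above, because that line expands $HADOOP_HOME (this guide assumes Hadoop is already installed).
# • If it is not set, export it first, adjusting the path to match your Hadoop installation: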
# Hadoop environment, needed only when HADOOP_HOME is not already defined:
# set the install root and append its bin/ directory to the search path.
HADOOP_HOME=/opt/hadoop
PATH=$PATH:$HADOOP_HOME/bin
export HADOOP_HOME PATH
# Create a working directory for the exercise (no error if it already
# exists) and move into it.
mkdir -p pig_project
cd pig_project
# Write the sample input file that the Pig script loads as 'students.txt'.
# Each row is: student_id,name,subject,score
# The quoted delimiter keeps the shell from expanding anything in the rows.
cat > students.txt <<'EOF'
1,Jayan,Math,85
2,Akshara,English,78
3,Bararth,Math,92
4,John,English,88
5,Charan,Math,90
EOF
# Write the Pig Latin script to student_operations.pig (it can still be opened
# afterwards with an editor such as nano). The quoted heredoc delimiter keeps
# the shell from expanding anything inside the script.
cat > student_operations.pig <<'PIG'
-- Load the comma-separated student records with an explicit schema.
students = LOAD 'students.txt' USING PigStorage(',') AS (student_id:int, name:chararray, subject:chararray, score:int);
-- Keep only the rows whose score is above 80.
high_scores = FILTER students BY score > 80;
-- Project down to the two columns used below.
projected = FOREACH high_scores GENERATE name, score;
-- Group by student name, then order each group's bag by score, highest first.
grouped = GROUP projected BY name;
-- Despite the alias name, nothing is FLATTENed here: each output row is
-- (name, bag of (name, score) tuples).
flattened = FOREACH grouped {
    sorted = ORDER projected BY score DESC;
    GENERATE group AS name, sorted;
};
-- Print the result to the console.
DUMP flattened;
PIG
# Run the script with Pig's `local` execution type (-x local).
pig -x local student_operations.pig