Monday, August 25, 2014

Apache Pig Working Example

# Uploading the Datasets into Hadoop HDFS

bigdata@bigdata/$ hadoop fs -mkdir pig
bigdata@bigdata/$ hadoop fs -ls
Found 4 items
drwxr-xr-x   - bigdata supergroup          0 2014-07-07 13:01 /user/bigdata/backup
drwxr-xr-x   - bigdata supergroup          0 2014-07-09 17:40 /user/bigdata/datasets
drwxr-xr-x   - bigdata supergroup          0 2014-07-10 11:51 /user/bigdata/imagepath
drwxr-xr-x   - bigdata supergroup          0 2014-07-10 18:10 /user/bigdata/pig
bigdata@bigdata/$ hadoop fs -put /home/bigdata/download/pig/excite-small.log /user/bigdata/pig
bigdata@bigdata/$ hadoop fs -ls /user/bigdata/pig
Found 1 items
-rw-r--r--   1 bigdata supergroup     208348 2014-07-10 18:05 /user/bigdata/pig/excite-small.log
bigdata@bigdata/$

# Open Pig in Terminal

grunt> log = LOAD '/user/bigdata/pig/excite-small.log' AS (user, timestamp, query);
grunt> grpd = GROUP log BY user;
grunt> cntd = FOREACH grpd GENERATE group, COUNT(log);
grunt> STORE cntd INTO '/user/bigdata/pig/group_output';

2014-07-10 18:08:42,840 [main] INFO  org.apache.pig.tools.pigstats.ScriptState - Pig features used in the script: GROUP_BY
2014-07-10 18:08:42,840 [main] INFO  org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer - {RULES_ENABLED[AddForEach, ColumnMapKeyPrune, GroupByConstParallelSetter, LimitOptimizer, LoadTypeCastInserter, MergeFilter, MergeForEach, PartitionFilterOptimizer, PushDownForEachFlatten, PushUpFilter, SplitFilter, StreamTypeCastInserter], RULES_DISABLED[FilterLogicExpressionSimplifier]}
2014-07-10 18:08:42,842 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler - File concatenation threshold: 100 optimistic? false
2014-07-10 18:08:42,842 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.CombinerOptimizer - Choosing to move algebraic foreach to combiner
2014-07-10 18:08:42,843 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size before optimization: 1
2014-07-10 18:08:42,843 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size after optimization: 1
2014-07-10 18:08:42,849 [main] INFO  org.apache.pig.tools.pigstats.mapreduce.MRScriptState - Pig script settings are added to the job
2014-07-10 18:08:42,850 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3
2014-07-10 18:08:42,850 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Reduce phase detected, estimating # of required reducers.
2014-07-10 18:08:42,850 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Using reducer estimator: org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.InputSizeReducerEstimator
2014-07-10 18:08:42,851 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.InputSizeReducerEstimator - BytesPerReducer=1000000000 maxReducers=999 totalInputFileSize=208348
2014-07-10 18:08:42,851 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Setting Parallelism to 1
2014-07-10 18:08:42,851 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - This job cannot be converted run in-process
2014-07-10 18:08:42,851 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - creating jar file Job5158749338361837443.jar
2014-07-10 18:08:44,644 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - jar file Job5158749338361837443.jar created
2014-07-10 18:08:44,647 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - Setting up single store job
2014-07-10 18:08:44,647 [main] INFO  org.apache.pig.data.SchemaTupleFrontend - Key [pig.schematuple] is false, will not generate code.
2014-07-10 18:08:44,647 [main] INFO  org.apache.pig.data.SchemaTupleFrontend - Starting process to move generated code to distributed cacche
2014-07-10 18:08:44,647 [main] INFO  org.apache.pig.data.SchemaTupleFrontend - Setting key [pig.schematuple.classes] with classes to deserialize []
2014-07-10 18:08:44,668 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 1 map-reduce job(s) waiting for submission.
2014-07-10 18:08:44,811 [JobControl] INFO  org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input paths to process : 1
2014-07-10 18:08:44,811 [JobControl] INFO  org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths to process : 1
2014-07-10 18:08:44,812 [JobControl] INFO  org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths (combined) to process : 1
2014-07-10 18:08:45,168 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - HadoopJobId: job_201407101041_0003
2014-07-10 18:08:45,168 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Processing aliases cntd,grpd,log
2014-07-10 18:08:45,168 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - detailed locations: M: log[9,6],cntd[11,7],grpd[10,7] C: cntd[11,7],grpd[10,7] R: cntd[11,7]
2014-07-10 18:08:45,168 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - More information at: http://bigdata:50030/jobdetails.jsp?jobid=job_201407101041_0003
2014-07-10 18:08:45,172 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 0% complete
2014-07-10 18:08:45,172 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Running jobs are [job_201407101041_0003]
2014-07-10 18:08:48,178 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 50% complete
2014-07-10 18:08:48,179 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Running jobs are [job_201407101041_0003]
2014-07-10 18:08:55,205 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 66% complete
2014-07-10 18:08:55,205 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Running jobs are [job_201407101041_0003]
2014-07-10 18:08:56,207 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Running jobs are [job_201407101041_0003]
2014-07-10 18:09:00,250 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 100% complete
2014-07-10 18:09:00,251 [main] INFO  org.apache.pig.tools.pigstats.mapreduce.SimplePigStats - Script Statistics:

HadoopVersion PigVersion UserId StartedAt FinishedAt Features
1.2.1 0.13.0 bigdata 2014-07-10 18:08:42 2014-07-10 18:09:00 GROUP_BY

Success!

Job Stats (time in seconds):
JobId Maps Reduces MaxMapTime MinMapTIme AvgMapTime MedianMapTime MaxReduceTime MinReduceTime AvgReduceTime MedianReducetime Alias Feature Outputs
job_201407101041_0003 1 1 1 1 1 1 8 8 8 8 cntd,grpd,log GROUP_BY,COMBINER /user/bigdata/pig/group_output,

Input(s):
Successfully read 4501 records (208725 bytes) from: "/user/bigdata/pig/excite-small.log"

Output(s):
Successfully stored 891 records (17051 bytes) in: "/user/bigdata/pig/group_output"

Counters:
Total records written : 891
Total bytes written : 17051
Spillable Memory Manager spill count : 0
Total bags proactively spilled: 0
Total records proactively spilled: 0

Job DAG:
job_201407101041_0003


2014-07-10 18:09:00,268 [main] INFO  org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!

# Analyzing the Output

bigdata@bigdata/$ hadoop fs -ls /user/bigdata/pig/
Found 2 items
-rw-r--r--   1 bigdata supergroup     208348 2014-07-10 18:05 /user/bigdata/pig/excite-small.log
drwxr-xr-x   - bigdata supergroup          0 2014-07-10 18:15 /user/bigdata/pig/group_output
bigdata@bigdata/$ hadoop fs -ls /user/bigdata/pig/group_output
Found 3 items
-rw-r--r--   1 bigdata supergroup          0 2014-07-10 18:15 /user/bigdata/pig/group_output/_SUCCESS
drwxr-xr-x   - bigdata supergroup          0 2014-07-10 18:15 /user/bigdata/pig/group_output/_logs
-rw-r--r--   1 bigdata supergroup      17051 2014-07-10 18:15 /user/bigdata/pig/group_output/part-r-00000
bigdata@bigdata/$

hadoop fs -cat /user/bigdata/pig/group_output/part-r-00000

6D2B4F8DEE6D3EAF 2
6D39FA30ABF97CF1 3
6D3EDFFC5B370C42 9
6D906622D87278E5 3
6D9E037CA1E489A3 7
6DC44AB70EB110CB 4
6E2A4B3FED94E84D 5
6E75DE6D131ADADF 9
6EBE46CEB6BD249E 1
6F0A679A71DC2F39 4
6FA5EF0FFF3D6CB3 1
6FB3D2D282761F25 32
6FFB6B341F8FA1F1 1
7039BF2E7257EC24 1
70AFB9518EB9997A 3
70F5A03AEC3DA7BF 9
714E77EBD1691710 3
719CF3C90004051C 1
71CAF72D8BF0CE30 6
71D2D4E6C01FD7E1 1
71ED89525010D11A 5
72270DEAFE0BF9FC 3
726DA9740623758A 2
729B745475893F44 2
72DF27659DE7BE5A 2
736AB65C0C0C10EC 1
736D28D439E9FE2D 1
73AEEF0996A27551 1
73BD52528B217820 1
73D74648CC2CA35E 1
73D7DE81DF856F8D 7
74165896F4654D30 2
742C7109B9A57A73 14
74406AAFF322E81B 3
74BB49A63C0C4D79 7
752FE259E734662C 15
75C18D86685AAEAE 4
75F1B47D5BADE010 3
762B8ED16AC158C6 2
762F03C6189BBB1D 1
763A0EADC8BD3533 1
765EAEA8FC0AC936 3
76D05AFAC10D18D0 2
76FC27D6D468BC91 1
772DFF55A7E701DC 3
77752FDFA121C234 10
778EBE06AC999541 2
77AC89619076A8E1 2
781BF65D3D769ED3 2
78D021506018889F 2
78EC0A6026552159 2
790FC18760C238A6 2
7965E55C29F534F3 5
79785E25B2F213B8 4
7981B1CD3861167E 13
79E7FA8E26F7349E 1
7A17DCDA7EB5033C 1
7A8D9CFC957C7FCA 2
7AE07E7F0053F0A9 1
7AFA10B67193DBF4 1
7B99756B742D8E89 5
7BABD6CD11ABD104 6
7C32FF6A8E176FA6 1
7C60C0A2EBF7A3E7 2
7CCA971B578450AB 8
7D1DD1781EDB79A0 9
7D286B5592D83BBE 59
7D61F86F1732EDC6 1
7DF76165C8DCAD93 8
7E0D6592BC38322F 1
7E4DF85483B4A9E8 6
7E7371B2288BF353 2
7F173C311C29F64A 4
7F419DC93BF79BD3 3
7F88C9EC4CD0BB3A 4
7FEACB5630B25683 6
80BC451BC7EE9FE0 1
810EFC647D40E4CB 1
818D7157855D5D48 2
8192D9F4FCBDA81B 2
81CC31A8588135F2 2
81E73204D37D7E50 13
8223F74BED5A061A 3
824F413FA37520BF 10
82A061D8FA28AAC4 3
833A52784CCEF115 1
83607290B8BEAFC6 1
83FD0A3E1FB0FAD9 5
83FE595CE7D05209 1
84100211438C80DC 3
84312FE558AC325A 3
8462E97CB561C077 2
85690B11E1ED5FB5 6
858E8CCC3D889E86 5
85D5A78E64418242 1
86609B9799FE7CA5 1
86D1E09F8F8F5B9F 2
86EAEA913CC8D7C4 3
8775EDC82244F40A 4
887E337AB02C2BF7 10
88ED9040788FF9FD 2
893C3ADD0EFBBECB 10
8A095E9B925D411D 17
8A7BC9076D6F166F 4
8AFBE95F88FA5C99 3
8B2065581C770F50 4
8B6A0AB3CF0804D1 4
8BCE9868E3F8CC75 2
8C549AB7E029345E 2
8CDEE772A295AA02 5
8D2E263D44C09DA0 1
8D4626A753C2D3CF 2
8DCC6FC2AC2EBB4F 2
8DD654BF9AD99482 1
8DDB5A84C1C94E60 1
8E0A39B262A2C60B 14
8E1A8EA81FEA8A30 4
8E1E0FBF51628427 1
8E53B09082CF6A75 2
8EDC8ECDA4AD017E 4
8EE83362186F49EF 3
8F0ECFEDAB4A03DB 3
8FE55B4D65B22166 1
90498194D0D0A2F8 10
90B21F67EEA27FEA 5
910B9A0392D29303 3
917FDFC55A0EA9ED 38
918BE10E19D4E24F 2
91A98BC9BEDCF053 1
91AC6C05E866DAF4 7
91C510E369703D99 10
9218F60AF54851E4 2
923C890A8A149FD5 2
936BADBE23F87A96 4
93750016D02B541F 1
937EB54AB1F9EE94 5
93CE4FF9E36FA112 5
93E0FC7EE40C63BF 4
944E332F3090AA60 11
9471B615A518B7E4 4
949946B881F137F0 2
94B5B95EAEED1FE7 1
9541D2047C5360F9 4
972F13CE9A8E2FA3 1
9742AECDDE7E5895 2
975635FE3F837969 4
977C9CEB63318175 1
977E1B646010C88E 1
97E8A9F4A0DFD224 6
986AC2B6E1384999 1
98825190824FBCEC 7
98F5BBD3754D292F 3
98FA1E93D617E416 3
9912390F5E1D690F 3
99BA461C4F96233F 4
99D8C7D14A864902 5
9A04A6464335D051 3
9A10B373FA529557 3
9A33FFD53E103291 1
9A5F075ABDE5635D 24
9AD67B4FE4D37CDB 4
9B6691E12BC09D27 4
9BB2A0503EE9CE8E 7
9BC25E584304AEFA 26
9C431E20C78D50AD 1
9C8FA03A0D9DF175 4
9CF1A20154759F8F 1
9D5447106CB898E3 9
9D7A7624B927FE31 12
9DB263190BB17AC2 4
9E1707EE57C96C1E 12
9EAF527F15CABB79 1
9F9453D10F3C6718 1
A0037BF72EC3BA42 6
A01609F239CC8A05 6
A01C8755A311CD61 3
A02D95B65ECAFADD 2
A127C018E4812A29 2
A1801A1ACC7BE15C 1
A1A9D53780361768 2
A1C6967657FF9158 1
A1CFAE0FF0E6CFDE 1
A1F547F916AD8A43 2
A215ACD1331A1E5F 1
A224DDFAF7314978 3
A25C8C765238184A 8
A2800E21FDCEE2BF 10
A320339E3C0BE4AA 1
A369CD69E8B08A1A 11
A3C564B7D1FA1EA0 1
A40BA4A81324E029 2
A5033398CB2B7728 5
A5A6085F03DA0416 2
A5EB957C8CBFB0CD 4
A628C436696FE1B7 2
A67BC352137D9E89 4
A6867C13B8B29D8A 2
A68E8A400F6B4C56 1
A6EEB808BC4324D2 4
A71ABB39E3E46318 18
A71C7704625F3DF4 10
A7807FC4C410F719 2
A7863A716CA7614D 14
A7FB2A8002E86F26 10
A8983AA53DDA6E62 3
A8A0674EA33D1249 22
A9167118E28877F1 2
A93156BD79F164A4 15
AA17D9D7A97BF879 11
AA2BA7F06CBD473D 1
AA50F12D6650122F 4
AA716408D075660C 1
AAA21900843C49E7 5
AAA6E4471629BC8F 47
AAAEFC630BA8D7B6 1
AAE7D472AA45AB96 4
AAFA3254AF25D0FC 7
AB0D6B51B487075A 3
AC43C9B376B132D5 9
AC5FD7086CB44602 1
AD461CB2E3D2B8D7 1
AD957FB1A4A86779 4
ADDF71B56E078EC1 18
ADFBC35853A325C6 5
AE27828868F61353 1
AE341CEB2D79E51D 1
AE5E2B48ED103FA0 3
AEB9383955EECF5B 5
AF44F3885296C45B 2
AF7BBE7E92E62D6E 2
AFF4AFE7145DC5C9 2
AFFDEFE691EAD2FA 4
B0274667D0A700A8 2
B038389D403E4C43 2
B0C1B6DC7370F24B 1
B0FBC83B9C9FBB21 1
B144CE6F1EDAB0DE 5
B163FAFD64AFAB18 16
B1E4391F6E6EFEF4 9
B21B920FD9253010 1
B21C28EF21B46438 2
B27D574886585DD6 6
B2B4FD80D447F15B 3
B2D86EFD1C83A81B 4
B3797986B594F03D 1
B3CEAE9CC28714CF 2
B42894B030717FB8 3
B439C4E265D35E3D 2
B451329A3E623408 1
B53A8E9C0F0A04B8 2
B61BFEA8D6B8369F 9
B7C5C0BCD35D4CC9 7
B80DC510FF1B6C01 1
B8E12AFC196C5FB7 2
B9436C1E65C39A9E 1
B9922F32F8DD2511 1
B9D3C28C13F46D1D 2
B9E187FD56A5C322 8
BA449E5E59C384BB 16
BAC295D278E3E496 1
BAE7F92AAD81B7C5 8
BB925FF85FF44849 2
BBBBA6C4B71C1455 8
BC383CAF4C39027A 1
BC492F4E132262FE 9
BCB8F383043E184C 2
BCD36229594FAAF4 1
BCD90B7247D8FC7C 1
BD4D0061A2CB3CC9 1
BD6739C2A5932AE7 4
BDDDD3F6DA8557A4 2
BDEB5480328AEB41 10
BE2650AD779FC652 14
BE4B27358BABBC46 4
BE95DF3EAD425CAB 7
BED75271605EBD0C 20
BF67450937AA9990 3
BF6A27B4287138DE 2
BF76256C3A233A8A 2
BFDFC6040837EE14 4
C01F07D111E19068 2
C040A1754EEF11B1 1
C0733122E43CFD82 1
C07C30D02210A05E 1
C07D4ECD1ACE0C89 2
C0916429A59CE5A1 4
C0BD480632F27E58 5
C1340737666AB6D7 5
C1896F8C0035B349 5
C1977F1B854584B3 4
C1C4228EA191F401 4
C1C9C378C3568522 5
C1ECD6FD44B29196 1
C207D5DC9D314B5B 1
C2482CBA783A419D 3
C28C7C97640037C1 1
C2E319C7310CF5CA 4
C32C5E6E8CE7DAFD 3
C33468CFBB6BBA02 1
C33FE9482743BD0F 1
C35A0850C4B94541 28
C3DC13DF9F22602C 1
C4176145E5944CA2 2
C444109E68351C02 10
C485DF6D1EA489BA 2
C5460576B58BB1CC 7
C5779DED2B0EA592 1
C5901C622223E71D 5
C5D01E05FF9CA265 2
C611A0BC0216E1CD 3
C68A35C476240F3D 10
C6A50F1089717BA1 8
C71BEDADCB745808 12
C73A5C29D1FC5C7D 2
C771C1E3DF333CDC 3
C7C6CF328CF46E0D 1
C7CA4669EBEAF90B 2
C80EB1206EAC493D 1
C81329DC0EF932FB 1
C86AA16FFD90B66C 14
C871CAE33E1EBD23 8
C89F34E15252E94A 5
C92AD22C24629491 1
C983FC6A580A67D4 6
C989A6531FD9EEC8 9
C99EB10EF3F2240E 1
C9EDF6F6F7C8C2C0 3
C9F4F61D48892F7B 2
CA15DFA42D265175 1
CB6EB7CE0467E74F 1
CB9EA2EEB8E11932 1
CBAEB52E28985C5E 1
CC3D6796CCB8F9B4 6
CC4F90BE5D6F0F9A 3
CC51BF8EC2ED9FD8 21
CCC1CC82483DD48E 4
CD37F95FC0886E1D 2
CD6DBDCB71996CDA 1
CDA3014FEEE660F2 2
CE09372F159CA389 1
CE65B6131CEBAC78 2
CEBE1A072B345F9F 1
CF5AFAEC0B19A940 4
CFE6B4DACA25B607 7
D058447C791B3F76 1
D0AA66103CEC6749 2
D0B7245F30B8170E 13
D0EA324518D428BE 2
D17494E7F006DB9A 4
D210EAD7F74E82EE 1
D25EF156EEE4AB94 11
D2A2F6B93EE290B0 6
D2E8CBAEF95A890B 18
D2FFE38AFF1C358A 2
D356BF7183CAA42E 3
D39275A3A8A2B21E 1
D3D3ED7BAD64DDC1 5
D49B04FF9BE2DFA1 2
D4D89B48594B5C6E 8
D4DA409F40BB9102 3
D4FAB7E5ED4E8BF8 1
D532DEB0BB3D50FD 5
D5D6264C66799EEB 1
D5D8220D36969861 3
D61E5828503E6438 8
D6316653B9793BB3 1
D74F0CDBC2EEB6E0 3
D7886648F0884E25 1
D7CACB5A4976AB9E 6
D87F01105536CAEB 21
D89ADE64C31D4963 5
D901F064DA40CC67 8
D9142519595FF9D1 2
D9804262D7097FA0 2
DA1CAD0C5D86B84B 2
DA22EE4DFE3C8179 8
DA4586C99882E0BE 3
DA8A7A56AE86C1ED 2
DA8CF5A56D67D01C 1
DA94D4B5A7C0D1AF 3
DA9DC83856C1269E 9
DAA8C88C7DA0F0B9 4
DAF7A3D38ED9A343 3
DB0CC854B82A662C 4
DB150CA81A21781F 1
DB1C66C105955633 2
DB2D5E0E0A0A11C6 3
DB38E7AF26F3AD9A 1
DB49308A76F8A6C4 7
DC4F3ECF90B35B9A 1
DCC5EACF75BCEF0E 3
DCF04899DF8CD6C7 10
DD36B11F3ADA30C8 3
DD99EA68707D6EBB 1
DE3AE35D76E898B0 19
DEA8DB3FF5F70B93 21
DF04028778B8D665 5
DF3E47213C887544 1
DF9BBDB4B1E1B8EC 2
DFDC0E9E4E3055AA 12
DFFFF72A42DD6526 30
E016B2DA270CB1B3 4
E075523C884E339E 12
E08CDDAE633645FB 3
E0D12FA14991D2D9 2
E0F9E1C71AF27644 1
E131BFC55AF4CDCE 2
E1B23A1B0EAE7DCF 2
E29559653E1E5D44 2
E2BE501BA64CD453 4
E2BE900C633FC8BB 2
E2CC72180CD1173F 8
E2E1A6C2BC5E324C 1
E3987AED25D1C7CA 3
E3E5D96565D98DA9 2
E3E8E56E44175FD9 1
E4ACB00AD7316719 5
E55487B7296ED015 7
E559AEBED8E9E078 3
E685D01156BD1FA6 1
E7426E62B87C050F 1
E760CDDAD774D717 2
E7B845B836EB153E 5
E7BF2A1987308ECA 5
E7E4CAE0EEA18A00 8
E81CC79FF1064DBD 3
E84833C4A26D6818 4
E84EC370A6154A16 1
E8AE49E596BE8075 4
E8BD3F9DE94CF252 2
E8D74D7394CDB87C 3
E8ED6BB6158694B5 5
E9913C2EF0736101 2
E9CE7EE37511E710 11
EA0C5440778B6D73 1
EA117DA516DCCE9A 10
EAE86EA0EE9F3F2E 2
EAED31B6A8CCC1D2 1
EB7125303EA0A6F9 5
EB73C03E6F4F2602 1
EBE0D9D904DF6E52 2
EC6D96F35B4B6EDC 1
EC6E91864359DD8D 47
ECD32A3785A6338C 1
ED3EA19F0B5A556B 1
ED405D0C0341A807 2
ED46FBB036F53C65 1
EE772E45E3DC084E 1
EEF64006C7D47AC1 4
EF5896A4EBC0CA3C 1
EF8A0725112B7813 4
EFC23FE521A780BB 11
EFD8A3AF3D55DDA8 1
F006E556E2A09E96 1
F0D2ACCD226C9EB8 4
F0FD2AA13A263844 4
F11C6441E99CF50A 5
F19ED8F44663520A 4
F22F55E90508FC0F 18
F268B329129FEA09 2
F2C185AC2A3FFE4B 1
F30E97C85680593D 8
F31767E967324A34 1
F37A1475EADCEC5E 4
F3FEA5332560D893 3
F44CC3ECE5C1C448 1
F5052DF171744331 2
F5192686FA9BA516 2
F559561E697722BB 1
F567E121D669BA67 1
F58053809A3FD38F 4
F584862B9B7346EB 2
F5C0159294563B38 2
F5F3D76DC932FA2C 2
F5FAFB447A057019 1
F61A119640D7C0EB 3
F623B8196D573996 13
F63897E2C5E3ABA0 1
F63CC494C71DF1E8 1
F6D9A01E32E0BE2F 6
F83A4A675B2D7087 3
F83D9A82EA70E97C 1
F8548204B42ABEED 9
F85AF1304F7D7D56 1
F91FE5DF055F8E7B 1
F95E943D66FEEE8F 8
F9D5FD25E1671290 8
F9F8675E8F3925BC 7
FA0ECA96038AD21E 6
FA27C381A64FFDA5 4
FA75BB73B37F9E91 4
FB02D1A76ED1E308 3
FB3EA7AB8B51C95D 2
FB91EB2A6E481F1A 2
FBC6BF4991AE18A7 7
FBD3AC3CACEBE693 2
FCBB8401805D783F 2
FCE735441720FBE8 4
FD2253483C3B15DC 2
FD2A6A330C3F58DB 1
FD3373744827EFA7 4
FD4BB9A09080B726 2
FD83D5C547D3EA2E 3
FDCC0A3F96D1C47A 10
FE106E193F938B17 3
FE33FDC5FAE7EB96 4
FE785BA19AAA3CBB 10
FEA681A240A74D76 4
FF5C9156B2D27FBD 1
FFA4F354D3948CFB 6
FFCA848089F3BA8C 1

Apache Hive Working Example

# Opening Hive Terminal

Open terminal and simply type "hive"

# Listing all the databases

hive> show databases;
OK
default
Time taken: 0.022 seconds, Fetched: 1 row(s)

# Creating a New Database in Hive

hive> CREATE DATABASE employee;
OK
Time taken: 0.069 seconds

# Listing the created databases

hive> show databases;    
OK
default
employee
Time taken: 0.012 seconds, Fetched: 2 row(s)

# Choosing/Selecting the Database

hive> use employee;
OK
Time taken: 0.018 seconds

# Creating a new Table in Database

hive> CREATE TABLE country_list (name STRING);
OK
Time taken: 0.083 seconds

# Listing the tables present in the Database

hive> show tables;
OK
country_list
Time taken: 0.03 seconds, Fetched: 1 row(s)

# Loading a dataset into Hive from HDFS

hive> LOAD DATA INPATH '/user/bigdata/hive/country_example.tsv' OVERWRITE INTO TABLE country_list;
Loading data to table employee.country_list
Deleted hdfs://bigdata-karthik:9000/user/hive/warehouse/employee.db/country_list
Table employee.country_list stats: [numFiles=1, numRows=0, totalSize=38, rawDataSize=0]
OK
Time taken: 0.179 seconds

# Selecting all the values from the Table

hive> select * from country_list;
OK
Atlantis
Albania
China
France
Russia

Time taken: 0.056 seconds, Fetched: 6 row(s)
hive>

# Note: before running LOAD DATA, I had uploaded the TSV file into Hadoop HDFS

bigdata@bigdata-karthik~$ hadoop fs -ls /user/bigdata/hive
bigdata@bigdata-karthik~$ hadoop fs -put /home/bigdata/Downloads/hive/country_example.tsv /user/bigdata/hive
bigdata@bigdata-karthik~$ hadoop fs -ls /user/bigdata/hive
Found 1 items
-rw-r--r--   1 bigdata supergroup         38 2014-07-14 18:14 /user/bigdata/hive/country_example.tsv
bigdata@bigdata-karthik~$ 

HBase: Unix/Shell Script File for Creating, Putting, Disabling and Dropping tables, and Bulk-Loading Datasets from Hadoop HDFS

#!/bin/bash
#
# Recreates an HBase table, feeds the statements produced by
# depends/hbase-script.sh to the HBase shell, then copies a local export into
# HDFS, runs the HBase Import job on it and removes the HDFS staging directory.
#
# Expects the `hbase` and `hadoop` commands on PATH and a depends/ directory
# (relative to the directory the script is started from) that contains
# hbase-script.sh and the table0 export.
# Side effect: the output of the HBase "exists" check is written to ./log.

# Declaring Variables
table0="employee"

# Remember the starting directory; the depends/ paths below are built from it.
path=$(pwd)

# Creating the table in HBase

# Ask the HBase shell whether the table exists and keep its output in ./log.
echo "exists '$table0'" | hbase shell > log

# The pattern follows $table0 so the check keeps working if the name changes.
# grep still prints the matching line, as the original pipeline did.
if grep "Table ${table0} does exist" log; then
    echo "************  table is already exists **********"

    # Either you can use truncate or disable & drop options
    echo "disable '$table0'" | hbase shell
    echo "drop '$table0'" | hbase shell
    #  echo "truncate '$table0'" | hbase shell
else
    echo "***********  need to create a table  **********"
fi

# Both paths end by creating the table with a single 'count' column family.
echo "create '$table0','count'" | hbase shell

# Either you can use shell commands here or call another .sh file; here the
# statements come from a separate script whose output is piped to the shell.
# Abort if the directory is missing so the helper is never run from elsewhere.
cd "$path/depends" || { echo "cannot cd to $path/depends" >&2; exit 1; }
chmod +x hbase-script.sh
./hbase-script.sh | hbase shell

# Copying the export into HDFS, importing it into HBase and cleaning up

echo "${HADOOP_HOME}"
username="$USER"

# The commands run directly (not inside `echo $(...)`) so their output is
# shown unmodified and their exit status is not hidden behind echo.
hadoop fs -copyFromLocal "$path/depends/table0" "/user/$username/hbasetable/table0"

# NOTE(review): the Import target is the literal table name "table0", not
# "$table0" (employee) - confirm that a table with that name exists, e.g. that
# hbase-script.sh creates it.
hbase org.apache.hadoop.hbase.mapreduce.Import table0 "/user/$username/hbasetable/table0"

# -rmr is the recursive delete of the Hadoop 1.x FS shell.
hadoop fs -rmr "/user/$username/hbasetable"
exit