AWS EMR Hive Dynamic partitioning transformation job example in SQL

This is an example to understand the power of Dynamic partitioning in Hive

 

set hive.exec.dynamic.partition.mode=nonstrict;

CREATE EXTERNAL TABLE IF NOT EXISTS sampledb.myTable(

  GAID string,

  leave_timestamp string,

  latitude  string,

  longitude string,

  stay_time string,

  country string,

  city string,

  Street string,

  house string,

Home_Country string,

Home_City string,

Home_Neighborhood string,

Home_Zip_Code string,

Office_Country string,

Office_City string,

Office_Neighborhood string,

Office_Zip_Code string,

Zip_of_the_location string,

POI_Name string,

POI_Type string,

POI_Tag_Value string,

Altitude string,

Server_Version string,

Ver_No string)

PARTITIONED BY (dt string)

ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ LINES TERMINATED BY ‘\n’

LOCATION ‘s3://myBucket/production/rawdata/online/‘ ;

MSCK REPAIR TABLE sampledb.myTable;

    CREATE EXTERNAL TABLE IF NOT EXISTS sampledb.myTable_parquet(

    GAID string,

    leave_timestamp string,

    latitude  string,

    longitude string,

    stay_time string,

    country string,

    city string,

    Street string,

    house string,

    Home_Country string,

    Home_City string,

    Home_Neighborhood string,

    Home_Zip_Code string,

    Office_Country string,

    Office_City string,

    Office_Neighborhood string,

    Office_Zip_Code string,

    Zip_of_the_location string,

    POI_Name string,

    POI_Type string,

    POI_Tag_Value string,

    Altitude string,

    Server_Version string,

    Ver_No string

    )

    PARTITIONED BY (dt string)

    STORED AS PARQUET

    LOCATION ‘s3://myBucket/production/parquetdata/online_new/‘ ;

MSCK REPAIR TABLE sampledb.myTable_parquet;

INSERT OVERWRITE TABLE sampledb.myTable_parquet  partition (dt)

SELECT

regexp_replace(gaid, ‘”‘,”),

regexp_replace(leave_timestamp, ‘”‘,”) ,

regexp_replace(latitude, ‘”‘,”),

regexp_replace(longitude, ‘”‘,”) ,

regexp_replace(stay_time, ‘”‘,”) ,

regexp_replace(country, ‘”‘,”),

regexp_replace(regexp_replace(city, ‘”‘,”), “‘”,””),

regexp_replace(street, ‘”‘,”),

regexp_replace(house, ‘”‘,”),

regexp_replace(Home_Country, ‘”‘,”),

regexp_replace(regexp_replace(home_city, ‘”‘,”), “‘”,””),

regexp_replace(Home_Neighborhood, ‘”‘,”),

regexp_replace(Home_Zip_Code, ‘”‘,”),

regexp_replace(Office_Country, ‘”‘,”),

regexp_replace(regexp_replace(office_city, ‘”‘,”), “‘”,””),

regexp_replace(Office_Neighborhood, ‘”‘,”),

regexp_replace(Office_Zip_Code, ‘”‘,”),

regexp_replace(Zip_of_the_location, ‘”‘,”),

regexp_replace(POI_Name, ‘”‘,”),

regexp_replace(POI_Type, ‘”‘,”),

regexp_replace(POI_Tag_Value, ‘”‘,”),

regexp_replace(Altitude, ‘”‘,”),

regexp_replace(server_version, ‘”‘,”),

regexp_replace(ver_no, ‘”‘,”),

  dt

FROM sampledb.myTable WHERE dt>=current_date() – interval ‘3’ day;

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google+ photo

You are commenting using your Google+ account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s