Ads layer data is often the final result indicator data, used in large screen display or real-time stream processing, through the following two examples to practice how to write business large screen display SQL.

1. Members analyze cases

1.1 Data Preparation

The structure of the table is as follows. This table is the membership table of THE DWS layer in the dimension of days, such as the summary of daily membership information.

use dws;
drop table if exists dws.dws_member_start_day;
create table dws.dws_member_start_day(
`device_id` string, -- Device ID, to distinguish users
`uid` string, -- uid
`app_v` string,
`os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string
) COMMENT 'Member Day Launch Summary'
partitioned by(dt string)
stored as parquet;

Copy the code

1.2 Member index calculation

Silent members are defined as those who have only launched the App on the day of installation and installed it 7 days ago

The definition of attrition member: the member who has not logged in in the last 30 days

1.2.1 How to calculate the number of silent members

Sum = sum = sum = sum = sum = sum = sum
SELECT count(*)
FROM
  (SELECT device_id,
          sum(device_id) OVER (PARTITION BY device_id) AS sum_num,
                     dt
   FROM dws.dws_member_start_day) tmp
WHERE dt < = date_add(CURRENT_DATE.7 -)
  AND sum_num=1

Copy the code

1.2.2 How to calculate the lost members

Get the last login time of the member and filter by row_number
SELECT count(*)
FROM
  (SELECT device_id,
          dt,
          row_number(a)OVER (PARTITION BY device_id
                             ORDER BY dt DESC) ro
   FROM dws.dws_member_start_day) tmp
WHERE ro=1
  AND dt > = date_add(CURRENT_DATE.- 30) 
Copy the code

2. Core transaction cases

2.1 Data Preparation

Given a daily order dimension table, the table structure is shown below:

DROP TABLE IF EXISTS dwd.dwd_trade_orders;
create table dwd.dwd_trade_orders(
`orderId`    int,
`orderNo`   string,
`userId`    bigint,
`status`    tinyint,
`productMoney` decimal,
`totalMoney`  decimal,
`payMethod`   tinyint,
`isPay`     tinyint,
`areaId`    int,
`tradeSrc`   tinyint,
`tradeType`   int,
`isRefund`   tinyint,
`dataFlag`   tinyint,
`createTime`  string,
`payTime`   string,
`modifiedTime` string,
`start_date`  string,
`end_date`   string
) COMMENT 'Order Fact Zipper Table'
partitioned by (dt string)
STORED AS PARQUET;
Copy the code

Where, order status -3 rejected by the user -2 unpaid order -1 cancelled by the user 0 Waiting for delivery 1 In distribution 2 confirmed receipt of the goods, order validity mark -1 deleted 1 valid

Data preprocessing, it is not very convenient in the detail fact zipper table processing, can make an intermediate table, dWS_trade_orderS_day its table structure and processing are as follows:

DROP TABLE IF EXISTS dws.dws_trade_orders_day;

CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_day(day_dt string COMMENT 'Date: YYYY-MM-DD',
                                                   day_cnt decimal commnet 'Daily orders',
                                                   day_sum decimal COMMENT 'Total daily orders') COMMENT 'Daily Order statistics';

SELECT dt,
       count(*) cnt,
       sum(totalMoney) sm
FROM
  (SELECT DISTINCT orderid,
                   dt,
                   totalMoney
   FROM dwd.dwd_trade_orders
   WHERE status > = 0
     AND dataFlag = '1') tmp
GROUP BY dt;


INSERT OVERWRITE TABLE dws.dws_trade_orders_day
SELECT dt,
       count(*) cnt,
       sum(totalMoney) sm
FROM
  (SELECT DISTINCT orderid,
                   dt,
                   totalMoney
   FROM dwd.dwd_trade_orders
   WHERE status > = 0
     AND dataFlag = '1') tmp
GROUP BY dt;


SELECT *
FROM dws.dws_trade_orders_day
WHERE day_dt BETWEEN '2020-01-01' AND '2020-12-31';
Copy the code

2.2 Indicator 1: The number of sales orders and the total amount of orders in each quarter in 2020

Create the ADS indicator table: dWS_trade_ORDERS_quarter

DROP TABLE IF EXISTS dws.dws_trade_orders_quarter;


CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_quarter(YEAR string COMMENT 'year',
                                                        QUARTER string COMMENT 'quarter',
                                                        cnt decimal COMMENT 'Total orders',
                                                        SUM decimal COMMENT 'Total order amount') COMMENT 'Quarterly Order Statistics';


INSERT OVERWRITE TABLE dws.dws_trade_orders_quarter WITH tmp AS
  (SELECT substr(day_dt, 0.4) YEAR.CASE WHEN substr(dat_dt, 6.2)="01"
   OR substr(dat_dt, 6.2)="."OR substr(day_dt, 6.2)="3"THEN "1" WHEN substr(dat_dt, 6.2)="04"
   OR substr(dat_dt, 6.2)="5"OR substr(day_dt, 6.2)="6"THEN "2" WHEN substr(dat_dt, 6.2)="7"OR substr(dat_dt, 6.2)="08"
   OR substr(day_dt, 6.2)="09" THEN "3" WHEN substr(dat_dt, 6.2)="10"
   OR substr(dat_dt, 6.2)="11"
   OR substr(day_dt, 6.2)="12" THEN "4" AS QUARTER day_cnt,
                                     day_sum
   FROM dws.dws_trade_orders_day)
SELECT YEAR,
       QUARTER,
       sum(day_cnt),
       sum(day_sum)
FROM tmp
GROUP BY YEAR QUARTER;
Copy the code

2.3 Count the number and total amount of sales orders in each month in 2020

Create the ADS indicator table: dWS_trade_ORDERS_month

DROP TABLE IF EXISTS dws.dws_trade_orders_month;

CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_month(yearstring COMMENT 'year'.MONTH string COMMENT '月份',
                                                      month_cnt decimal COMMENT 'Monthly Total Orders',
                                                      month_sum decimal COMMENT 'Total Monthly Orders') COMMENT 'Monthly Order Statistics';


INSERT OVERWRITE TABLE dws.dws_trade_orders_month WITH tmp AS
  (SELECT substr(day_dt, 0.4) YEAR,
                               sunstr(day_dt, 6.2) MONTH,
                                                    day_cnt,
                                                    day_sum
   FROM dws.dws_trade_orders_day)
SELECT YEAR.MONTH.sum(day_cnt) month_cnt,
       sum(day_sum) month_sum
FROM tmp
GROUP BY YEAR.MONTH;
Copy the code

2.4 Count the number and total amount of sales orders every week (From Monday to Sunday) in 2020

Create ads level indicator table: dws_trade_orderS_week uses the date function weekofyear

DROP TABLE IF EXISTS dws.dws_trade_orders_week;
CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_week(YEAR string COMMENT 'year',
                                                     WEEK string COMMENT 'Week of the year',
                                                     week_cnt decimal COMMENT 'Total Weekly Orders',
                                                     week_sum decimal COMMENT 'Weekly Total Orders') COMMENT 'Weekly Order Statistics';


INSERT OVERWRITE TABLE dws.dws_trade_orders_week
SELECT substr(day_dt, 0.4) YEAR,
                            weekofyear(day_dt) WEEK,
                                               sum(day_cnt),
                                               sum(day_sum)
FROM dws.dws_trade_orders_day
GROUP BY substr(day_dt, 0.4) YEAR,
                              weekofyear(day_dt) WEEK;
Copy the code

2.5 Count the number of orders and total number of orders in national holidays, rest days and working days in 2020

Create date information dimension table: DIM_DAY_info and input holiday information data (the data is different every year and needs to be notified by The State Council, so regular manual maintenance is required)

drop table if exists dim.dim_day_info;
create table if not exists dim.dim_day_info(
  day_dt string comment 'date',
  is_holidays int comment 'Holiday signs: 0 not 1 yes',
  is_workday int comment 'Weekday identifier 0 not 1 yes'
) comment 'Date information Table';
Copy the code
-- Count the number of orders and the total amount of orders for the 2020 holiday

SELECT nvl(sum(day_cnt), 0) nvl(sum(day_sum), 0)
FROM dws.dws_trade_orders_day A
LEFT JOIN dim.dim_day_info B ON A.day_dt = B.day_dt
WHERE B.is_holiday = 1;

-- Count the number and total amount of orders on rest days in 2020

SELECT nvl(sum(day_cnt), 0) nvl(sum(day_sum), 0)
FROM dws.dws_trade_orders_day A
LEFT JOIN dim.dim_day_info B ON A.day_dt = B.day_dt
WHERE B.is_workday = 0;

-- Count the number of orders and total number of orders in the working days of 2020

SELECT nvl(sum(day_cnt), 0) nvl(sum(day_sum), 0)
FROM dws.dws_trade_orders_day A
LEFT JOIN dim.dim_day_info B ON A.day_dt = B.day_dt
WHERE B.is_workday = 1;
Copy the code

Check your profile for more.