Use correlated subquery over multiple columns

Michi :

DB-Fiddle

-- One row per purchasing event of a campaign.
-- In the sample data each row fills only the date_* / quantity_* pair that
-- belongs to its own event and leaves the other pairs NULL, so every
-- date_* and quantity_* column has to stay nullable.
CREATE TABLE logistics (
    id                  INT PRIMARY KEY,
    campaign            VARCHAR(255),
    event_type          VARCHAR(255),  -- 'offered', 'ordered' or 'stored' in the sample data

    -- Calendar dates are typed as DATE (not free text) so malformed values are
    -- rejected on insert and comparisons are chronological, not lexical.
    date_offered        DATE,
    date_ordered        DATE,
    date_delivered      DATE,
    date_recorded       DATE,
    date_completed      DATE,

    -- Quantities are whole numbers in the sample data; INT keeps them
    -- summable and comparable without implicit string-to-number casts.
    quantity_offered    INT,
    quantity_ordered    INT,
    quantity_delivered  INT,
    quantity_recorded   INT,
    quantity_completed  INT
);

-- Sample data: one row per event of each campaign (C001 .. C008).
-- String literals use single quotes (double quotes are parsed as identifiers
-- when sql_mode contains ANSI_QUOTES) and the integer id is passed as a number
-- instead of a quoted string.
INSERT INTO logistics (
    id, campaign, event_type,
    date_offered, date_ordered,
    date_delivered, date_recorded, date_completed,
    quantity_offered, quantity_ordered,
    quantity_delivered, quantity_recorded, quantity_completed
)
VALUES
    -- C001: offered, ordered, then stored with all three sub-events
    (1, 'C001', 'offered', '2019-04-10', NULL, NULL, NULL, NULL, '500', NULL, NULL, NULL, NULL),
    (2, 'C001', 'ordered', NULL, '2019-04-16', NULL, NULL, NULL, NULL, '450', NULL, NULL, NULL),
    (3, 'C001', 'stored', NULL, NULL, '2019-04-18', NULL, NULL, NULL, NULL, '465', NULL, NULL),
    (4, 'C001', 'stored', NULL, NULL, NULL, '2019-04-20', NULL, NULL, NULL, NULL, '440', NULL),
    (5, 'C001', 'stored', NULL, NULL, NULL, NULL, '2019-04-22', NULL, NULL, NULL, NULL, '445'),

    -- C002: stored up to the recorded sub-event
    (6, 'C002', 'offered', '2019-08-15', NULL, NULL, NULL, NULL, '600', NULL, NULL, NULL, NULL),
    (7, 'C002', 'ordered', NULL, '2019-09-03', NULL, NULL, NULL, NULL, '700', NULL, NULL, NULL),
    (8, 'C002', 'stored', NULL, NULL, '2019-09-05', NULL, NULL, NULL, NULL, '690', NULL, NULL),
    (9, 'C002', 'stored', NULL, NULL, NULL, '2019-09-08', NULL, NULL, NULL, NULL, '692', NULL),

    -- C003 .. C005: stored up to the delivered sub-event
    (10, 'C003', 'offered', '2019-10-24', NULL, NULL, NULL, NULL, '300', NULL, NULL, NULL, NULL),
    (11, 'C003', 'ordered', NULL, '2019-10-28', NULL, NULL, NULL, NULL, '250', NULL, NULL, NULL),
    (12, 'C003', 'stored', NULL, NULL, '2019-10-31', NULL, NULL, NULL, NULL, '320', NULL, NULL),

    (13, 'C004', 'offered', '2019-11-05', NULL, NULL, NULL, NULL, '800', NULL, NULL, NULL, NULL),
    (14, 'C004', 'ordered', NULL, '2019-11-14', NULL, NULL, NULL, NULL, '870', NULL, NULL, NULL),
    (15, 'C004', 'stored', NULL, NULL, '2019-11-16', NULL, NULL, NULL, NULL, '740', NULL, NULL),

    (16, 'C005', 'offered', '2019-12-17', NULL, NULL, NULL, NULL, '240', NULL, NULL, NULL, NULL),
    (17, 'C005', 'ordered', NULL, '2020-01-12', NULL, NULL, NULL, NULL, '250', NULL, NULL, NULL),
    (18, 'C005', 'stored', NULL, NULL, '2020-01-16', NULL, NULL, NULL, NULL, '226', NULL, NULL),

    -- C006: ordered only
    (19, 'C006', 'offered', '2020-01-09', NULL, NULL, NULL, NULL, '100', NULL, NULL, NULL, NULL),
    (20, 'C006', 'ordered', NULL, '2020-01-23', NULL, NULL, NULL, NULL, '105', NULL, NULL, NULL),

    -- C007, C008: offered only
    (21, 'C007', 'offered', '2020-02-17', NULL, NULL, NULL, NULL, '900', NULL, NULL, NULL, NULL),
    (22, 'C008', 'offered', '2020-02-25', NULL, NULL, NULL, NULL, '400', NULL, NULL, NULL, NULL);

The table above displays the purchasing process of different campaigns.
The first three process steps are defined by the event_types offered, ordered and stored.
Once a campaign has reached the process step stored, the process continues with further dates called date_delivered, date_recorded and date_completed. Basically, those dates are sub-events of the event_type stored.

The hierarchy of those events is like this:

event_type = stored > ordered > offered
date = date_completed > date_recorded > date_delivered

Now, I want to extract the campaigns based on their highest event_type or - in case they already have reached the event_type stored - based on their highest date according to the defined hierarchy. The result should look like this:

campaign    event_type        date                quantity          
C001         stored         2019-04-22              445
C002         stored         2019-09-08              692
C003         stored         2019-10-31              320
C004         stored         2019-11-16              740
C005         stored         2020-01-16              226
C006         ordered        2020-01-23              105     
C007         offered        2020-02-17              900     
C008         offered        2020-02-25              400     

With reference to this question I tried to modify the query like this:

-- Attempted query from the question; it does NOT run as written (see the
-- NOTE(review) comments inside the subquery).
-- Intent: for each campaign keep only the row whose id the correlated
-- subquery picks as the highest-ranked event of that campaign.
SELECT 
campaign,
event_type,
-- COALESCE returns the first non-NULL argument, i.e. whichever date/quantity
-- column of the group is populated on this row.
coalesce(date_offered, date_ordered) as main_event_date,
coalesce(date_delivered, date_recorded, date_completed) as sub_event_date,
coalesce(quantity_offered, quantity_ordered) as main_event_quantity,
coalesce(quantity_delivered, quantity_recorded, quantity_completed) as sub_event_quantity
FROM logistics lg
WHERE lg.id = (SELECT lg2.id
                        FROM logistics lg2
                        WHERE lg2.campaign = lg.campaign
                        -- Sort key 1: position of event_type in the list, so 'stored' sorts
                        -- before 'ordered', which sorts before 'offered'.
                        ORDER BY field(lg2.event_type, 'stored', 'ordered', 'offered')
                        -- NOTE(review): there is no comma between the two ORDER BY expressions,
                        -- which is a syntax error.
                        -- NOTE(review): logistics has no sub_event_date column; that name is only
                        -- a SELECT alias of the outer query. Even if it resolved, it would hold a
                        -- date value, never one of the column names listed here.
                                 field(lg2.sub_event_date, 'date_completed', 'date_recorded', 'date_delivered')
                         LIMIT 1
                        );

However, I don't know how I should use the field function once the campaign has reached the event_type stored, because there is no sub_event column that tells me which of the three dates a row refers to.

Michi :

DB-Fiddle

The solution is as follows:

Step 1: Create an aggregated_sub_table called AGR_logistics which adds an additional column called sub_event_type to the original table:

-- Step 1: materialize a copy of logistics with one derived column,
-- sub_event_type, which names the sub-event of a 'stored' row
-- ('delivered', 'recorded' or 'completed') and is NULL for every other row.
CREATE TABLE AGR_logistics (
    -- CREATE TABLE ... SELECT does not create any indexes on the new table,
    -- so the primary key of the source table is declared again here.
    PRIMARY KEY (id)
) AS
SELECT
    id,
    campaign,
    event_type,
    -- The sub-event is identified by which quantity_* column is populated.
    -- Branches are tested top to bottom, so if a row ever had several
    -- quantities filled, the first matching branch would win.
    CASE
        WHEN event_type = 'stored' AND quantity_delivered IS NOT NULL THEN 'delivered'
        WHEN event_type = 'stored' AND quantity_recorded  IS NOT NULL THEN 'recorded'
        WHEN event_type = 'stored' AND quantity_completed IS NOT NULL THEN 'completed'
        ELSE NULL
    END AS sub_event_type,
    date_offered,
    date_ordered,
    date_delivered,
    date_recorded,
    date_completed,
    quantity_offered,
    quantity_ordered,
    quantity_delivered,
    quantity_recorded,
    quantity_completed
FROM logistics;

Step 2: Run a correlated subquery, as described here, over AGR_logistics:

-- Step 2: return, for every campaign, the single row that is furthest along
-- in the process: stored > ordered > offered, and within stored
-- completed > recorded > delivered.
SELECT
    AGR_01.id,
    AGR_01.campaign,
    AGR_01.event_type,
    AGR_01.sub_event_type,
    -- COALESCE picks the first non-NULL column, i.e. whichever date/quantity
    -- is populated on the selected row.
    COALESCE(
        AGR_01.date_offered, AGR_01.date_ordered,
        AGR_01.date_delivered, AGR_01.date_recorded, AGR_01.date_completed
    ) AS event_date,
    COALESCE(
        AGR_01.quantity_offered, AGR_01.quantity_ordered,
        AGR_01.quantity_delivered, AGR_01.quantity_recorded, AGR_01.quantity_completed
    ) AS event_quantity
FROM AGR_logistics AS AGR_01
WHERE AGR_01.id = (
    -- Correlated subquery: id of the highest-ranked row of the same campaign.
    SELECT AGR_02.id
    FROM AGR_logistics AS AGR_02
    WHERE AGR_02.campaign = AGR_01.campaign
    ORDER BY
        -- FIELD() returns the 1-based position of the value in the list, so
        -- ascending order puts 'stored' first. It returns 0 for NULL or
        -- unlisted values; an event_type outside this list would therefore
        -- sort ahead of 'stored'.
        FIELD(AGR_02.event_type, 'stored', 'ordered', 'offered'),
        FIELD(AGR_02.sub_event_type, 'completed', 'recorded', 'delivered'),
        -- Deterministic tie-breaker: if two rows share the same rank,
        -- take the one with the highest id instead of an arbitrary one.
        AGR_02.id DESC
    LIMIT 1
)
-- Without an ORDER BY the row order of the result is not guaranteed.
ORDER BY AGR_01.campaign;

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=25603&siteId=1