SQL: structured query language

DDL: data defintion language

create table
drop table

DML: data manipulation language

insert
update
delete
select

functions:

select 
 a.*,
 concat(substr(fname,1,1),substr(lname,1,1)) as initials,
from customer

select now();


-- aggregates

select count() from customer;

select min(dob) from customer where state='NY';

-- percent of customers that are from NY
select sum(case when state='NY' then 1 else 0 end)/
  sum(1.0)*100 as prcnt
from customer;

--- CTAS

create table custemailsny as 
  select custid,fname,lname,email
  from customer
  where state='NY';

--- CTEs; common table expressions

--find the oldest customers

with oldestdob as (
  select min(dob) mindob from customer
)
select a.*
from customer a
  inner join oldestdob b
  on a.dob = b.mindob


---windowing functions

assign rank to customers based on age.

create table custranks as 
select a.*,
  dense_rank() over (order by dob) rnk
from customer;

---
custevent(eventid,custid,type,itemid)

---when a cusetomer purchases something,
---what's the probability that they were 
---viewing another item (previous event).

with custeventlag as (
 select a.*,
  lag(type) over (partition by custid 
    order by eventid) last_type,
  lag(itemid) over (partition by custid 
    order by eventid) last_itemid
 from custevent
)
select sum(case when last_type='VIEW' 
                 and last_itemid!=itemid  then 1
                else 0 end) /
       sum(1.0) as problty
from custeventlag a
where a.type='PURCHASE'

--------------------------------------------
--- ride sharing company.

car(carid,fname,lname,lic)
cust(custid,fname,lname)
request(requestid,tim,custid,addressfrom,addressto)
reqcar(requestid,tim,carid)
pickup(tim,carid,custid)
dropoff(tim,carid,custid)

---When did John Doe request a car?
select b.tim
from cust a
  inner join request b
  on a.custid=b.custid
where lname='Doe' and fname='John'
order by tim desc;

John Doe's last trip, how long did it last?

with pandd as (
 select a.*,'P' t from pickup union all
 select a.*, 'D' t from dropoff
),
johndoeevents as (
 select a.*,
   lead(tim) over (partition by custid 
      order by tim) next_tim
 from pandd a
   inner join cust b
   on a.custid=b.custid
 where b.lname='Doe' and b.fname='John'
),
tims as (
  select a.*,
    coalesce(next_tim,now()) - tim as len,
   row_number() over (order by tim desc) rn
  from johndoeevents 
  where t='P'
)
select *
from tims
where rn=1

-

car(carid,fname,lname,lic,seats)
cust(custid,fname,lname)
request(requestid,tim,custid,addressfrom,addressto)
reqcar(requestid,tim,carid)
pickup(tim,carid,custid)
dropoff(tim,carid,custid)

- Did John Doe ever share a ride with Jane Johnson?

with custids as (
 select custid,fname,lname
 from cust
 where (lname='Doe' and fname='John') or 
   (lname='Johnson' and fname='Jane') 
),
pandd as (
 select a.*,'P' t 
   from pickup natural inner join custids
  union all
 select a.*, 'D' t 
   from dropoff natural inner join custids
),
custcnt as (
 select a.*,
 sum(case 
       when t='P' and fname='John' and lname='Doe' then 1
       when t='D' and fname='John' and lname='Doe' then -1
       else 0 ) over (partition by carid order by tim) 
         johncnt,            
 sum(case 
       when t='P' and fname='Jane' and lname='Johnson' then 1
       when t='D' and fname='Jane' and lname='Johnson' then -1
       else 0 ) over (partition by carid order by tim) 
         janecnt    
 from pandd a
)
select count(*)
from custcnt
 where johncnt>0 and janecnt>0


car(carid,fname,lname,lic,seats)
cust(custid,fname,lname)
request(requestid,tim,custid,addressfrom,addressto)
reqcar(requestid,tim,carid)
pickup(tim,carid,custid)
dropoff(tim,carid,custid)

--what's the average wait time between request and pickup?

with events as (
 select a.tim,a.custid,b.carid,'R' as event_type_cd
 from request a 
  inner join reqcar b
 union all
 select tim,custid,carid, 'P' as event_type_cd
),
lagtim as (
 select a.*,
  lag(tim) over (partition by carid,custid order by tim) lag_tim
 from events
)
select avg(tim - lag_tim)
from lagtim
where event_type_cd='P'


---what's the license place of the car for request=1234?

select c.lic
from reqcar a
  inner join car c
  on a.carid=c.carid
where requestid=1234


---what's the maximum number of passengers that
---were ever picked up?

with pandd as (
 select a.*,'P' t  from pickup a
  union all
 select a.*, 'D' t from dropoff a
),
rtot as (
 select a.*,
   sum(case when t='P' then 1 
            when t='D' then -1 
            else 0 end)
     over (partition by carid order by tim) tot
 from pandd a
)
select max(tot)
from rtot


--assume we have: dist(gps1,gps2)

car(carid,fname,lname,lic,seats)
cust(custid,fname,lname)
request(requestid,tim,custid,addressfrom,addressto,custgpsx,custgpsy)
reqcar(requestid,tim,carid)
pickup(tim,carid,custid,gpsx,gpsy)
dropoff(tim,carid,custid,gpsx,gpsy)
carloc(locid,tim,carid,gpsx,gpsx)

---find cars within 5 minutes of customer request
--- (speed limit 25mph, so ~1.5miles?)

select *
from request r
  inner join carloc b
  on r.tim < b.tim and r.tim+10min >= b.tim and
    dist(custgpsx,custgpsy,b.gpsx,b.gpsy)<1.5


---
with req1 as (
 select a.*,
  cast(floor(tim / 10min) as int) as btim,
  cast(floor(custgpsx / 1mile) as int) as bx,
  cast(floor(custgpsy / 1mile) as int) as by
 from request a
),
car1 as (
 select a.*,
  cast(floor(tim / 10min) as int) as btim,
  cast(floor(gpsx / 1mile) as int) as bx,
  cast(floor(gpsy / 1mile) as int) as by
 from carloc a
),
adjbuckets as (
 select tim,custid,custgpsx,custgpsy, bx,by from req1 union all
 select tim,custid,custgpsx,custgpsy, bx,by+1 from req1 union all
 select tim,custid,custgpsx,custgpsy, bx+1,by from req1 union all
 select tim,custid,custgpsx,custgpsy, bx+1,by+1 from req1 union all
 select tim,custid,custgpsx,custgpsy, bx,by-1 from req1 union all
 select tim,custid,custgpsx,custgpsy, bx-1,by from req1 union all
 select tim,custid,custgpsx,custgpsy, bx-1,by-1 from req1 union all
 select tim,custid,custgpsx,custgpsy, bx-1,by+1 from req1 union all
 select tim,custid,custgpsx,custgpsy, bx+1,by-1 from req1 union all
)
select ...
from car1 a
  inner join adjbuckets b
where a.bx=b.bx and a.by=b.by and --bucket join
    and dist(custgpsx,custgpsy,b.gpsx,b.gpsy)<1.5

Comments

Popular posts from this blog