relational databases

 tuples... things like: (a,b,c,d,e,d,e,g)

 relations are sets of tuples


  select : filter tuples based on some condition

  project: (a,b,c,d) => (a,c)

  join: (userid,name), (userid,email) => 
       (userid,name,email)

SQL : structured query language

DDL: define structure: data definition language


drop table blah;
create table blah(blahid bigint, name varchar(50))


DML: data manipulation language

CRUD: create retrieve update delete

insert blah(blahid,name) values (1,'blah');
update blah set name='glah' where blahid=1;
select blahid,name from blah where blahid=1;
delete blah where blahid=1;



drop table customer;
create table customer (
  custid bigint,
  fname varchar(20),
  lname varchar(20),
  email varchar(60),
  dob date,
  street1 varchar(50),
  street2 varchar(50),
  city varchar(50),
  state varchar(2),
  zip varchar(5)
);

-- get emails of customers born after 2000.


select distinct email 
from customer
where dob>=cast('2000-01-01' as date)

select distinct email 
from customer
where dob>=to_date('2000-01-01','YYYY-MM-DD')



--first, lets get domain name
-- get counts by domain name

select a.*,
 substring(email,position('@' in email),1000) dm
from customer


--then there's another set of functions:
--aggregates


-- count customers in NY
select count(*)
from customer
where state='NY'


-- count customers in NY who were born after 2000.
select count(*)
from customer
where state='NY' and 
  dob>=cast('2000-01-01' as date)
  
-- count emails of customers
select count(email) emailcnt, count(*) cnt
from customer

--get count of customers by state
select state,count(*)
from customer
group by state


---if want to order by count, descending
select state,count(*)
from customer
group by state
order by 2 desc;



-- get counts by domain name


select 
 substring(email,position('@' in email),1000) dn,
 count(*)
from customer
group by 
 substring(email,position('@' in email),1000)

--- cleaner way for this:
with custwithdomain as (
  select a.*,
   substring(email,position('@' in email),1000) dn
  from customer a
)
select dn,count(*)
from custwithdomain
group by dn
order by 2 desc;

--find the minimum and maximum dob
select min(dob) mindob, max(dob) maxdob
from customer

---find average age by state

select a.*, 
  DATEDIFF(year, dob, cast(now() as date)) age
from customer a

-- 
select state,
 avg(DATEDIFF(year, dob, cast(now() as date))) avgage
from customer a
group by state
order by 2 desc

---what's the age distribution of customers
with custage as (
 select a.*, 
  DATEDIFF(year, dob, cast(now() as date)) age
 from customer a
),
dcust as (
 select a.*,
   cast(age/10 as int) grp1,
   case when age < 32 then 'YNG'
        when age < 55 then 'MAG'
        when age < 70 then 'ELD'
        else 'VELD' 
   end lbl
 from custage
)
select grp1,count(*)
from dcust 
group by grp1
order by 2 desc


---age group distribution by state.
with custage as (
 select a.*, 
  DATEDIFF(year, dob, cast(now() as date)) age
 from customer a
),
dcust as (
 select a.*,
   cast(age/10 as int) grp1,
   case when age < 32 then 'YNG'
        when age < 55 then 'MAG'
        when age < 70 then 'ELD'
        else 'VELD' 
   end lbl
 from custage
)
select state,grp1,count(*)
from dcust 
group by state,grp1
order by 3 desc

---
count by state, how many in each age group category.

--- state, YNGcnt, MAGcnt, ELDcnt, VELDcnt

with custage as (
 select a.*, 
  DATEDIFF(year, dob, cast(now() as date)) age
 from customer a
),
dcust as (
 select a.*,
   cast(age/10 as int) grp1,
   case when age < 32 then 'YNG'
        when age < 55 then 'MAG'
        when age < 70 then 'ELD'
        else 'VELD' 
   end lbl
 from custage
)
select state,
 sum(case when lbl='YNG' then 1 else 0 end) YNGcnt,
 sum(case when lbl='MAG' then 1 else 0 end) MAGcnt,
 sum(case when lbl='ELD' then 1 else 0 end) ELDcnt,
 sum(case when lbl='VELD' then 1 else 0 end) VELDcnt
from dcust
group by state


---library

create table liblog(
  liblogid bigint,
  tim timestamp,
  bookid bigint,
  custid bigint,
  event_type varchar(1)
)

create table book (
  bookid bigint,
  title varchar(100),
  isbn varchar(30),
)


--event_type: N is new book, D is destroyed book,
-- B is borrowed, R is returned.

---how many times has bookid=124 been borrowed?

select count(*)
from liblog
where bookid=124 and event_type='B'

--- how many books are in the library?
--- (count multiple copies as separate books)

select count(*)
from liblog
where event_type='N'

--- how many unique books are in the library?
select count(distinct bookid)
from liblog
where event_type='N'

--- how many books are currently borrowed?

select sum(case when event_type='B' then 1
    when event_type='R' then -1 else 0 end) cnt
from liblog
where event_type in ('B','R')

--- is there a copy of book 123 available
--- to borrow right now?

select 
  sum(case when event_type in ('N','R') then 1
          when event_type in ('B','D') then -1) cnt
from liblog
where bookid=123 

---was there a copy of book 123 available 
---to borrow last monday?

select 
  sum(case when event_type in ('N','R') then 1
          when event_type in ('B','D') then -1) cnt
from liblog
where bookid=123 and 
  tim<=cast('2018-09-10' as date)

---order days by borrowing volume (biggest first).

select cast(tim as date) dt,count(*)
from liblog
where event_type='B'
group by cast(tim as date)
order by 2 desc;

--find all customers who need to return a book
select custid
from liblog
where event_type in ('B','R')
group by custid
having 
  sum(case when event_type in ('R') then -1
       when event_type in ('B') then 1) > 0

---borrowing window is 2 weeks. 
---find all customers who are overdue to return
---the book. (simplify a bit; for now).

with bevens as (
 select custid,bookid,
  max(case when event_type='B' then tim 
         else null end) btim
  max(case when event_type='R' then tim 
         else null end) rtim
 from liblog
 group by custid,bookid
)
select custid,bookid,btim
from bevents
where 
  (rtim is null or btim > rtim)
  and
  DATEDIFF(day,cast(btim as date),
     cast(now() as date)) > 14




with bevens as (
 select custid,bookid,
  max(case when event_type='B' then tim 
         else null end) btim
  max(case when event_type='R' then tim 
         else null end) rtim
 from liblog
 group by custid,bookid
),
overdueinfo as (
  select custid,bookid,btim 
  from bevents  
 where 
  (rtim is null or btim > rtim)
  and
  DATEDIFF(day,cast(btim as date),
     cast(now() as date)) > 14
)
select b.fname,b.lname,b.email,c.title,a.btim
from overdueinfo a 
  inner join customer b
  on a.custid=b.custid
  inner join book c
  on a.bookid =c.bookid

Comments

Popular posts from this blog