SQL... relational databases


2 parts... DDL, DML...

DDL: create table... drop table...

DML: CRUD operations, insert, rerieve, update, delete...

insert/select/update/delete

-- CTAs "create table as"
create table blah as 
  select a,b,c,d
  from soemthing;

-- CTEs: common table expressions
with blah as (
 select a,b,c from somewhere
),
glah as (
  select b,c,d from somewhereelse
)
select *
from blah a
  inner join glah b
  using(b,c)

--functions
select now();

select to_char(now(),'YYYY-MM-DD') curryear;

select count(*)
from glah;

select min(tim),max(tim) from somerecords;

count, min,max,sum,stddev,avg,


create table daily_return (
  trade_dt date,
  ticker varchar(32),
  interest numeric(18,8)  --percent return
);

--- if you have daily_return, and you invested
--- $10000 on 2018-01-01 in IBM, sold on 
--- 2018-10-01, how much profit would you have?

select  10000*exp(sum(log(1+interest/100))) 
from daily_return 
where ticker='IBM' and 
  trade_dt between '2018-01-01' and '2018-10-01'


---joins...

---BANK schema
customer(custid,fname,lname,ssn)
address(addid,custid,street,city,state,zip)
phone(phoneid,custid,phone,type)
account(accntid,custid,type)
journal(tid,accntid,amnt,tim)

--double entry accounting
--create special 'CASH' account, accntid=1

---customer 3 deposits $100 into account 122123

insert into journal(tid,accntid,amnt,tim) 
 values(1,122123,100,now())
insert into journal(tid,accntid,amnt,tim) 
 values(1,1,-100,now())

--- how much money is there in account 3?
select sum(amnt)
from journal
where accntid=3

---what's the balance on 2017-12-31?

select sum(amnt)
from journal
where accntid=3 and tim<'2018-01-01'


---how much money does the bank hold?
select -sum(amnt)
from journal 
where accntid=1

select sum(amnt)
from journal 
where accntid!=1

--how much money do john doe (s) have in checking?

select sum(amnt)
from customer a
  inner join account b
  on a.custid=b.custid
  inner join journal c
  on b.accntid=c.accntid
where lname='Doe' and fname='John'

--in which zip code does the person with the 
-- most money live?

with indtot as (
  select a.custid,sum(amnt) tot
  from customer a
    inner join account b
    on a.custid=b.custid
    inner join journal c
    on b.accntid=c.accntid
  group by a.custid
),
maxtot as (
  select max(tot) mtot from indtot
)
select a.custid,c.zip,a.tot
from indtot a
  inner join maxtot b
  on a.tot=b.mtot
  inner join address c
  on a.custid=c.custid
group by a.custid,c.zip,a.tot
order by 1,2;


----------------------------------

explain select sum(amnt)
from customer a
  inner join account b
  on a.custid=b.custid
  inner join journal c
  on b.accntid=c.accntid
where lname='Doe' and fname='John'

------------------------------------



inner loop query

table1, table2,  both key "key1,key2"
--what we're trying to do is:

select *
from table1 a
  inner join table2 b
  on a.key1=b.key1 and a.key2=b.key2

----------------------------------
for(all records in table1){
  for(all records in table2){
    if(table1.key1==table2.key1 &&
        table1.key2==table2.key2){
       output table1,table2 record.
    }
  }
}

---------------------------------
-- sort/merge (or merge/sort) join 
sort table1, on key1,key2 
sort table2, on key1,key2

while(table1 has records && table2 has records){
  if(table1.{key1,key2} < table2.{key1,key2}){
    toss away table1 record.
  }else if(table1.{key1,key2} > table2.{key1,key2}){
    toss away table2 record.
  }else{
    for(same key table1 records){
      for(same key table2 records){
        output table1,table2 records.
      }
    }
  }
}
------------------------------------------
--hash join
stbl is smaller(table1,table2)
btbl is biggger(table1,table2)

for(all records in stbl){
   key = stbl.{key1,key2}
   hkey = hash(key)
   if(htable[hkey] is empty)
     htable[hkey] = new list
   htable[hkey].add(record)
}

for(all records in btbl){
   key = btbl.{key1,key2}
   hkey = hash(key)
   for(entries v in htable[hkey]){
      if(v.{key1,key2} == record.{key1,key2}
         output btbl,v records.
   }
}

// # of loops is linear.
// but most hash implementations are log(n)

////////////////////////////////////

advertisement(advertisementid,itemid,starttime,
   endtime,channel)
custevent(custid,eventinfo,tim,itemid)

// find all customers who purchased an itemid
// within 5 minutes of end of advertisement
// or end of advetisement

select a.*,b.*
from custevent a 
  inner join advertisement b
  on a.itemid=b.itemid and 
     a.tim >= b.start and 
     a.tim <=(b.end+5 minutes)
where eventinfo='purchase'
-------------------------------------------------

sort customer events by tim
for(advertisement events){
  find index of startime in customer events
  find index of endtime+5min in customer events
  output all customer events between these indexes
}

------------------------------------------------
generate a bucket for every minute.

with numbs as (
 select 0 n union all .....
)
--- remember this from last class?

with recursive numbs(n) as (
   select 0 n
   union all 
   select n+1 from numbs where n<10000
)
select * from numbs;

----generate a table called "minutes"
minutes(minuteid)
---24*60 records.

with m1 as (
  select a.*,
    to_char(tim,'HHMM') minuteid
  from customer a
),
with a1 as (
  select a.*,b.minuteid
  from advertisement a 
    inner join minutes b
    on to_char(a.start,'HHMM') >= b.minuteid and
      to_char(a.end+5min,'HHMM') <= b.minuteid
)
select *
from m1 a
    inner join a1 b     -m1a1
    on a.minuteid=b.minuteid and
     a.itemid=b.itemid and 
     a.tim >= b.start and 
     a.tim <=(b.end+5 minutes)

----------------------------------------------

Comments

Popular posts from this blog