-- Queries related to lectures 4 and 5, September 28 and October 5, 2012 -- Rasmus Pagh -- Schema (IMDB data): -- Movie (id, title, year, color, country, language, distrVotes,imdbVotes, imdbRank, releaseDate) -- Person(id,name,gender,birthdate,deathdate,height) -- Involved(personId,movieId,role) -- movieRef(fromId,toId) -- danishMovies (title, year, director, imdbVotes, imdbRank) create view mov as (select id,title,year,imdbVotes,imdbRank from movie); use IMDB show tables desc mov -- Selection, projection: ------------------------- select * from person where height > 210; select * from person where height > 210 and deathdate is NULL; select name as tallperson from person where height > 210; select distinct year from movie; select distinct year(birthdate) as year from person; select distinct 10*floor(year/10) as decade from danishMovies; -- Cartesian product: --------------------- select * from person where name LIKE "Tom Cr%"; --- Contrary to popular belief, not born on the 4th of July (but close) select * from mov where title LIKE "Rain %"; select title, name from mov, person where title LIKE "Rain %" and name LIKE "Tom Cr%"; -- Join: -------- select title, name from mov, involved, person where title LIKE "Rain %" and name LIKE "Tom Cr%" and mov.id=movieId and personId=person.id; select title, name from mov, involved, person where name LIKE "Tom Cr%" and mov.id=movieId and personId=person.id; select year, title, name from mov, involved, person where not title LIKE '"%' and name LIKE "Tom Cr%" and mov.id=movieId and personId=person.id order by year; -- condition on title filters away appearances in TV shows select * from Movie, movieRef where title="Pulp Fiction" and id=fromId; select m1.title,m1.year,m2.year from mov m1, mov m2 where m1.title=m2.title and m1.id>m2.id; select M2.title, M2.year from Movie M1, Movie M2, movieRef where M1.title="Pulp Fiction" and M1.id=fromId and M2.id=toId ORDER BY year; -- String operations: --------------------- select * from mov where title = "pULP fIcTiOn"; select title from mov where title LIKE "Pu__ Fiction"; select title from mov where title LIKE "Pu__ %"; -- Set operations ----------------- (select distinct year(birthdate) as year from person) union (select distinct year(deathdate) as year from person); (select distinct year(birthdate) as year from person) union all (select distinct year(deathdate) as year from person); -- INTERSECT is not a keyword in MySQL, but can be implemented as a join: SELECT r1.year FROM (select distinct year(birthdate) as year from person) r1, (select distinct year(deathdate) as year from person) r2 WHERE r1.year=r2.year; -- Aggregation: --------------- SELECT SUM(height) FROM person; SELECT AVG(height) FROM person; SELECT COUNT(height) FROM person; SELECT COUNT(*) FROM person; SELECT AVG(DATEDIFF(sysdate(),birthdate)/365.25) from person where deathdate is null; -- Grouping: ------------ select year,count(*) from movie group by year; select year(birthdate),count(*) from person group by year(birthdate); select country,avg(imdbRank) from movie group by country; select country,avg(imdbRank) r from movie group by country order by r; select director,count(*) from danishMovies group by director having count(*)>5; select country,count(*),avg(imdbRank) r from movie group by country having count(*)>100 and avg(imdbRank)>6.5 order by r; select country,avg(imdbRank) r from movie group by country having country LIKE "N%" order by r select country, language, count(*) from movie group by country,language; -- Subqueries in FROM: ---------------------- select * from (select * from movie where year=1994) A where country="Denmark"; select country, max(c) from (select country, language, count(*) c from movie group by country,language) cl group by country; select movie.country, language from (select country, max(c) as ma from (select country, language, count(*) c from movie group by country,language) cl group by country) cm, movie where cm.country = movie.country group by movie.country, movie.language, cm.ma having count(*)=cm.ma; -- Subqueries in WHERE: ----------------------- SELECT director,title,year FROM danishMovies d1 WHERE year = (SELECT MAX(year) FROM danishMovies d2 WHERE d1.director=d2.director); SELECT director,title,year FROM danishMovies d1 WHERE year NOT IN (SELECT MAX(year) FROM danishMovies d2 WHERE d1.director=d2.director); SELECT director,title,year FROM danishMovies d1 WHERE d1.title = (SELECT title FROM danishMovies d2 WHERE d1.director=d2.director and d1.year=d2.year); -- Gives runtime error since there are directors with two movies in same year. -- Views, outer joins: ---------------------- CREATE VIEW imdb AS (SELECT title,year,name,birthdate,deathdate,height FROM mov, involved, person WHERE mov.id=movieId and personId=person.id); select title,year,avg(height) from imdb group by title,year order by avg(height) desc limit 10; CREATE VIEW BestMovies AS (SELECT id,title,year,country,imdbRank FROM Movie WHERE imdbRank>9.3); CREATE VIEW USAmovies AS (SELECT id,title,year,country,imdbRank FROM Movie WHERE country="USA"); SELECT BestMovies.title,BestMovies.country FROM (BestMovies LEFT JOIN USAMovies ON BestMovies.id=USAmovies.id) WHERE USAmovies.id IS NULL; -- Actors having at most 5 movies? select name, count(*) from person,involved where personId=person.id group by name having count(*)<=5; -- Problem: Does not list actors with no movie! Solution: select name, count(*) from (person left join involved on personId=person.id) group by name having count(*)<=5; -- NULLs: --------- SELECT * FROM BestMovies WHERE ((country="Canada") or (country!="Canada" and imdbRank>9.5)); -- Works correctly if no country is the empty string, but incorrectly if it is null. UPDATE Movie SET country=null WHERE id=643; SELECT * FROM BestMovies WHERE ((country="Canada") or (country!="Canada" and imdbRank>9.5)); -- "Band of brothers" is gone! select count(*) from Person; select count(height) from Person; select count(*) from Person where height IN (null); select count(*) from Person where height IS NULL; select count(*) from Person where height=NULL; select count(*) from Person where (height,gender) IN ((null,'m')); select count(*) from Person where (height,gender) NOT IN ((null,'m')); select count(*) from Person where gender!='m'; -- Updating views: ------------------ create view movietitles as (select title from movie); insert into movietitles values ('Superrrrrman'); select * from movie where title like '%rrrr%'; +-------+--------------+------+-------+---------+----------+------------+-----------+----------+-------------+ | id | title | year | color | country | language | distrVotes | imdbVotes | imdbRank | releaseDate | +-------+--------------+------+-------+---------+----------+------------+-----------+----------+-------------+ | 0 | Superrrrrman | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | | 41135 | RRRrrrr!!! | 2004 | Color | France | French | 0000111001 | 5672 | 5.4 | NULL | +-------+--------------+------+-------+---------+----------+------------+-----------+----------+-------------+ 2 rows in set (0.07 sec) create view movietitles2 as (select distinct title from movie); insert into movietitles2 values ('Superrrrrman 2'); ERROR 1471 (HY000): The target table movietitles2 of the INSERT is not insertable-into