-- Queries related to lecture on October 5, 2012 -- Rasmus Pagh -- Schema (IMDB data): -- movie (id, title, year, color, country, language, distrVotes,imdbVotes, imdbRank, releaseDate) -- genre (movieId, genre) -- movieRef (fromId,toId) use imdb -- Motivation: Join example mysql> select count(*) from person where year(birthdate)=1875; +----------+ | count(*) | +----------+ | 35 | +----------+ 1 row in set (0.07 sec) mysql> select count(*) from movie where year=1920; +----------+ | count(*) | +----------+ | 43 | +----------+ 1 row in set (0.03 sec) mysql> select count(distinct movieId) from involved,person where year(birthdate)=1875 and personId=person.id; +-------------------------+ | count(distinct movieId) | +-------------------------+ | 900 | +-------------------------+ 1 row in set (0.08 sec) mysql> select count(distinct personId) from involved,movie where year=1920 and movieId=movie.id; +--------------------------+ | count(distinct personId) | +--------------------------+ | 214 | +--------------------------+ 1 row in set (6.76 sec) -- Why this difference? The explanation lies in the index: mysql> show indexes from involved; +----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ | Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | +----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ | involved | 0 | PRIMARY | 1 | personId | A | 352331 | NULL | NULL | | BTREE | | | | involved | 0 | PRIMARY | 2 | movieId | A | 1761658 | NULL | NULL | | BTREE | | | | involved | 0 | PRIMARY | 3 | role | A | 1761658 | NULL | NULL | | BTREE | | | +----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ 3 rows in set (0.01 sec) -- Point query mysql> select * from person where birthdate = '1975-02-06'; +--------+-----------------+--------+------------+-----------+--------+ | id | name | gender | birthdate | deathdate | height | +--------+-----------------+--------+------------+-----------+--------+ | 406298 | Nabil Elouahabi | m | 1975-02-06 | NULL | NULL | +--------+-----------------+--------+------------+-----------+--------+ 1 row in set (0.10 sec) mysql> create index aa on person(birthdate); mysql> select * from person where birthdate = '1975-02-06'; +--------+-----------------+--------+------------+-----------+--------+ | id | name | gender | birthdate | deathdate | height | +--------+-----------------+--------+------------+-----------+--------+ | 406298 | Nabil Elouahabi | m | 1975-02-06 | NULL | NULL | +--------+-----------------+--------+------------+-----------+--------+ 1 row in set (0.00 sec) mysql> show indexes from movie; +-------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ | Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | +-------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ | movie | 0 | PRIMARY | 1 | id | A | 51421 | NULL | NULL | | BTREE | | | | movie | 1 | title | 1 | title | A | 189 | NULL | NULL | YES | BTREE | | | | movie | 1 | a | 1 | imdbRank | A | 189 | NULL | NULL | YES | BTREE | | | +-------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ 3 rows in set (0.14 sec) mysql> select count(*) from movie where title LIKE "Pu%" and year=1994; +----------+ | count(*) | +----------+ | 2 | +----------+ 1 row in set (0.36 sec) mysql> select count(*) from movie where year=1994 and title LIKE "Pu%"; +----------+ | count(*) | +----------+ | 2 | +----------+ 1 row in set (0.00 sec) mysql> select count(*) from movie where year=1994; +----------+ | count(*) | +----------+ | 850 | +----------+ 1 row in set (0.04 sec) -- Range query mysql> select * from person where birthdate > '2005-02-06'; +---------+------------------------+--------+------------+-----------+--------+ | id | name | gender | birthdate | deathdate | height | +---------+------------------------+--------+------------+-----------+--------+ | 1566742 | Carolyn Rose | f | 2005-02-17 | NULL | NULL | ... | 760738 | Phoenix Nicholson | m | 2010-12-23 | NULL | NULL | +---------+------------------------+--------+------------+-----------+--------+ 49 rows in set (0.00 sec) mysql> drop index aa on person; Query OK, 0 rows affected (0.08 sec) Records: 0 Duplicates: 0 Warnings: 0 mysql> select * from person where birthdate > '2005-02-06'; +---------+------------------------+--------+------------+-----------+--------+ | id | name | gender | birthdate | deathdate | height | +---------+------------------------+--------+------------+-----------+--------+ | 216793 | Aidan Barker | m | 2005-11-14 | NULL | NULL | ... | 1688530 | Ava White | f | 2005-06-24 | NULL | NULL | +---------+------------------------+--------+------------+-----------+--------+ 49 rows in set (0.12 sec) -- Join (multiple point queries) -- Number of people involved in movies of 2010: mysql> select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +--------------------------+ | count(distinct personId) | +--------------------------+ | 24610 | +--------------------------+ 1 row in set (8.37 sec) mysql> create index aaa on involved(movieId); Query OK, 0 rows affected (1 min 6.94 sec) Records: 0 Duplicates: 0 Warnings: 0 mysql> select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +--------------------------+ | count(distinct personId) | +--------------------------+ | 24610 | +--------------------------+ 1 row in set (23.02 sec) mysql> create index year on movie(year); Query OK, 0 rows affected (0.44 sec) Records: 0 Duplicates: 0 Warnings: 0 mysql> select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +--------------------------+ | count(distinct personId) | +--------------------------+ | 24610 | +--------------------------+ 1 row in set (9.14 sec) mysql> create index idx2 on involved(movieId,personId); Query OK, 0 rows affected (58.53 sec) Records: 0 Duplicates: 0 Warnings: 0 mysql> select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +--------------------------+ | count(distinct personId) | +--------------------------+ | 24610 | +--------------------------+ 1 row in set (5.10 sec) mysql> explain select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +----+-------------+----------+--------+---------------+---------+---------+-----------------------+---------+-------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +----+-------------+----------+--------+---------------+---------+---------+-----------------------+---------+-------------+ | 1 | SIMPLE | involved | index | idx,idx2 | idx2 | 10 | NULL | 5189132 | Using index | | 1 | SIMPLE | movie | eq_ref | PRIMARY,year | PRIMARY | 4 | imdb.involved.movieId | 1 | Using where | +----+-------------+----------+--------+---------------+---------+---------+-----------------------+---------+-------------+ 2 rows in set (0.00 sec) mysql> analyze table movie; +------------+---------+----------+----------+ | Table | Op | Msg_type | Msg_text | +------------+---------+----------+----------+ | imdb.movie | analyze | status | OK | +------------+---------+----------+----------+ 1 row in set (0.55 sec) mysql> analyze table involved; +---------------+---------+----------+----------+ | Table | Op | Msg_type | Msg_text | +---------------+---------+----------+----------+ | imdb.involved | analyze | status | OK | +---------------+---------+----------+----------+ 1 row in set (0.09 sec) mysql> explain select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +----+-------------+----------+------+---------------+------+---------+---------------+------+--------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +----+-------------+----------+------+---------------+------+---------+---------------+------+--------------------------+ | 1 | SIMPLE | movie | ref | PRIMARY,year | year | 5 | const | 1613 | Using where; Using index | | 1 | SIMPLE | involved | ref | idx,idx2 | idx2 | 5 | imdb.movie.id | 51 | Using where; Using index | +----+-------------+----------+------+---------------+------+---------+---------------+------+--------------------------+ 2 rows in set (0.01 sec) mysql> select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +--------------------------+ | count(distinct personId) | +--------------------------+ | 24610 | +--------------------------+ 1 row in set (0.09 sec) mysql> explain select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +----+-------------+----------+------+---------------+------+---------+---------------+------+--------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +----+-------------+----------+------+---------------+------+---------+---------------+------+--------------------------+ | 1 | SIMPLE | movie | ref | PRIMARY,year | year | 5 | const | 1613 | Using where; Using index | | 1 | SIMPLE | involved | ref | idx,idx2 | idx2 | 5 | imdb.movie.id | 51 | Using where; Using index | +----+-------------+----------+------+---------------+------+---------+---------------+------+--------------------------+ 2 rows in set (0.00 sec) --- Is idx2 really needed? mysql> drop index idx2 on involved; Query OK, 0 rows affected (0.64 sec) Records: 0 Duplicates: 0 Warnings: 0 mysql> select count(distinct personId) from movie,involved where year=2010 and movie.id = movieId; +--------------------------+ | count(distinct personId) | +--------------------------+ | 24610 | +--------------------------+ 1 row in set (2.06 sec) --- Yes!