Over several previous blog posts, we’ve already discussed what information the
EXPLAIN FORMAT=JSON
output provides for some subqueries. You can review those discussions here, here and here. EXPLAIN FORMAT=JSON shows many details that you can’t get with other commands. Let’s now finish this topic and discuss the output for the rest of the subquery types.
First, let’s look at the subquery in the
HAVING
clause, such as in the following example:
select count(emp_no), salary
from salaries
group by salary
having salary > ALL (select avg(s)
from (select dept_no, sum(salary) as s
from salaries join dept_emp using (emp_no) group by dept_no) t
)
This example prints the number of employees and their salaries, if their salary is greater than the average salary in their department.
EXPLAIN FORMAT=JSON
provides a lot details on how this subquery is optimized:
mysql> explain format=json select count(emp_no), salary from salaries group by salary having salary > ALL (select avg(s) from (select dept_no, sum(salary) as s from salaries join dept_emp using (emp_no) group by dept_no) t)G
*************************** 1. row ***************************
EXPLAIN: {
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "3073970.40"
},
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": true,
"cost_info": {
"sort_cost": "2557022.00"
},
"table": {
"table_name": "salaries",
"access_type": "ALL",
"rows_examined_per_scan": 2557022,
"rows_produced_per_join": 2557022,
"filtered": "100.00",
"cost_info": {
"read_cost": "5544.00",
"eval_cost": "511404.40",
"prefix_cost": "516948.40",
"data_read_per_join": "39M"
},
"used_columns": [
"emp_no",
"salary",
"from_date"
]
},
"having_subqueries": [
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "771970.25"
},
"table": {
"table_name": "t",
"access_type": "ALL",
"rows_examined_per_scan": 3087841,
"rows_produced_per_join": 3087841,
"filtered": "100.00",
"cost_info": {
"read_cost": "154402.05",
"eval_cost": "617568.20",
"prefix_cost": "771970.25",
"data_read_per_join": "94M"
},
"used_columns": [
"dept_no",
"s"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 3,
"cost_info": {
"query_cost": "1019140.27"
},
"grouping_operation": {
"using_filesort": false,
"nested_loop": [
{
"table": {
"table_name": "dept_emp",
"access_type": "index",
"possible_keys": [
"PRIMARY",
"emp_no",
"dept_no"
],
"key": "dept_no",
"used_key_parts": [
"dept_no"
],
"key_length": "4",
"rows_examined_per_scan": 331570,
"rows_produced_per_join": 331570,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "737.00",
"eval_cost": "66314.00",
"prefix_cost": "67051.00",
"data_read_per_join": "5M"
},
"used_columns": [
"emp_no",
"dept_no"
]
}
},
{
"table": {
"table_name": "salaries",
"access_type": "ref",
"possible_keys": [
"PRIMARY",
"emp_no"
],
"key": "PRIMARY",
"used_key_parts": [
"emp_no"
],
"key_length": "4",
"ref": [
"employees.dept_emp.emp_no"
],
"rows_examined_per_scan": 9,
"rows_produced_per_join": 3087841,
"filtered": "100.00",
"cost_info": {
"read_cost": "334520.92",
"eval_cost": "617568.35",
"prefix_cost": "1019140.27",
"data_read_per_join": "47M"
},
"used_columns": [
"emp_no",
"salary",
"from_date"
]
}
}
]
}
}
}
}
}
}
]
}
}
}
1 row in set, 1 warning (0.00 sec)
Note (Code 1003): /* select#1 */ select count(`employees`.`salaries`.`emp_no`) AS `count(emp_no)`,`employees`.`salaries`.`salary` AS `salary` from `employees`.`salaries` group by `employees`.`salaries`.`salary` having <not>((`employees`.`salaries`.`salary` <= <max>(/* select#2 */ select avg(`t`.`s`) from (/* select#3 */ select `employees`.`dept_emp`.`dept_no` AS `dept_no`,sum(`employees`.`salaries`.`salary`) AS `s` from `employees`.`salaries` join `employees`.`dept_emp` where (`employees`.`salaries`.`emp_no` = `employees`.`dept_emp`.`emp_no`) group by `employees`.`dept_emp`.`dept_no`) `t`)))
We see that the subquery in the
HAVING
clause is not dependent, but cacheable:
"having_subqueries": [
{
"dependent": false,
"cacheable": true,
It has its own query block:
"query_block": {
"select_id": 2,
Which accesses table “t”:
"table": {
"table_name": "t",
"access_type": "ALL",
"rows_examined_per_scan": 3087841,
"rows_produced_per_join": 3087841,
"filtered": "100.00",
"cost_info": {
"read_cost": "154402.05",
"eval_cost": "617568.20",
"prefix_cost": "771970.25",
"data_read_per_join": "94M"
},
"used_columns": [
"dept_no",
"s"
],
Table “t” was also materialized from the subquery:
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 3,
Another kind of subquery is in the
SELECT
list. If we want to compare the salary of an employee with the average salary in the company, for example, we can use the query
select emp_no, salary, (select avg(salary) from salaries) from salaries
. Lets examine the
EXPLAIN
output:
mysql> explain format=json select emp_no, salary, (select avg(salary) from salaries) from salariesG
*************************** 1. row ***************************
EXPLAIN: {
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "516948.40"
},
"table": {
"table_name": "salaries",
"access_type": "ALL",
"rows_examined_per_scan": 2557022,
"rows_produced_per_join": 2557022,
"filtered": "100.00",
"cost_info": {
"read_cost": "5544.00",
"eval_cost": "511404.40",
"prefix_cost": "516948.40",
"data_read_per_join": "39M"
},
"used_columns": [
"emp_no",
"salary"
]
},
"select_list_subqueries": [
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "516948.40"
},
"table": {
"table_name": "salaries",
"access_type": "ALL",
"rows_examined_per_scan": 2557022,
"rows_produced_per_join": 2557022,
"filtered": "100.00",
"cost_info": {
"read_cost": "5544.00",
"eval_cost": "511404.40",
"prefix_cost": "516948.40",
"data_read_per_join": "39M"
},
"used_columns": [
"salary"
]
}
}
}
]
}
}
1 row in set, 1 warning (0.00 sec)
Note (Code 1003): /* select#1 */ select `employees`.`salaries`.`emp_no` AS `emp_no`,`employees`.`salaries`.`salary` AS `salary`,(/* select#2 */ select avg(`employees`.`salaries`.`salary`) from `employees`.`salaries`) AS `(select avg(salary) from salaries)` from `employees`.`salaries`
EXPLAIN FORMAT=JSON
in this case shows that the subquery is part of the first
query_block
, not dependent and cacheable.
The last type of subquery I want to discuss is the subquery updating values. For example, I added a new column to the
titles
table from the standard employees database:
mysql> alter table titles add column full_title varchar(100);
Query OK, 0 rows affected (24.42 sec)
Records: 0 Duplicates: 0 Warnings: 0
Now I want
full_title
to contain both the department’s name and title, separated by a space. I can use
UPDATE
with the subquery to achieve this:
update titles
set full_title=concat((select dept_name
from departments
join dept_emp using(dept_no)
where dept_emp.emp_no=titles.emp_no and dept_emp.to_date='9999-01-01')
,' ', title)
where to_date = '9999-01-01';
To find out how it is optimized, we can use
EXPLAIN FORMAT=JSON
:
mysql> explain format=json update titles set full_title=concat((select dept_name from departments join dept_emp using(dept_no) where dept_emp.emp_no=titles.emp_no and dept_emp.to_date='9999-01-01') ,' ', title) where to_date = '9999-01-01'G
*************************** 1. row ***************************
EXPLAIN: {
"query_block": {
"select_id": 1,
"table": {
"update": true,
"table_name": "titles",
"access_type": "index",
"key": "PRIMARY",
"used_key_parts": [
"emp_no",
"title",
"from_date"
],
"key_length": "59",
"rows_examined_per_scan": 442843,
"filtered": "100.00",
"using_temporary_table": "for update",
"attached_condition": "(`employees`.`titles`.`to_date` = '9999-01-01')"
},
"update_value_subqueries": [
{
"dependent": true,
"cacheable": false,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "1.35"
},
"nested_loop": [
{
"table": {
"table_name": "dept_emp",
"access_type": "ref",
"possible_keys": [
"PRIMARY",
"emp_no",
"dept_no"
],
"key": "PRIMARY",
"used_key_parts": [
"emp_no"
],
"key_length": "4",
"ref": [
"employees.titles.emp_no"
],
"rows_examined_per_scan": 1,
"rows_produced_per_join": 0,
"filtered": "10.00",
"cost_info": {
"read_cost": "1.00",
"eval_cost": "0.02",
"prefix_cost": "1.22",
"data_read_per_join": "1"
},
"used_columns": [
"emp_no",
"dept_no",
"to_date"
],
"attached_condition": "(`employees`.`dept_emp`.`to_date` = '9999-01-01')"
}
},
{
"table": {
"table_name": "departments",
"access_type": "eq_ref",
"possible_keys": [
"PRIMARY"
],
"key": "PRIMARY",
"used_key_parts": [
"dept_no"
],
"key_length": "4",
"ref": [
"employees.dept_emp.dept_no"
],
"rows_examined_per_scan": 1,
"rows_produced_per_join": 0,
"filtered": "100.00",
"cost_info": {
"read_cost": "0.11",
"eval_cost": "0.02",
"prefix_cost": "1.35",
"data_read_per_join": "5"
},
"used_columns": [
"dept_no",
"dept_name"
]
}
}
]
}
}
]
}
}
1 row in set, 1 warning (0.00 sec)
Note (Code 1276): Field or reference 'employees.titles.emp_no' of SELECT #2 was resolved in SELECT #1
We can see in this output that the subquery is dependent, not cacheable, and will be executed for each row that needs to be updated.
Conclusion:
EXPLAIN FORMAT=JSON
provides various information about all kind of subqueries.