Не могли бы вы, пожалуйста, помочь мне с MySQL (JOIN, GROUP BY, индексы)?
Есть таблицы `projects` (362 тыс. строк) и `projects_employees` (4,27 млн строк), связанные отношением «один ко многим». Я пытаюсь получить агрегированные данные по каждому сотруднику, и это занимает 6–7 секунд. Можно ли ускорить запрос, или это предел?
CREATE TABLE `projects` (
`id` int NOT NULL AUTO_INCREMENT,
`client_id` int DEFAULT NULL,
`manager_id` int DEFAULT NULL,
`team_size` int DEFAULT NULL,
`status_code` int DEFAULT NULL,
`priority_level` int DEFAULT NULL,
`risk_level` int DEFAULT NULL,
`estimated_hours` int DEFAULT NULL,
`actual_hours` int DEFAULT NULL,
`remaining_hours` int DEFAULT NULL,
`budget_cents` int DEFAULT NULL,
`cost_cents` int DEFAULT NULL,
`progress_percent` int DEFAULT NULL,
`tasks_total` int DEFAULT NULL,
`tasks_completed` int DEFAULT NULL,
`bugs_found` int DEFAULT NULL,
`bugs_fixed` int DEFAULT NULL,
`meetings_held` int DEFAULT NULL,
`files_uploaded` int DEFAULT NULL,
`comments_posted` int DEFAULT NULL,
`reviews_requested` int DEFAULT NULL,
`approvals_received` int DEFAULT NULL,
`escalations` int DEFAULT NULL,
`feedback_score` int DEFAULT NULL,
`archived` tinyint DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=363243 DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_unicode_ci
-- Assignment rows linking an employee to a project. The unique key
-- idx_employee_project allows at most one row per (employee_id, project_id).
-- NOTE(review): idx_employee_project contains only employee_id and project_id
-- (plus the primary key), so a per-employee SUM over hours_allocated /
-- hours_logged cannot be answered from that index alone -- confirm against the
-- actual execution plan.
CREATE TABLE `projects_employees` (
    `id`              INT     NOT NULL AUTO_INCREMENT,
    `project_id`      INT     NOT NULL,
    `employee_id`     INT     NOT NULL,
    `department_id`   INT     DEFAULT NULL,
    `role_code`       INT     DEFAULT NULL,
    `hours_allocated` INT     DEFAULT NULL,
    `hours_logged`    INT     DEFAULT NULL,
    `is_active`       TINYINT DEFAULT '1',
    `joined_at`       DATE    DEFAULT NULL,
    `left_at`         DATE    DEFAULT NULL,
    PRIMARY KEY (`id`),
    UNIQUE KEY `idx_employee_project` (`employee_id`, `project_id`),
    KEY `project_id` (`project_id`),
    -- Each assignment must reference an existing row in `projects`.
    CONSTRAINT `projects_employees_ibfk_1`
        FOREIGN KEY (`project_id`) REFERENCES `projects` (`id`)
)
    ENGINE = InnoDB
    AUTO_INCREMENT = 4325311
    DEFAULT CHARSET = utf8mb3
    COLLATE = utf8mb3_unicode_ci
< /code>
Запрос:
-- Baseline per-employee rollup across both tables.
-- Runtime reported by the author: 6-7 seconds.
EXPLAIN
SELECT
    projects_employees.employee_id,
    SUM(projects_employees.hours_allocated) AS hours_allocated,
    SUM(projects_employees.hours_logged)    AS hours_logged,
    SUM(projects.estimated_hours)           AS estimated_hours,
    SUM(projects.actual_hours)              AS actual_hours,
    SUM(projects.budget_cents)              AS budget_cents,
    SUM(projects.cost_cents)                AS cost_cents,
    SUM(projects.tasks_total)               AS tasks_total,
    SUM(projects.bugs_fixed)                AS bugs_fixed
FROM
    projects_employees
    -- LEFT JOIN rather than INNER JOIN: per the author's note, the
    -- inner-join form of this query ran slower.
    LEFT JOIN projects
        ON projects.id = projects_employees.project_id
GROUP BY
    projects_employees.employee_id;
N.B.: функционально LEFT JOIN не требуется, но без него запрос выполнялся ещё медленнее.
+----+-------------+--------------------+------------+--------+----------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+--------------------+------------+--------+----------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
| 1 | SIMPLE | projects_employees | NULL | index | idx_employee_project | idx_employee_project | 8 | NULL | 4259544 | 100.00 | Using index |
| 1 | SIMPLE | projects | NULL | eq_ref | PRIMARY | PRIMARY | 4 | rates.projects_employees.project_id | 1 | 100.00 | NULL |
+----+-------------+--------------------+------------+--------+----------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
< /code>
Я заметил, что:
-- Aggregation over projects_employees alone, no join.
-- Runtime reported by the author: 4.8-5.2 seconds.
EXPLAIN
SELECT
    SUM(hours_allocated) AS a,
    SUM(hours_logged)
FROM
    projects_employees
GROUP BY
    projects_employees.employee_id;
-- Join without any aggregation.
-- Runtime reported by the author: 4.11 seconds.
EXPLAIN
SELECT
    projects_employees.id,
    projects.estimated_hours
FROM
    projects_employees
    LEFT JOIN projects
        ON projects.id = projects_employees.project_id;
< /code>
Некоторые замечания:
1. Ресурсы: 6 ЦП, 16 ГБ ОЗУ, innodb_buffer_pool_size = 12 ГБ.
2. Версия MySQL — 8.0.42, движок InnoDB.
3. В продакшене данные уже разделены по годам на отдельные таблицы: `projects_2024`, `projects_employees_2024` и т. д.
4. В базовом запросе нет фильтров по таблице `projects`, поэтому должны быть прочитаны все строки. Однако будет возможность дополнять запрос фильтрами по `projects`.
upd1.
-- Inner-join variant of the rollup (projects columns only).
-- Author's timing: 8.7-9.3 (presumably seconds); EXPLAIN lists the tables
-- in a different order than for the LEFT JOIN query.
EXPLAIN
SELECT
    projects_employees.employee_id,
    SUM(projects.estimated_hours) AS estimated_hours,
    SUM(projects.actual_hours)    AS actual_hours,
    SUM(projects.budget_cents)    AS budget_cents,
    SUM(projects.cost_cents)      AS cost_cents,
    SUM(projects.tasks_total)     AS tasks_total,
    SUM(projects.bugs_fixed)      AS bugs_fixed
FROM
    projects_employees
    INNER JOIN projects
        ON projects.id = projects_employees.project_id
GROUP BY
    projects_employees.employee_id;
+----+-------------+--------------------+------------+------+---------------------------------+------------+---------+-------------------+--------+----------+-----------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+--------------------+------------+------+---------------------------------+------------+---------+-------------------+--------+----------+-----------------+
| 1 | SIMPLE | projects | NULL | ALL | PRIMARY | NULL | NULL | NULL | 360819 | 100.00 | Using temporary |
| 1 | SIMPLE | projects_employees | NULL | ref | idx_employee_project,project_id | project_id | 4 | rates.projects.id | 11 | 100.00 | NULL |
+----+-------------+--------------------+------------+------+---------------------------------+------------+---------+-------------------+--------+----------+-----------------+
-- STRAIGHT_JOIN variant of the rollup (projects columns only).
-- Author's timing: 6-7 seconds; EXPLAIN reports projects_employees first,
-- the same table order as for the LEFT JOIN query.
EXPLAIN
SELECT
    projects_employees.employee_id,
    SUM(projects.estimated_hours) AS estimated_hours,
    SUM(projects.actual_hours)    AS actual_hours,
    SUM(projects.budget_cents)    AS budget_cents,
    SUM(projects.cost_cents)      AS cost_cents,
    SUM(projects.tasks_total)     AS tasks_total,
    SUM(projects.bugs_fixed)      AS bugs_fixed
FROM
    projects_employees
    STRAIGHT_JOIN projects
        ON projects.id = projects_employees.project_id
GROUP BY
    projects_employees.employee_id;
+----+-------------+--------------------+------------+--------+---------------------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+--------------------+------------+--------+---------------------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
| 1 | SIMPLE | projects_employees | NULL | index | idx_employee_project,project_id | idx_employee_project | 8 | NULL | 4259544 | 100.00 | Using index |
| 1 | SIMPLE | projects | NULL | eq_ref | PRIMARY | PRIMARY | 4 | rates.projects_employees.project_id | 1 | 100.00 | NULL |
+----+-------------+--------------------+------------+--------+---------------------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
Подробнее здесь: https://stackoverflow.com/questions/79696006/how-to-speed-up-mysql-query-containing-left-join-and-group-by-with-4m-rows
Как ускорить запрос MySQL, содержащий LEFT JOIN и GROUP BY, на 4+ млн строк? ⇐ MySql
Форум по Mysql
1752114518
Anonymous
Не могли бы вы, пожалуйста, помочь мне с MySQL (JOIN, GROUP BY, индексы)?
Есть таблицы `projects` (362 тыс. строк) и `projects_employees` (4,27 млн строк), связанные отношением «один ко многим». Я пытаюсь получить агрегированные данные по каждому сотруднику, и это занимает 6–7 секунд. Можно ли ускорить запрос, или это предел?
CREATE TABLE `projects` (
`id` int NOT NULL AUTO_INCREMENT,
`client_id` int DEFAULT NULL,
`manager_id` int DEFAULT NULL,
`team_size` int DEFAULT NULL,
`status_code` int DEFAULT NULL,
`priority_level` int DEFAULT NULL,
`risk_level` int DEFAULT NULL,
`estimated_hours` int DEFAULT NULL,
`actual_hours` int DEFAULT NULL,
`remaining_hours` int DEFAULT NULL,
`budget_cents` int DEFAULT NULL,
`cost_cents` int DEFAULT NULL,
`progress_percent` int DEFAULT NULL,
`tasks_total` int DEFAULT NULL,
`tasks_completed` int DEFAULT NULL,
`bugs_found` int DEFAULT NULL,
`bugs_fixed` int DEFAULT NULL,
`meetings_held` int DEFAULT NULL,
`files_uploaded` int DEFAULT NULL,
`comments_posted` int DEFAULT NULL,
`reviews_requested` int DEFAULT NULL,
`approvals_received` int DEFAULT NULL,
`escalations` int DEFAULT NULL,
`feedback_score` int DEFAULT NULL,
`archived` tinyint DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=363243 DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_unicode_ci
-- Assignment rows linking an employee to a project. The unique key
-- idx_employee_project allows at most one row per (employee_id, project_id).
-- NOTE(review): idx_employee_project contains only employee_id and project_id
-- (plus the primary key), so a per-employee SUM over hours_allocated /
-- hours_logged cannot be answered from that index alone -- confirm against the
-- actual execution plan.
CREATE TABLE `projects_employees` (
    `id`              INT     NOT NULL AUTO_INCREMENT,
    `project_id`      INT     NOT NULL,
    `employee_id`     INT     NOT NULL,
    `department_id`   INT     DEFAULT NULL,
    `role_code`       INT     DEFAULT NULL,
    `hours_allocated` INT     DEFAULT NULL,
    `hours_logged`    INT     DEFAULT NULL,
    `is_active`       TINYINT DEFAULT '1',
    `joined_at`       DATE    DEFAULT NULL,
    `left_at`         DATE    DEFAULT NULL,
    PRIMARY KEY (`id`),
    UNIQUE KEY `idx_employee_project` (`employee_id`, `project_id`),
    KEY `project_id` (`project_id`),
    -- Each assignment must reference an existing row in `projects`.
    CONSTRAINT `projects_employees_ibfk_1`
        FOREIGN KEY (`project_id`) REFERENCES `projects` (`id`)
)
    ENGINE = InnoDB
    AUTO_INCREMENT = 4325311
    DEFAULT CHARSET = utf8mb3
    COLLATE = utf8mb3_unicode_ci
< /code>
Запрос:
-- Baseline per-employee rollup across both tables.
-- Runtime reported by the author: 6-7 seconds.
EXPLAIN
SELECT
    projects_employees.employee_id,
    SUM(projects_employees.hours_allocated) AS hours_allocated,
    SUM(projects_employees.hours_logged)    AS hours_logged,
    SUM(projects.estimated_hours)           AS estimated_hours,
    SUM(projects.actual_hours)              AS actual_hours,
    SUM(projects.budget_cents)              AS budget_cents,
    SUM(projects.cost_cents)                AS cost_cents,
    SUM(projects.tasks_total)               AS tasks_total,
    SUM(projects.bugs_fixed)                AS bugs_fixed
FROM
    projects_employees
    -- LEFT JOIN rather than INNER JOIN: per the author's note, the
    -- inner-join form of this query ran slower.
    LEFT JOIN projects
        ON projects.id = projects_employees.project_id
GROUP BY
    projects_employees.employee_id;
N.B.: функционально LEFT JOIN не требуется, но без него запрос выполнялся ещё медленнее.
+----+-------------+--------------------+------------+--------+----------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+--------------------+------------+--------+----------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
| 1 | SIMPLE | projects_employees | NULL | index | idx_employee_project | idx_employee_project | 8 | NULL | 4259544 | 100.00 | Using index |
| 1 | SIMPLE | projects | NULL | eq_ref | PRIMARY | PRIMARY | 4 | rates.projects_employees.project_id | 1 | 100.00 | NULL |
+----+-------------+--------------------+------------+--------+----------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
< /code>
Я заметил, что:
-- Aggregation over projects_employees alone, no join.
-- Runtime reported by the author: 4.8-5.2 seconds.
EXPLAIN
SELECT
    SUM(hours_allocated) AS a,
    SUM(hours_logged)
FROM
    projects_employees
GROUP BY
    projects_employees.employee_id;
-- Join without any aggregation.
-- Runtime reported by the author: 4.11 seconds.
EXPLAIN
SELECT
    projects_employees.id,
    projects.estimated_hours
FROM
    projects_employees
    LEFT JOIN projects
        ON projects.id = projects_employees.project_id;
< /code>
Некоторые замечания:
1. Ресурсы: 6 ЦП, 16 ГБ ОЗУ, innodb_buffer_pool_size = 12 ГБ.
2. Версия MySQL — 8.0.42, движок InnoDB.
3. В продакшене данные уже разделены по годам на отдельные таблицы: `projects_2024`, `projects_employees_2024` и т. д.
4. В базовом запросе нет фильтров по таблице `projects`, поэтому должны быть прочитаны все строки. Однако будет возможность дополнять запрос фильтрами по `projects`.
upd1.
-- Inner-join variant of the rollup (projects columns only).
-- Author's timing: 8.7-9.3 (presumably seconds); EXPLAIN lists the tables
-- in a different order than for the LEFT JOIN query.
EXPLAIN
SELECT
    projects_employees.employee_id,
    SUM(projects.estimated_hours) AS estimated_hours,
    SUM(projects.actual_hours)    AS actual_hours,
    SUM(projects.budget_cents)    AS budget_cents,
    SUM(projects.cost_cents)      AS cost_cents,
    SUM(projects.tasks_total)     AS tasks_total,
    SUM(projects.bugs_fixed)      AS bugs_fixed
FROM
    projects_employees
    INNER JOIN projects
        ON projects.id = projects_employees.project_id
GROUP BY
    projects_employees.employee_id;
+----+-------------+--------------------+------------+------+---------------------------------+------------+---------+-------------------+--------+----------+-----------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+--------------------+------------+------+---------------------------------+------------+---------+-------------------+--------+----------+-----------------+
| 1 | SIMPLE | projects | NULL | ALL | PRIMARY | NULL | NULL | NULL | 360819 | 100.00 | Using temporary |
| 1 | SIMPLE | projects_employees | NULL | ref | idx_employee_project,project_id | project_id | 4 | rates.projects.id | 11 | 100.00 | NULL |
+----+-------------+--------------------+------------+------+---------------------------------+------------+---------+-------------------+--------+----------+-----------------+
-- STRAIGHT_JOIN variant of the rollup (projects columns only).
-- Author's timing: 6-7 seconds; EXPLAIN reports projects_employees first,
-- the same table order as for the LEFT JOIN query.
EXPLAIN
SELECT
    projects_employees.employee_id,
    SUM(projects.estimated_hours) AS estimated_hours,
    SUM(projects.actual_hours)    AS actual_hours,
    SUM(projects.budget_cents)    AS budget_cents,
    SUM(projects.cost_cents)      AS cost_cents,
    SUM(projects.tasks_total)     AS tasks_total,
    SUM(projects.bugs_fixed)      AS bugs_fixed
FROM
    projects_employees
    STRAIGHT_JOIN projects
        ON projects.id = projects_employees.project_id
GROUP BY
    projects_employees.employee_id;
+----+-------------+--------------------+------------+--------+---------------------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+--------------------+------------+--------+---------------------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
| 1 | SIMPLE | projects_employees | NULL | index | idx_employee_project,project_id | idx_employee_project | 8 | NULL | 4259544 | 100.00 | Using index |
| 1 | SIMPLE | projects | NULL | eq_ref | PRIMARY | PRIMARY | 4 | rates.projects_employees.project_id | 1 | 100.00 | NULL |
+----+-------------+--------------------+------------+--------+---------------------------------+----------------------+---------+-------------------------------------+---------+----------+-------------+
Подробнее здесь: [url]https://stackoverflow.com/questions/79696006/how-to-speed-up-mysql-query-containing-left-join-and-group-by-with-4m-rows[/url]
Ответить
1 сообщение
• Страница 1 из 1
Перейти
- Кемерово-IT
- ↳ Javascript
- ↳ C#
- ↳ JAVA
- ↳ Elasticsearch aggregation
- ↳ Python
- ↳ Php
- ↳ Android
- ↳ Html
- ↳ Jquery
- ↳ C++
- ↳ IOS
- ↳ CSS
- ↳ Excel
- ↳ Linux
- ↳ Apache
- ↳ MySql
- Детский мир
- Для души
- ↳ Музыкальные инструменты даром
- ↳ Печатная продукция даром
- Внешняя красота и здоровье
- ↳ Одежда и обувь для взрослых даром
- ↳ Товары для здоровья
- ↳ Физкультура и спорт
- Техника - даром!
- ↳ Автомобилистам
- ↳ Компьютерная техника
- ↳ Плиты: газовые и электрические
- ↳ Холодильники
- ↳ Стиральные машины
- ↳ Телевизоры
- ↳ Телефоны, смартфоны, планшеты
- ↳ Швейные машинки
- ↳ Прочая электроника и техника
- ↳ Фототехника
- Ремонт и интерьер
- ↳ Стройматериалы, инструмент
- ↳ Мебель и предметы интерьера даром
- ↳ Сантехника
- Другие темы
- ↳ Разное даром
- ↳ Давай меняться!
- ↳ Отдам\возьму за копеечку
- ↳ Работа и подработка в Кемерове
- ↳ Давай с тобой поговорим...
Мобильная версия