Rebalancing Ceph OSD storage capacity

Anonymous
 Rebalancing Ceph OSD storage capacity

Post by Anonymous »

I am trying to deploy a Ceph cluster with 3 servers. Each server has 3 HDDs and 2 SSDs (9 HDDs + 6 SSDs in total). Everything seemed to work fine until I completely shut down one node and continued loading data to test the Ceph cluster's fault tolerance. However, as you can see, the data is concentrated on two OSDs, 4 and 5. I tried running the Ceph balancer, but it did not help. We look forward to your help.
ceph osd crush rule dump
[
    {
        "rule_id": 0,
        "rule_name": "replicated_rule",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -1,
                "item_name": "default"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    },
    {
        "rule_id": 1,
        "rule_name": "ssd_rule",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -12,
                "item_name": "default~ssd"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    },
    {
        "rule_id": 2,
        "rule_name": "hdd_rule",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -2,
                "item_name": "default~hdd"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    }
]
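
For reference, it may help to check which of these CRUSH rules each pool is actually assigned to: the dump defines device-class rules (ssd_rule, hdd_rule), but any pool still on replicated_rule (rule_id 0) is placed via the plain default root and can land on either class of OSD. A minimal sketch; the pool names used here are only examples taken from the read_balance_scores output further down:

# Show every pool together with the crush_rule id it uses
# (each line of "ls detail" contains "... crush_rule <id> ...").
ceph osd pool ls detail

# Or query individual pools by name.
ceph osd pool get s3.hdd.data crush_rule
ceph osd pool get htv.rgw.buckets.data crush_rule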

ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME
-14 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - host hdd
-13 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - host ssd
-1 153.66357 - 154 TiB 709 GiB 702 GiB 486 MiB 6.6 GiB 153 TiB 0.45 1.00 - root default
-3 51.29396 - 51 TiB 337 GiB 334 GiB 242 MiB 2.8 GiB 51 TiB 0.64 1.42 - host s3-ceph01
0 hdd 16.37059 1.00000 16 TiB 3.4 GiB 2.2 GiB 240 MiB 941 MiB 16 TiB 0.02 0.04 96 up osd.0
3 hdd 16.37059 1.00000 16 TiB 2.3 GiB 2.2 GiB 11 KiB 142 MiB 16 TiB 0.01 0.03 94 up osd.3
4 hdd 16.37059 0.80000 16 TiB 297 GiB 295 GiB 87 KiB 1.5 GiB 16 TiB 1.77 3.93 64 up osd.4
1 ssd 1.09109 0.80005 1.1 TiB 17 GiB 17 GiB 1009 KiB 132 MiB 1.1 TiB 1.54 3.41 88 up osd.1
2 ssd 1.09109 0.85004 1.1 TiB 17 GiB 17 GiB 1.2 MiB 95 MiB 1.1 TiB 1.54 3.42 92 up osd.2
-5 51.29396 - 51 TiB 334 GiB 331 GiB 242 MiB 2.7 GiB 51 TiB 0.64 1.41 - host s3-ceph02
5 hdd 16.37059 0.83000 16 TiB 297 GiB 296 GiB 6 KiB 1.4 GiB 16 TiB 1.77 3.93 72 up osd.5
8 hdd 16.37059 1.00000 16 TiB 2.2 GiB 2.1 GiB 28 KiB 113 MiB 16 TiB 0.01 0.03 91 up osd.8
9 hdd 16.37059 1.00000 16 TiB 2.3 GiB 2.2 GiB 22 KiB 104 MiB 16 TiB 0.01 0.03 85 up osd.9
6 ssd 1.09109 0.80005 1.1 TiB 16 GiB 15 GiB 241 MiB 957 MiB 1.1 TiB 1.42 3.16 80 up osd.6
7 ssd 1.09109 0.90002 1.1 TiB 16 GiB 16 GiB 792 KiB 119 MiB 1.1 TiB 1.47 3.27 93 up osd.7
-7 51.07565 - 51 TiB 38 GiB 37 GiB 1.9 MiB 1.1 GiB 51 TiB 0.07 0.16 - host s3-ceph03
10 hdd 16.37059 1.00000 16 TiB 2.5 GiB 2.4 GiB 7 KiB 88 MiB 16 TiB 0.02 0.03 96 up osd.10
11 hdd 16.37059 1.00000 16 TiB 2.4 GiB 2.3 GiB 5 KiB 72 MiB 16 TiB 0.01 0.03 85 up osd.11
12 hdd 16.37059 1.00000 16 TiB 2.9 GiB 2.2 GiB 12 KiB 759 MiB 16 TiB 0.02 0.04 85 up osd.12
13 ssd 0.87279 0.85004 894 GiB 13 GiB 13 GiB 612 KiB 91 MiB 880 GiB 1.49 3.30 74 up osd.13
14 ssd 1.09109 0.85004 1.1 TiB 17 GiB 17 GiB 1.3 MiB 101 MiB 1.1 TiB 1.53 3.40 93 up osd.14
TOTAL 154 TiB 709 GiB 702 GiB 486 MiB 6.6 GiB 153 TiB 0.45
MIN/MAX VAR: 0.03/3.93 STDDEV: 0.84
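
As a side note, the first two lines of the ceph osd df tree output show empty buckets named "hdd" and "ssd" (IDs -14 and -13) with zero weight, sitting outside root default. Whether they contribute to the imbalance is unclear, but they can be inspected and, if they turn out to be leftovers, removed; a hedged sketch:

# Print the CRUSH hierarchy to confirm the two buckets are empty
# and not referenced by any rule.
ceph osd crush tree

# Only if they are confirmed empty and unused; the monitor refuses
# to remove a non-empty bucket.
ceph osd crush remove hdd
ceph osd crush remove ssd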

  cluster:
    id:     4028b5e4-ca9c-11f0-9a27-b8ca3af96173
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum s3-ceph01,s3-ceph02,s3-ceph03 (age 2h)
    mgr: s3-ceph01.mwhiai(active, since 4h), standbys: s3-ceph02.rlcvof, s3-ceph03.tsfdhy
    osd: 15 osds: 15 up (since 2h), 15 in (since 52m)
    rgw: 3 daemons active (3 hosts, 1 zones)

  data:
    pools:   12 pools, 644 pgs
    objects: 719.97k objects, 350 GiB
    usage:   710 GiB used, 153 TiB / 154 TiB avail
    pgs:     643 active+clean
             1   active+clean+scrubbing

  io:
    client: 207 KiB/s rd, 14 MiB/s wr, 248 op/s rd, 411 op/s wr

root@s3-ceph02:~# ceph balancer eval
current cluster score 0.062858 (lower is better)
read_balance_scores (lower is better) {'.mgr': 15.380000114440918, '.rgw.root': 3.2699999809265137, 'htv.rgw.log': 3.299999952316284, 'htv.rgw.control': 3.7300000190734863, 'htv.rgw.meta': 2.819999933242798, 's3.ssd.data': 1.2699999809265137, 's3.ssd.index': 1.2200000286102295, 's3.hdd.data': 1.1299999952316284, 's3.hdd.index': 1.5, 'htv.rgw.buckets.index': 15.380000114440918, 'htv.rgw.buckets.data': 15.380000114440918, 'htv.rgw.buckets.non-ec': 15.380000114440918}
root@s3-ceph02:~# ceph osd reweight-by-utilization 5
Error EINVAL: You must give a percentage higher than 100. The reweighting threshold will be calculated as <average-utilization> times <input-percentage>. For example, an argument of 200 would reweight OSDs which are twice as utilized as the average OSD.
FAILED reweight-by-pg
root@s3-ceph02:~# ceph config get osd osd_recovery_max_active
3
root@s3-ceph02:~# ceph osd reweight-by-utilization 120
moved 15 / 1288 (1.1646%)
avg 85.8667
stddev 6.68198 -> 6.97487 (expected baseline 8.95222)
min osd.13 with 71 -> 73 pgs (0.826863 -> 0.850155 * mean)
max osd.1 with 96 -> 93 pgs (1.11801 -> 1.08307 * mean)

oload 120
max_change 0.05
max_change_osds 4
average_utilization 0.0041
overload_utilization 0.0050
osd.1 weight 0.9000 -> 0.8500
osd.4 weight 0.9500 -> 0.9000
osd.5 weight 1.0000 -> 0.9500
osd.14 weight 0.9000 -> 0.8500

root@s3-ceph02:~# ceph osd test-reweight-by-utilization 120 0.05 4 --no-increasing
moved 20 / 1288 (1.5528%)
avg 85.8667
stddev 9.27266 -> 10.8926 (expected baseline 8.95222)
min osd.4 with 64 -> 58 pgs (0.745342 -> 0.675466 * mean)
max osd.0 with 96 -> 100 pgs (1.11801 -> 1.1646 * mean)

oload 120
max_change 0.05
max_change_osds 4
average_utilization 0.0042
overload_utilization 0.0051
osd.4 weight 0.8000 -> 0.7500
osd.5 weight 0.8300 -> 0.7800
osd.2 weight 0.8500 -> 0.8000
osd.1 weight 0.8000 -> 0.7501
no change
root@s3-ceph02:~# ceph balancer status
{
"active": true,
"last_optimize_duration": "0:00:00.006339",
"last_optimize_started": "Tue Dec 16 09:04:28 2025",
"mode": "crush-compat",
"no_optimization_needed": true,
"optimize_result": "Some osds belong to multiple subtrees: {0: ['default~hdd', 'default'], 1: ['default~ssd', 'default'], 2: ['default~ssd', 'default'], 3: ['default~hdd', 'default'], 4: ['default~hdd', 'default'], 5: ['default~hdd', 'default'], 6: ['default~ssd', 'default'], 7: ['default~ssd', 'default'], 8: ['default~hdd', 'default'], 9: ['default~hdd', 'default'], 10: ['default~hdd', 'default'], 11: ['default~hdd', 'default'], 12: ['default~hdd', 'default'], 13: ['default~ssd', 'default'], 14: ['default~ssd', 'default']}",
"plans": []
}
153 ceph config set osd osd_recovery_max_active 3
171 ceph config set global osd_recovery_max_active 3
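
The optimize_result above ("Some osds belong to multiple subtrees ...") is what the balancer reports in crush-compat mode when OSDs appear both under root default and under the device-class shadow roots (default~hdd, default~ssd), so in this layout crush-compat cannot produce a plan. One commonly suggested alternative is the upmap balancer mode, which understands device classes; this is only a sketch, not a verified fix for this cluster, and it assumes every client can be required to speak luminous or newer:

# Check which client releases are currently connected.
ceph features

# pg-upmap entries require clients of at least luminous.
ceph osd set-require-min-compat-client luminous

# Switch the balancer to upmap mode and keep it enabled.
ceph balancer mode upmap
ceph balancer on

# Re-evaluate after the balancer has had time to apply its plans.
ceph balancer status
ceph balancer eval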


More details here: https://stackoverflow.com/questions/798 ... e-capacity