Rebalancing Ceph OSD storage capacity

Anonymous
 Rebalancing Ceph OSD storage capacity

Post by Anonymous »

I am trying to deploy a Ceph cluster with 3 servers. Each server has 3 HDDs and 2 SSDs (9 HDDs + 6 SSDs in total). Everything seemed to work fine until I completely shut down one node and continued loading data to test the Ceph cluster's fault tolerance. However, as you can see, the data is concentrated on two OSDs, 4 and 5. I tried running the Ceph balancer, but it did not help. We look forward to your help.
ceph osd crush rule dump
[
    {
        "rule_id": 0,
        "rule_name": "replicated_rule",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -1,
                "item_name": "default"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    },
    {
        "rule_id": 1,
        "rule_name": "ssd_rule",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -12,
                "item_name": "default~ssd"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    },
    {
        "rule_id": 2,
        "rule_name": "hdd_rule",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -2,
                "item_name": "default~hdd"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    }
]
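
For reference, it may help to check which of these CRUSH rules each pool is actually assigned to: the dump defines device-class rules (ssd_rule, hdd_rule), but any pool still on replicated_rule (rule_id 0) is placed via the plain default root and can land on either class of OSD. A minimal sketch; the pool names used here are only examples taken from the read_balance_scores output further down:

# Show every pool together with the crush_rule id it uses
# (each line of "ls detail" contains "... crush_rule <id> ...").
ceph osd pool ls detail

# Or query individual pools by name.
ceph osd pool get s3.hdd.data crush_rule
ceph osd pool get htv.rgw.buckets.data crush_rule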

ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME
-14 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - host hdd
-13 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - host ssd
-1 153.66357 - 154 TiB 709 GiB 702 GiB 486 MiB 6.6 GiB 153 TiB 0.45 1.00 - root default
-3 51.29396 - 51 TiB 337 GiB 334 GiB 242 MiB 2.8 GiB 51 TiB 0.64 1.42 - host s3-ceph01
0 hdd 16.37059 1.00000 16 TiB 3.4 GiB 2.2 GiB 240 MiB 941 MiB 16 TiB 0.02 0.04 96 up osd.0
3 hdd 16.37059 1.00000 16 TiB 2.3 GiB 2.2 GiB 11 KiB 142 MiB 16 TiB 0.01 0.03 94 up osd.3
4 hdd 16.37059 0.80000 16 TiB 297 GiB 295 GiB 87 KiB 1.5 GiB 16 TiB 1.77 3.93 64 up osd.4
1 ssd 1.09109 0.80005 1.1 TiB 17 GiB 17 GiB 1009 KiB 132 MiB 1.1 TiB 1.54 3.41 88 up osd.1
2 ssd 1.09109 0.85004 1.1 TiB 17 GiB 17 GiB 1.2 MiB 95 MiB 1.1 TiB 1.54 3.42 92 up osd.2
-5 51.29396 - 51 TiB 334 GiB 331 GiB 242 MiB 2.7 GiB 51 TiB 0.64 1.41 - host s3-ceph02
5 hdd 16.37059 0.83000 16 TiB 297 GiB 296 GiB 6 KiB 1.4 GiB 16 TiB 1.77 3.93 72 up osd.5
8 hdd 16.37059 1.00000 16 TiB 2.2 GiB 2.1 GiB 28 KiB 113 MiB 16 TiB 0.01 0.03 91 up osd.8
9 hdd 16.37059 1.00000 16 TiB 2.3 GiB 2.2 GiB 22 KiB 104 MiB 16 TiB 0.01 0.03 85 up osd.9
6 ssd 1.09109 0.80005 1.1 TiB 16 GiB 15 GiB 241 MiB 957 MiB 1.1 TiB 1.42 3.16 80 up osd.6
7 ssd 1.09109 0.90002 1.1 TiB 16 GiB 16 GiB 792 KiB 119 MiB 1.1 TiB 1.47 3.27 93 up osd.7
-7 51.07565 - 51 TiB 38 GiB 37 GiB 1.9 MiB 1.1 GiB 51 TiB 0.07 0.16 - host s3-ceph03
10 hdd 16.37059 1.00000 16 TiB 2.5 GiB 2.4 GiB 7 KiB 88 MiB 16 TiB 0.02 0.03 96 up osd.10
11 hdd 16.37059 1.00000 16 TiB 2.4 GiB 2.3 GiB 5 KiB 72 MiB 16 TiB 0.01 0.03 85 up osd.11
12 hdd 16.37059 1.00000 16 TiB 2.9 GiB 2.2 GiB 12 KiB 759 MiB 16 TiB 0.02 0.04 85 up osd.12
13 ssd 0.87279 0.85004 894 GiB 13 GiB 13 GiB 612 KiB 91 MiB 880 GiB 1.49 3.30 74 up osd.13
14 ssd 1.09109 0.85004 1.1 TiB 17 GiB 17 GiB 1.3 MiB 101 MiB 1.1 TiB 1.53 3.40 93 up osd.14
TOTAL 154 TiB 709 GiB 702 GiB 486 MiB 6.6 GiB 153 TiB 0.45
MIN/MAX VAR: 0.03/3.93 STDDEV: 0.84
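
As a side note, the first two lines of the ceph osd df tree output show empty buckets named "hdd" and "ssd" (IDs -14 and -13) with zero weight, sitting outside root default. Whether they contribute to the imbalance is unclear, but they can be inspected and, if they turn out to be leftovers, removed; a hedged sketch:

# Print the CRUSH hierarchy to confirm the two buckets are empty
# and not referenced by any rule.
ceph osd crush tree

# Only if they are confirmed empty and unused; the monitor refuses
# to remove a non-empty bucket.
ceph osd crush remove hdd
ceph osd crush remove ssd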

  cluster:
    id:     4028b5e4-ca9c-11f0-9a27-b8ca3af96173
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum s3-ceph01,s3-ceph02,s3-ceph03 (age 2h)
    mgr: s3-ceph01.mwhiai(active, since 4h), standbys: s3-ceph02.rlcvof, s3-ceph03.tsfdhy
    osd: 15 osds: 15 up (since 2h), 15 in (since 52m)
    rgw: 3 daemons active (3 hosts, 1 zones)

  data:
    pools:   12 pools, 644 pgs
    objects: 719.97k objects, 350 GiB
    usage:   710 GiB used, 153 TiB / 154 TiB avail
    pgs:     643 active+clean
             1   active+clean+scrubbing

  io:
    client: 207 KiB/s rd, 14 MiB/s wr, 248 op/s rd, 411 op/s wr

root@s3-ceph02:~# ceph balancer eval
current cluster score 0.062858 (lower is better)
read_balance_scores (lower is better) {'.mgr': 15.380000114440918, '.rgw.root': 3.2699999809265137, 'htv.rgw.log': 3.299999952316284, 'htv.rgw.control': 3.7300000190734863, 'htv.rgw.meta': 2.819999933242798, 's3.ssd.data': 1.2699999809265137, 's3.ssd.index': 1.2200000286102295, 's3.hdd.data': 1.1299999952316284, 's3.hdd.index': 1.5, 'htv.rgw.buckets.index': 15.380000114440918, 'htv.rgw.buckets.data': 15.380000114440918, 'htv.rgw.buckets.non-ec': 15.380000114440918}
root@s3-ceph02:~# ceph osd reweight-by-utilization 5
Error EINVAL: You must give a percentage higher than 100. The reweighting threshold will be calculated as <average-utilization> times <input-percentage>. For example, an argument of 200 would reweight OSDs which are twice as utilized as the average OSD.
FAILED reweight-by-pg
root@s3-ceph02:~# ceph config get osd osd_recovery_max_active
3
root@s3-ceph02:~# ceph osd reweight-by-utilization 120
moved 15 / 1288 (1.1646%)
avg 85.8667
stddev 6.68198 -> 6.97487 (expected baseline 8.95222)
min osd.13 with 71 -> 73 pgs (0.826863 -> 0.850155 * mean)
max osd.1 with 96 -> 93 pgs (1.11801 -> 1.08307 * mean)

oload 120
max_change 0.05
max_change_osds 4
average_utilization 0.0041
overload_utilization 0.0050
osd.1 weight 0.9000 -> 0.8500
osd.4 weight 0.9500 -> 0.9000
osd.5 weight 1.0000 -> 0.9500
osd.14 weight 0.9000 -> 0.8500

root@s3-ceph02:~# ceph osd test-reweight-by-utilization 120 0.05 4 --no-increasing
moved 20 / 1288 (1.5528%)
avg 85.8667
stddev 9.27266 -> 10.8926 (expected baseline 8.95222)
min osd.4 with 64 -> 58 pgs (0.745342 -> 0.675466 * mean)
max osd.0 with 96 -> 100 pgs (1.11801 -> 1.1646 * mean)

oload 120
max_change 0.05
max_change_osds 4
average_utilization 0.0042
overload_utilization 0.0051
osd.4 weight 0.8000 -> 0.7500
osd.5 weight 0.8300 -> 0.7800
osd.2 weight 0.8500 -> 0.8000
osd.1 weight 0.8000 -> 0.7501
no change
root@s3-ceph02:~# ceph balancer status
{
"active": true,
"last_optimize_duration": "0:00:00.006339",
"last_optimize_started": "Tue Dec 16 09:04:28 2025",
"mode": "crush-compat",
"no_optimization_needed": true,
"optimize_result": "Some osds belong to multiple subtrees: {0: ['default~hdd', 'default'], 1: ['default~ssd', 'default'], 2: ['default~ssd', 'default'], 3: ['default~hdd', 'default'], 4: ['default~hdd', 'default'], 5: ['default~hdd', 'default'], 6: ['default~ssd', 'default'], 7: ['default~ssd', 'default'], 8: ['default~hdd', 'default'], 9: ['default~hdd', 'default'], 10: ['default~hdd', 'default'], 11: ['default~hdd', 'default'], 12: ['default~hdd', 'default'], 13: ['default~ssd', 'default'], 14: ['default~ssd', 'default']}",
"plans": []
}
153 ceph config set osd osd_recovery_max_active 3
171 ceph config set global osd_recovery_max_active 3
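
The optimize_result above ("Some osds belong to multiple subtrees ...") is what the balancer reports in crush-compat mode when OSDs appear both under root default and under the device-class shadow roots (default~hdd, default~ssd), so in this layout crush-compat cannot produce a plan. One commonly suggested alternative is the upmap balancer mode, which understands device classes; this is only a sketch, not a verified fix for this cluster, and it assumes every client can be required to speak luminous or newer:

# Check which client releases are currently connected.
ceph features

# pg-upmap entries require clients of at least luminous.
ceph osd set-require-min-compat-client luminous

# Switch the balancer to upmap mode and keep it enabled.
ceph balancer mode upmap
ceph balancer on

# Re-evaluate after the balancer has had time to apply its plans.
ceph balancer status
ceph balancer eval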


More details here: https://stackoverflow.com/questions/798 ... e-capacity