Files
pve-storage/PVE/Storage/LVMPlugin.pm
Fabian Ebner 6fbffac0a3 lvm thin: don't assume that a thin pool and its volumes are active
There are cases where autoactivation can fail, as reported in the
community forum [0]. And it could also be that a volume was
deactivated by something outside of our control.

It doesn't seem strictly necessary to activate the thin pool itself
(creating/removing/activating LVs within the pool still works if it's
not active), but it does not report usage information as long as
neither the pool nor any of its LVs are active. Activate the pool for
that, for being able to use the flag in status(), and it should also
serve as a good indicator that there's a problem with the pool if it
can't be activated.

Before activating, check the (cached) lv_state from lvm_list_volumes.
It's necessary to update the cache in activate_storage, because the
flag is re-used in status(). Also update it for other (de)activations
to be more future-proof.

[0]: https://forum.proxmox.com/threads/local-lvm-not-available-after-kernel-update-on-pve-7.97406

Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
2021-11-10 14:23:54 +01:00

742 lines
19 KiB
Perl

package PVE::Storage::LVMPlugin;
use strict;
use warnings;
use IO::File;
use PVE::Tools qw(run_command trim);
use PVE::Storage::Plugin;
use PVE::JSONSchema qw(get_standard_option);
use base qw(PVE::Storage::Plugin);
# lvm helper functions
# Output callback for LVM commands: forwards a line to STDERR unless it is the
# "No medium found" noise, which mostly stems from (virtual) IPMI/iKVM devices
# and would only spam the log.
my $ignore_no_medium_warnings = sub {
    my ($output) = @_;

    print STDERR "$output\n" unless $output =~ /open failed: No medium found/;
};
# Look up LVM physical volume information for a block device.
#
# Returns undef when `file` does not report an LVM2 signature on $device.
# Otherwise returns a hash reference with the keys pvname, size (pvs is run
# with '--units k'), vgname and uuid.
# Dies if no device is given or if pvs prints more than one line for it.
sub lvm_pv_info {
    my ($device) = @_;

    die "no device specified" if !$device;

    # only ask pvs about devices that carry an LVM2 signature
    my $labelled = 0;
    run_command(
        ['/usr/bin/file', '-L', '-s', $device],
        outfunc => sub {
            my ($output) = @_;
            $labelled = 1 if $output =~ m/LVM2/;
        },
    );
    return undef if !$labelled;

    my $pvs_cmd = [
        '/sbin/pvs', '--separator', ':', '--noheadings', '--units', 'k',
        '--unbuffered', '--nosuffix', '--options',
        'pv_name,pv_size,vg_name,pv_uuid', $device,
    ];

    my $info;
    my $parse_pvs_line = sub {
        my ($output) = @_;

        my ($pvname, $size, $vgname, $uuid) = split(':', trim($output));

        # a second output line means the device is listed more than once
        die "found multiple pvs entries for device '$device'\n"
            if $info;

        $info = {
            pvname => $pvname,
            size => int($size),
            vgname => $vgname,
            uuid => $uuid,
        };
    };
    run_command($pvs_cmd, outfunc => $parse_pvs_line);

    return $info;
}
# Overwrite the first 512 bytes of $dev with NUL bytes.
#
# This is best effort: if $dev cannot be opened for writing, nothing happens
# and no error is raised.
sub clear_first_sector {
    my ($dev) = @_;

    if (my $fh = IO::File->new($dev, "w")) {
        # "\0" x 512 yields 512 NUL bytes; the numeric literal 0 x 512 would be
        # stringified first and produce the ASCII character '0' 512 times
        my $buf = "\0" x 512;
        syswrite $fh, $buf;
        $fh->close();
    }
}
# Create volume group $vgname on $device.
#
# Does nothing if the device already belongs to $vgname and dies if it belongs
# to a different volume group. $shared is accepted but currently unused
# (clustered VG creation via '-c y' is not enabled).
sub lvm_create_volume_group {
    my ($device, $vgname, $shared) = @_;

    my $pv = lvm_pv_info($device);
    if (my $current_vg = $pv->{vgname}) {
        return if $current_vg eq $vgname; # already created
        die "device '$device' is already used by volume group '$current_vg'\n";
    }

    clear_first_sector($device); # else pvcreate fails

    # we use --metadatasize 250k, which results in "pe_start = 512"
    # so pe_start is aligned on a 128k boundary (advantage for SSDs)
    run_command(
        ['/sbin/pvcreate', '--metadatasize', '250k', $device],
        errmsg => "pvcreate '$device' error",
    );

    run_command(
        ['/sbin/vgcreate', $vgname, $device],
        errmsg => "vgcreate $vgname $device error",
        errfunc => $ignore_no_medium_warnings,
        outfunc => $ignore_no_medium_warnings,
    );
}
# Remove volume group $vgname via 'vgremove -y'. "No medium found" noise in
# the command output is suppressed, everything else goes to STDERR.
sub lvm_destroy_volume_group {
    my ($vgname) = @_;

    my $cmd = ['vgremove', '-y', $vgname];
    my %run_opts = (
        errmsg => "unable to remove volume group $vgname",
        errfunc => $ignore_no_medium_warnings,
        outfunc => $ignore_no_medium_warnings,
    );

    run_command($cmd, %run_opts);
}
# List volume groups as reported by vgs (sizes in bytes, '--units b').
#
# Returns a hash reference keyed by VG name, each entry holding size, free and
# lvcount. With a true $includepvs every entry additionally gets a 'pvs' list
# with name, size and free of its physical volumes.
# A failing vgs call only produces a warning; whatever was parsed until then is
# still returned.
sub lvm_vgs {
    my ($includepvs) = @_;

    my @columns = qw(vg_name vg_size vg_free lv_count);
    push @columns, qw(pv_name pv_size pv_free) if $includepvs;

    my $cmd = [
        '/sbin/vgs', '--separator', ':', '--noheadings', '--units', 'b',
        '--unbuffered', '--nosuffix', '--options', join(',', @columns),
    ];

    my $vgs = {};

    my $collect = sub {
        my ($output) = @_;

        my ($name, $size, $free, $lvcount, $pvname, $pvsize, $pvfree) =
            split(':', trim($output));

        # with PV columns there is one line per PV, so the VG part repeats
        $vgs->{$name} //= {
            size => int($size),
            free => int($free),
            lvcount => int($lvcount),
        };

        return if !(defined($pvname) && defined($pvsize) && defined($pvfree));

        push @{$vgs->{$name}->{pvs}}, {
            name => $pvname,
            size => int($pvsize),
            free => int($pvfree),
        };
    };

    eval {
        run_command($cmd, outfunc => $collect, errfunc => $ignore_no_medium_warnings);
    };
    # just warn (vgs return error code 5 if clvmd does not run)
    # but output is still OK (list without clustered VGs)
    if (my $err = $@) {
        warn $err;
    }

    return $vgs;
}
# List logical volumes as reported by lvs (sizes in bytes, '--units b').
#
# Restricts the listing to $vgname if given. Returns a two-level hash reference
# {vg_name}->{lv_name} whose entries hold lv_size, lv_state (fifth lv_attr
# character), lv_type (first lv_attr character) and ctime, plus pool_lv and
# tags when set. Entries of type 't' also get metadata_size, metadata_used and
# used, derived from the reported percentages.
sub lvm_list_volumes {
    my ($vgname) = @_;

    my @fields = qw(
        vg_name lv_name lv_size lv_attr pool_lv data_percent metadata_percent
        snap_percent uuid tags metadata_size time
    );

    my $cmd = [
        '/sbin/lvs', '--separator', ':', '--noheadings', '--units', 'b',
        '--unbuffered', '--nosuffix',
        '--config', 'report/time_format="%s"',
        '--options', join(',', @fields),
    ];
    push @$cmd, $vgname if $vgname;

    my $lvs = {};

    my $parse_lvs_line = sub {
        my ($output) = @_;

        my (
            $vg_name, $lv_name, $lv_size, $lv_attr, $pool_lv, $data_percent,
            $meta_percent, $snap_percent, $uuid, $tags, $meta_size, $ctime,
        ) = split(':', trim($output));

        return if !$vg_name || !$lv_name;

        my $lv_type = substr($lv_attr, 0, 1);

        my $entry = {
            lv_size => int($lv_size),
            lv_state => substr($lv_attr, 4, 1),
            lv_type => $lv_type,
            ctime => $ctime,
        };
        $entry->{pool_lv} = $pool_lv if $pool_lv;
        $entry->{tags} = $tags if $tags;

        if ($lv_type eq 't') {
            # empty percentage columns count as zero usage
            $data_percent ||= 0;
            $meta_percent ||= 0;

            $entry->{metadata_size} = int($meta_size);
            $entry->{metadata_used} = int(($meta_percent * $meta_size)/100);
            $entry->{used} = int(($data_percent * $lv_size)/100);
        }

        $lvs->{$vg_name}->{$lv_name} = $entry;
    };

    run_command($cmd, outfunc => $parse_lvs_line, errfunc => $ignore_no_medium_warnings);

    return $lvs;
}
# Configuration
# Storage type identifier of this plugin.
sub type {
    my $plugin_type = 'lvm';
    return $plugin_type;
}
# Static plugin data: the 'content' entry is a pair of content-type sets.
# NOTE(review): presumably [supported types, default types] - confirm against
# the base plugin.
sub plugindata {
    my $first_set = { images => 1, rootdir => 1 };
    my $second_set = { images => 1 };

    return {
        content => [ $first_set, $second_set ],
    };
}
# Schema of the configuration properties defined by this plugin, keyed by
# property name.
sub properties {
    my %schema;

    $schema{vgname} = {
        description => "Volume group name.",
        type => 'string',
        format => 'pve-storage-vgname',
    };

    $schema{base} = {
        description => "Base volume. This volume is automatically activated.",
        type => 'string',
        format => 'pve-volume-id',
    };

    $schema{saferemove} = {
        description => "Zero-out data when removing LVs.",
        type => 'boolean',
    };

    $schema{saferemove_throughput} = {
        description => "Wipe throughput (cstream -t parameter value).",
        type => 'string',
    };

    $schema{tagged_only} = {
        description => "Only use logical volumes tagged with 'pve-vm-ID'.",
        type => 'boolean',
    };

    return \%schema;
}
# Configuration options accepted by this storage type, each flagged as
# 'fixed' and/or 'optional'.
sub options {
    my %opts = map { $_ => { optional => 1 } } qw(
        nodes
        shared
        disable
        saferemove
        saferemove_throughput
        content
        tagged_only
        bwlimit
    );

    $opts{vgname} = { fixed => 1 };
    $opts{base} = { fixed => 1, optional => 1 };

    return \%opts;
}
# Storage implementation
# Hook run when a storage of this type is added.
#
# If the storage configuration names a 'base' volume, that volume must live on
# an existing iSCSI storage; the base storage is activated and the volume
# group is created on the base volume's path. Without 'base' nothing is done.
sub on_add_hook {
    my ($class, $storeid, $scfg, %param) = @_;

    my $base = $scfg->{base};
    return if !$base;

    my ($baseid, $volname) = PVE::Storage::parse_volume_id($base);

    my $cfg = PVE::Storage::config();
    my $basecfg = PVE::Storage::storage_config ($cfg, $baseid, 1);
    die "base storage ID '$baseid' does not exist\n" if !$basecfg;

    # we only support iscsi for now
    if ($basecfg->{type} ne 'iscsi') {
        die "unsupported base type '$basecfg->{type}'";
    }

    my $path = PVE::Storage::path($cfg, $base);

    PVE::Storage::activate_storage($cfg, $baseid);

    lvm_create_volume_group($path, $scfg->{vgname}, $scfg->{shared});

    return;
}
# Split an LVM volume name of the form 'vm-<vmid>-<rest>' into its parts.
#
# Returns the list ('images', $name, $vmid, undef, undef, undef, 'raw') and
# dies for names that do not match. The name is first handed to
# PVE::Storage::Plugin::parse_lvm_name().
sub parse_volname {
    my ($class, $volname) = @_;

    PVE::Storage::Plugin::parse_lvm_name($volname);

    die "unable to parse lvm volume name '$volname'\n"
        if $volname !~ m/^(vm-(\d+)-\S+)$/;

    my ($name, $vmid) = ($1, $2);
    return ('images', $name, $vmid, undef, undef, undef, 'raw');
}
# Device path of a volume: /dev/<vgname>/<name>.
#
# Returns the path in scalar context and ($path, $vmid, $vtype) in list
# context. Dies if a snapshot name is given.
sub filesystem_path {
    my ($class, $scfg, $volname, $snapname) = @_;

    if (defined($snapname)) {
        die "lvm snapshot is not implemented";
    }

    my ($vtype, $name, $vmid) = $class->parse_volname($volname);

    my $path = "/dev/$scfg->{vgname}/$name";

    return $path if !wantarray;
    return ($path, $vmid, $vtype);
}
# Base images are not supported on plain LVM storage; always dies.
sub create_base {
    my ($class, $storeid, $scfg, $volname) = @_;

    my $reason = "can't create base images in lvm storage\n";
    die $reason;
}
# Cloning is not supported on plain LVM storage; always dies.
sub clone_image {
    my ($class, $scfg, $storeid, $volname, $vmid, $snap) = @_;

    my $reason = "can't clone images in lvm storage\n";
    die $reason;
}
# Pick the next unused disk name for $vmid, based on the logical volumes that
# currently exist in the storage's volume group.
sub find_free_diskname {
    my ($class, $storeid, $scfg, $vmid, $fmt, $add_fmt_suffix) = @_;

    my $vg = $scfg->{vgname};
    my $lvs = lvm_list_volumes($vg);

    my @existing = keys %{$lvs->{$vg}};

    return PVE::Storage::Plugin::get_next_vm_diskname(\@existing, $storeid, $vmid, undef, $scfg);
}
# Create logical volume $name in volume group $vg and activate it.
#
# A $size without unit suffix is taken as kilobytes. Every entry of the $tags
# array reference is attached to the new volume via '--addtag'.
sub lvcreate {
    my ($vg, $name, $size, $tags) = @_;

    # no unit is given - default to kilobytes
    $size .= "k" if $size =~ m/\d$/;

    my @cmd = ('/sbin/lvcreate', '-aly', '-Wy', '--yes', '--size', $size, '--name', $name);
    for my $tag (@$tags) {
        push @cmd, '--addtag', $tag;
    }
    push @cmd, $vg;

    run_command(\@cmd, errmsg => "lvcreate '$vg/$name' error");
}
# Rename logical volume $oldname in volume group $vg to $newname.
sub lvrename {
    my ($vg, $oldname, $newname) = @_;

    my $cmd = ['/sbin/lvrename', $vg, $oldname, $newname];
    my $errmsg = "lvrename '${vg}/${oldname}' to '${newname}' error";

    run_command($cmd, errmsg => $errmsg);
}
# Allocate a new raw volume for $vmid and return its name.
#
# $size is in kilobytes (it is handed to lvcreate without a unit, which
# defaults to kilobytes). If $name is not given, a free disk name is chosen.
# The volume is tagged with 'pve-vm-$vmid'.
# Dies for formats other than 'raw', for names not starting with 'vm-$vmid-',
# if the volume group does not exist or if it has too little free space.
sub alloc_image {
    my ($class, $storeid, $scfg, $vmid, $fmt, $name, $size) = @_;

    die "unsupported format '$fmt'" if $fmt ne 'raw';

    die "illegal name '$name' - should be 'vm-$vmid-*'\n"
        if $name && $name !~ m/^vm-$vmid-/;

    my $vgs = lvm_vgs();

    my $vg = $scfg->{vgname};

    die "no such volume group '$vg'\n" if !defined ($vgs->{$vg});

    # lvm_vgs() runs vgs with '--units b', so the free space is in bytes while
    # $size is in kilobytes - convert before comparing, otherwise the check is
    # too lenient by a factor of 1024
    my $free = int($vgs->{$vg}->{free});
    my $required = $size * 1024;

    die "not enough free space ($free < $required)\n" if $free < $required;

    $name = $class->find_free_diskname($storeid, $scfg, $vmid)
        if !$name;

    lvcreate($vg, $name, $size, ["pve-vm-$vmid"]);

    return $name;
}
# Remove a volume from the volume group.
#
# The volume is activated and refreshed first. Without 'saferemove' it is
# removed right away and undef is returned. With 'saferemove' it is only
# renamed to 'del-$volname' and a code reference is returned which zeroes the
# renamed volume with cstream and removes it afterwards, under the cluster
# storage lock.
sub free_image {
    my ($class, $storeid, $scfg, $volname, $isBase) = @_;

    my $vg = $scfg->{vgname};
    my $lv = "$vg/$volname";
    my $doomed_lv = "$vg/del-$volname";

    # we need to zero out LVM data for security reasons
    # and to allow thin provisioning
    my $zero_out_worker = sub {
        print "zero-out data on image $volname (/dev/$doomed_lv)\n";

        # wipe throughput up to 10MB/s by default; may be overwritten with saferemove_throughput
        my $throughput = $scfg->{saferemove_throughput} || '-10485760';

        my $wipe_cmd = [
            '/usr/bin/cstream',
            '-i', '/dev/zero',
            '-o', "/dev/$doomed_lv",
            '-T', '10',
            '-v', '1',
            '-b', '1048576',
            '-t', "$throughput"
        ];
        # cstream ends with an error once the device is full, hence only warn
        eval { run_command($wipe_cmd, errmsg => "zero out finished (note: 'No space left on device' is ok here)"); };
        warn $@ if $@;

        $class->cluster_lock_storage($storeid, $scfg->{shared}, undef, sub {
            run_command(
                ['/sbin/lvremove', '-f', $doomed_lv],
                errmsg => "lvremove '$doomed_lv' error",
            );
        });
        print "successfully removed volume $volname ($doomed_lv)\n";
    };

    run_command(
        ['/sbin/lvchange', '-aly', $lv],
        errmsg => "can't activate LV '$lv' to zero-out its data",
    );
    run_command(
        ['/sbin/lvchange', '--refresh', $lv],
        errmsg => "can't refresh LV '$lv' to zero-out its data",
    );

    if ($scfg->{saferemove}) {
        # avoid long running task, so we only rename here
        run_command(
            ['/sbin/lvrename', $vg, $volname, "del-$volname"],
            errmsg => "lvrename '$lv' error",
        );
        return $zero_out_worker;
    }

    run_command(['/sbin/lvremove', '-f', $lv], errmsg => "lvremove '$lv' error");

    return undef;
}
# True if the comma-separated tag list contains a 'pve-vm-<digits>' tag.
my $check_tags = sub {
    my $taglist = shift;

    return defined($taglist) && $taglist =~ /(^|,)pve-vm-\d+(,|$)/;
};
# List the VM images found in the storage's volume group.
#
# Uses $cache->{lvs} if present and fills it otherwise. Only volumes named
# 'vm-<vmid>-*' with an lv_type of '-', 'm', 'M', 'r' or 'R' are considered;
# with 'tagged_only' they also need a 'pve-vm-<id>' tag. If $vollist is given,
# only volume IDs contained in it are returned, otherwise the result is
# filtered by $vmid when that is defined.
# Returns an array reference of hashes with volid, format, size, vmid, ctime.
sub list_images {
    my ($class, $storeid, $scfg, $vmid, $vollist, $cache) = @_;

    $cache->{lvs} ||= lvm_list_volumes();

    my $res = [];

    my $volumes = $cache->{lvs}->{$scfg->{vgname}};
    return $res if !$volumes;

    for my $volname (keys %$volumes) {
        next if $volname !~ m/^vm-(\d+)-/;
        my $owner = $1;

        my $info = $volumes->{$volname};

        if ($scfg->{tagged_only}) {
            next if !$check_tags->($info->{tags});
        }

        # Allow mirrored and RAID LVs
        next if $info->{lv_type} !~ m/^[-mMrR]$/;

        my $volid = "$storeid:$volname";

        if ($vollist) {
            next if !grep { $_ eq $volid } @$vollist;
        } elsif (defined($vmid)) {
            next if $owner ne $vmid;
        }

        push @$res, {
            volid => $volid,
            format => 'raw',
            size => $info->{lv_size},
            vmid => $owner,
            ctime => $info->{ctime},
        };
    }

    return $res;
}
# Usage of the storage's volume group.
#
# Uses $cache->{vgs} if present and fills it otherwise. Returns the list
# (total, free, used, 1) for a known volume group and undef otherwise.
sub status {
    my ($class, $storeid, $scfg, $cache) = @_;

    $cache->{vgs} ||= lvm_vgs();

    my $info = $cache->{vgs}->{$scfg->{vgname}};
    return undef if !$info;

    my ($total, $avail) = ($info->{size}, $info->{free});

    return ($total, $avail, $total - $avail, 1);
}
# Make sure the volume group is known, running vgscan at most once per cache
# if it is missing from the VG list. No logical volume is activated here.
sub activate_storage {
    my ($class, $storeid, $scfg, $cache) = @_;

    $cache->{vgs} ||= lvm_vgs();

    my $vgs = $cache->{vgs};

    # In LVM2, vgscans take place automatically;
    # this is just to be sure
    my $needs_scan = $vgs && !$cache->{vgscaned} && !$vgs->{$scfg->{vgname}};
    if ($needs_scan) {
        $cache->{vgscaned} = 1;

        eval {
            run_command(
                ['/sbin/vgscan', '--ignorelockingfailure', '--mknodes'],
                outfunc => sub {},
            );
        };
        warn $@ if $@;
    }

    # we do not activate any volumes here ('vgchange -aly')
    # instead, volumes are activated individually later
}
# Deactivate all logical volumes of the storage's volume group
# ('vgchange -aln').
sub deactivate_storage {
    my ($class, $storeid, $scfg, $cache) = @_;

    my $vg = $scfg->{vgname};

    run_command(
        ['/sbin/vgchange', '-aln', $vg],
        errmsg => "can't deactivate VG '$vg'",
    );
}
# Activate a logical volume (exclusively, 'lvchange -aey') and refresh it.
# Dies if either lvchange call fails.
sub activate_volume {
    my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;

    # FIXME (original note, truncated): lvmchange is not provided on

    # path() takes the storage ID as third argument everywhere else in this
    # file (volume_export, volume_import), so the snapshot name belongs in the
    # fourth slot instead of taking the storage ID's place
    my $path = $class->path($scfg, $volname, $storeid, $snapname);

    my $lvm_activate_mode = 'ey';

    my $cmd = ['/sbin/lvchange', "-a$lvm_activate_mode", $path];
    run_command($cmd, errmsg => "can't activate LV '$path'");

    # re-read the volume's metadata after activation
    $cmd = ['/sbin/lvchange', '--refresh', $path];
    run_command($cmd, errmsg => "can't refresh LV '$path' for activation");
}
# Deactivate a logical volume ('lvchange -aln'). Does nothing if the volume's
# device node does not exist as a block device.
sub deactivate_volume {
    my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;

    # path() takes the storage ID as third argument everywhere else in this
    # file (volume_export, volume_import), so the snapshot name belongs in the
    # fourth slot instead of taking the storage ID's place
    my $path = $class->path($scfg, $volname, $storeid, $snapname);

    # no block device node - nothing to deactivate
    return if ! -b $path;

    my $cmd = ['/sbin/lvchange', '-aln', $path];
    run_command($cmd, errmsg => "can't deactivate LV '$path'");
}
# Grow a volume to $size (divided by 1024*1024 and passed to lvextend with an
# 'M' suffix). The lvextend call runs under the cluster storage lock.
# Returns 1.
sub volume_resize {
    my ($class, $scfg, $storeid, $volname, $size, $running) = @_;

    $size = ($size/1024/1024) . "M";

    my $path = $class->path($scfg, $volname);
    my $cmd = ['/sbin/lvextend', '-L', $size, $path];

    my $extend = sub {
        run_command($cmd, errmsg => "error resizing volume '$path'");
    };
    $class->cluster_lock_storage($storeid, $scfg->{shared}, undef, $extend);

    return 1;
}
# Size of a volume in bytes, as reported by lvs.
#
# Returns the size in scalar context and ($size, 'raw', 0, undef) in list
# context.
sub volume_size_info {
    my ($class, $scfg, $storeid, $volname, $timeout) = @_;

    my $path = $class->filesystem_path($scfg, $volname);

    my $cmd = [
        '/sbin/lvs', '--separator', ':', '--noheadings', '--units', 'b',
        '--unbuffered', '--nosuffix', '--options', 'lv_size', $path,
    ];

    my $size;
    my $read_size = sub {
        my ($output) = @_;
        $size = int($output);
    };

    run_command(
        $cmd,
        timeout => $timeout,
        errmsg => "can't get size of '$path'",
        outfunc => $read_size,
    );

    return $size if !wantarray;
    return ($size, 'raw', 0, undef);
}
# Snapshots are not available on plain LVM storage; always dies.
sub volume_snapshot {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    die("lvm snapshot is not implemented");
}
# Snapshot rollback is not available on plain LVM storage; always dies.
sub volume_snapshot_rollback {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    die("lvm snapshot rollback is not implemented");
}
# Snapshot deletion is not available on plain LVM storage; always dies.
sub volume_snapshot_delete {
    my ($class, $scfg, $storeid, $volname, $snap) = @_;

    die("lvm snapshot delete is not implemented");
}
# Tell whether $feature is available for a volume.
#
# 'copy' is available for base and current volumes, 'rename' for current
# volumes only; nothing is available for snapshots. Returns 1 or undef.
sub volume_has_feature {
    my ($class, $scfg, $feature, $storeid, $volname, $snapname, $running) = @_;

    my $features = {
        copy => { base => 1, current => 1},
        rename => {current => 1},
    };

    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase) =
        $class->parse_volname($volname);

    # which column of the feature table applies to this volume
    my $key = $snapname ? 'snap'
        : $isBase ? 'base'
        : 'current';

    if ($features->{$feature}->{$key}) {
        return 1;
    }

    return undef;
}
# Export formats for a volume: none when a snapshot is requested, otherwise
# the same formats that volume_import_formats() reports.
sub volume_export_formats {
    my ($class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots) = @_;

    # lvm-thin only
    if (defined($snapshot)) {
        return ();
    }

    return volume_import_formats(
        $class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots);
}
# Write a volume to $fh in 'raw+size' format: the common header carrying the
# size reported by blockdev, followed by the raw data copied with dd.
#
# Dies for any other format and for snapshot or incremental exports.
sub volume_export {
    my ($class, $scfg, $storeid, $fh, $volname, $format, $snapshot, $base_snapshot, $with_snapshots) = @_;

    if ($format ne 'raw+size') {
        die "volume export format $format not available for $class\n";
    }
    if ($with_snapshots) {
        die "cannot export volumes together with their snapshots in $class\n";
    }
    if (defined($snapshot)) {
        die "cannot export a snapshot in $class\n";
    }
    if (defined($base_snapshot)) {
        die "cannot export an incremental stream in $class\n";
    }

    my $file = $class->path($scfg, $volname, $storeid);

    my $size;
    my $read_size = sub {
        my ($line) = @_;
        die "unexpected output from /sbin/blockdev: $line\n" if $line !~ /^(\d+)$/;
        $size = int($1);
    };
    # should be faster than querying LVM, also checks for the device file's availability
    run_command(['/sbin/blockdev', '--getsize64', $file], outfunc => $read_size);

    PVE::Storage::Plugin::write_common_header($fh, $size);

    run_command(['dd', "if=$file", "bs=64k"], output => '>&'.fileno($fh));
}
# Import formats for a volume: 'raw+size', unless snapshots or an incremental
# base snapshot are involved, in which case the list is empty.
sub volume_import_formats {
    my ($class, $scfg, $storeid, $volname, $snapshot, $base_snapshot, $with_snapshots) = @_;

    # neither of these is supported
    if ($with_snapshots || defined($base_snapshot)) {
        return ();
    }

    return ('raw+size');
}
# Import a 'raw+size' stream from $fh into a newly allocated volume and return
# its volume ID ("$storeid:$name").
#
# If a volume called $volname already exists, the import dies unless
# $allow_rename is set, in which case a free name is chosen instead.
# On failure the newly allocated volume (if any) is freed again and the
# original error is re-thrown. Dies for other formats and for imports
# involving snapshots or incremental streams.
sub volume_import {
    my ($class, $scfg, $storeid, $fh, $volname, $format, $snapshot, $base_snapshot, $with_snapshots, $allow_rename) = @_;

    die "volume import format $format not available for $class\n"
        if $format ne 'raw+size';
    die "cannot import volumes together with their snapshots in $class\n"
        if $with_snapshots;
    die "cannot import an incremental stream in $class\n" if defined($base_snapshot);

    my ($vtype, $name, $vmid, $basename, $basevmid, $isBase, $file_format) =
        $class->parse_volname($volname);

    die "cannot import format $format into a file of format $file_format\n"
        if $file_format ne 'raw';

    my $vg = $scfg->{vgname};
    my $lvs = lvm_list_volumes($vg);
    if ($lvs->{$vg}->{$volname}) {
        die "volume $vg/$volname already exists\n" if !$allow_rename;
        warn "volume $vg/$volname already exists - importing with a different name\n";
        # let alloc_image pick a free name
        $name = undef;
    }

    # the header carries the size in bytes, alloc_image expects kilobytes
    my ($size) = PVE::Storage::Plugin::read_common_header($fh);
    $size = int($size/1024);

    # Stays undef until the allocation succeeded. The cleanup below must never
    # touch $volname itself: when alloc_image fails, $volname may name a
    # pre-existing, unrelated volume (the $allow_rename case).
    my $allocname;

    eval {
        $allocname = $class->alloc_image($storeid, $scfg, $vmid, 'raw', $name, $size);
        if (defined($name) && $allocname ne $volname) {
            die "internal error: unexpected allocated name: '$allocname' != '$volname'\n";
        }
        my $file = $class->path($scfg, $allocname, $storeid)
            or die "internal error: failed to get path to newly allocated volume $allocname\n";

        $class->volume_import_write($fh, $file);
    };
    if (my $err = $@) {
        if (defined($allocname)) {
            my $cleanup_worker = eval { $class->free_image($storeid, $scfg, $allocname, 0) };
            warn $@ if $@;

            # free_image may hand back a worker for the long-running part
            if ($cleanup_worker) {
                my $rpcenv = PVE::RPCEnvironment::get();
                my $authuser = $rpcenv->get_user();

                $rpcenv->fork_worker('imgdel', undef, $authuser, $cleanup_worker);
            }
        }

        die $err;
    }

    return "$storeid:$allocname";
}
# Copy everything readable from $input_fh to $output_file using dd.
sub volume_import_write {
    my ($class, $input_fh, $output_file) = @_;

    my $cmd = ['dd', "of=$output_file", 'bs=64k'];
    my $stdin = '<&'.fileno($input_fh);

    run_command($cmd, input => $stdin);
}
# Rename a volume and return the new volume ID.
#
# If $target_volname is not given, a free disk name for $target_vmid is
# chosen. Dies if a volume with the target name already exists.
sub rename_volume {
    my ($class, $scfg, $storeid, $source_volname, $target_vmid, $target_volname) = @_;

    # only the format (seventh element) of the parsed name is needed
    my $format = ($class->parse_volname($source_volname))[6];

    if (!$target_volname) {
        $target_volname = $class->find_free_diskname($storeid, $scfg, $target_vmid, $format);
    }

    my $vg = $scfg->{vgname};
    my $lvs = lvm_list_volumes($vg);

    if ($lvs->{$vg}->{$target_volname}) {
        die "target volume '${target_volname}' already exists\n";
    }

    lvrename($vg, $source_volname, $target_volname);

    return "${storeid}:${target_volname}";
}
1;