Adapting QEMU's egl-headless display to NVIDIA GPUs
We ran into two main problems:
- After the VM started, nvidia-smi on the host showed that QEMU was not using the NVIDIA driver at all;
- When the VM was started through virt-manager, QEMU crashed while initializing the egl-headless display.
Problem 1
Currently, QEMU's egl-headless display uses EGL_MESA_platform_gbm (i.e. EGL_KHR_platform_gbm), and the NVIDIA driver only supports this platform as of version 495.29.05, so the first thing to verify is that the driver version meets that requirement. For details, see:
- NVIDIA Beta 495.29.05 rolls out with GBM for expanded Wayland support
- NVIDIA Linux x64 (AMD64/EM64T) Display Driver
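To check which driver version is installed, either nvidia-smi or /proc will do (a quick sanity check; the version numbers shown below are only illustrative):
$ nvidia-smi --query-gpu=driver_version --format=csv,noheader
495.46
$ cat /proc/driver/nvidia/version
NVRM version: NVIDIA UNIX x86_64 Kernel Module  495.46 ...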
Once the driver is installed, run eglinfo on the host and check whether the vendor string reported under "GBM platform" is NVIDIA; if it is, this problem should not occur.
$ eglinfo
EGL client extensions string:
EGL_EXT_platform_base EGL_EXT_device_base EGL_EXT_device_enumeration
EGL_EXT_device_query EGL_KHR_client_get_all_proc_addresses
EGL_EXT_client_extensions EGL_KHR_debug EGL_KHR_platform_x11
EGL_EXT_platform_x11 EGL_EXT_platform_device
EGL_MESA_platform_surfaceless EGL_EXT_explicit_device
EGL_KHR_platform_wayland EGL_EXT_platform_wayland
EGL_KHR_platform_gbm EGL_MESA_platform_gbm EGL_MESA_platform_xcb
GBM platform:
EGL API version: 1.5
EGL vendor string: NVIDIA
EGL version string: 1.5
EGL client APIs: OpenGL_ES OpenGL
EGL extensions string:
EGL_EXT_buffer_age EGL_EXT_client_sync
...
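To double-check that QEMU itself picks up the NVIDIA driver, you can also start it by hand with an egl-headless display (a minimal sketch; the disk image, memory size, and render node path are placeholders for your own setup):
$ qemu-system-x86_64 -accel kvm -m 2G \
    -display egl-headless,rendernode=/dev/dri/renderD128 \
    -vnc :0 \
    -drive file=disk.qcow2,if=virtio
While this runs, the QEMU process should appear in the nvidia-smi output on the host.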
Problem 2
Solution
Edit /etc/libvirt/qemu.conf, uncomment the cgroup_device_acl option, add all of the NVIDIA device nodes to it, and then reboot the system.
# This is the basic set of devices allowed / required by
# all virtual machines.
#
# As well as this, any configured block backed disks,
# all sound device, and all PTY devices are allowed.
#
# This will only need setting if newer QEMU suddenly
# wants some device we don't already know about.
#
cgroup_device_acl = [
    "/dev/null", "/dev/full", "/dev/zero",
    "/dev/random", "/dev/urandom",
    "/dev/ptmx", "/dev/kvm",
    "/dev/nvidia0", "/dev/nvidiactl", "/dev/nvidia-modeset",
    "/dev/nvidia-uvm", "/dev/nvidia-uvm-tools"
]
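A full reboot is what we used; in principle, restarting the libvirt daemon and then cold-restarting the domain may be enough to pick up the new ACL (untested here; <domain> is a placeholder for your VM's name):
# systemctl restart libvirtd
# virsh destroy <domain> && virsh start <domain>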
Root-cause analysis
After some experimentation, we found that QEMU ran fine when started manually from a terminal, but crashed when launched via virsh/virt-manager. Tracing QEMU with strace revealed that the process was being killed by SIGSYS:
9205 openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 33
9205 newfstatat(33, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
9205 read(33, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 878
9205 close(33) = 0
9205 stat("/dev/nvidiactl", 0x7ffd33310e90) = -1 ENOENT (No such file or directory)
9205 mknodat(AT_FDCWD, "/dev/nvidiactl", S_IFCHR|0666, makedev(0xc3, 0xff)) = -1 EACCES (Permission denied)
9205 stat("/usr/bin/nvidia-modprobe", {st_mode=S_IFREG|S_ISUID|0755, st_size=39232, ...}) = 0
9205 geteuid() = 107
9205 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f52c2b05150) = 56
9205 +++ killed by SIGSYS (core dumped) +++
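For reference, one way to obtain such a trace for a libvirt-launched QEMU is to attach strace to libvirtd before starting the VM, so the forked QEMU child is traced too (-f follows forks, -o writes the trace to a file):
# strace -f -p $(pidof libvirtd) -o /tmp/qemu.trace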
Compare this with the trace from a normal (manually started) run:
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 878
close(4) = 0
stat("/dev/nvidiactl", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0xff), ...}) = 0
openat(AT_FDCWD, "/dev/nvidiactl", O_RDWR) = 4
fcntl(4, F_SETFD, FD_CLOEXEC) = 0
ioctl(4, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0xd2, 0x48), 0x7ffc67a5c750) = 0
openat(AT_FDCWD, "/sys/devices/system/memory/block_size_bytes", O_RDONLY) = 5
read(5, "8000000\n", 99) = 8
close(5) = 0
The key difference is in the access to /dev/nvidiactl. (Presumably, on finding the node missing, the NVIDIA userspace library falls back to creating it with mknod and forking nvidia-modprobe, as the failing trace shows, and that fallback is what trips QEMU's seccomp sandbox and produces the SIGSYS.) The failing trace reports ENOENT, yet the device node obviously exists on the host:
# ls -l /dev/nvidia*
crw-rw-rw- 1 root root 195, 0 Jan 4 11:22 /dev/nvidia0
crw-rw-rw- 1 root root 195, 255 Jan 4 11:22 /dev/nvidiactl
crw-rw-rw- 1 root root 195, 254 Jan 4 11:22 /dev/nvidia-modeset
Looking inside the QEMU process's /proc/<pid>/root directory, those device nodes are indeed missing. Could libvirt be placing QEMU in a namespace of its own?
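A quick way to check is to compare QEMU's mount namespace against PID 1's (output is illustrative; the pidof pattern assumes a single QEMU process named qemu-system-x86_64):
# readlink /proc/1/ns/mnt /proc/$(pidof qemu-system-x86_64)/ns/mnt
mnt:[4026531840]
mnt:[4026532599]
The differing IDs mean QEMU lives in a private mount namespace. libvirt's source shows where this comes from: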
/* libvirt/src/qemu/qemu_namespace.c */
static int
qemuDomainSetupGraphics(virDomainGraphicsDef *gfx,
                        GSList **paths)
{
    const char *rendernode = virDomainGraphicsGetRenderNode(gfx);

    if (!rendernode)
        return 0;

    *paths = g_slist_prepend(*paths, g_strdup(rendernode));
    return 0;
}

static int
qemuDomainSetupAllGraphics(virDomainObj *vm,
                           GSList **paths)
{
    size_t i;

    VIR_DEBUG("Setting up graphics");

    for (i = 0; i < vm->def->ngraphics; i++) {
        if (qemuDomainSetupGraphics(vm->def->graphics[i],
                                    paths) < 0)
            return -1;
    }

    VIR_DEBUG("Setup all graphics");
    return 0;
}

bool
qemuDomainNamespaceEnabled(virDomainObj *vm,
                           qemuDomainNamespace ns)
{
    qemuDomainObjPrivate *priv = vm->privateData;

    return priv->namespaces &&
        virBitmapIsBitSet(priv->namespaces, ns);
}

int
qemuDomainBuildNamespace(virQEMUDriverConfig *cfg,
                         virDomainObj *vm)
{
    g_autoptr(virGSListString) paths = NULL;

    if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
        VIR_DEBUG("namespaces disabled for domain %s", vm->def->name);
        return 0;
    }

    if (qemuDomainPopulateDevices(cfg, &paths) < 0)
        return -1;

    if (qemuDomainSetupAllDisks(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupAllHostdevs(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupAllMemories(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupAllChardevs(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupAllTPMs(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupAllGraphics(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupAllInputs(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupAllRNGs(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupLoader(vm, &paths) < 0)
        return -1;

    if (qemuDomainSetupLaunchSecurity(vm, &paths) < 0)
        return -1;

    if (qemuNamespaceMknodPaths(vm, paths, NULL) < 0)
        return -1;

    return 0;
}
So libvirt does indeed set up a mount namespace. The corresponding namespace bit is enabled here:
/* libvirt/src/qemu/qemu_conf.c */
virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
                                            const char *root)
{
    ...
    if (privileged &&
        qemuDomainNamespaceAvailable(QEMU_DOMAIN_NS_MOUNT) &&
        virBitmapSetBit(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT) < 0)
        return NULL;
    ...
}
So whenever libvirt runs in privileged mode and the system supports mount namespaces, the QEMU_DOMAIN_NS_MOUNT bit is set, and QEMU ends up isolated in a private mount namespace where, as far as graphics is concerned, only the DRI render node is made available. Note also qemuDomainPopulateDevices() in the listing above: it populates the namespace from the cgroup_device_acl list in qemu.conf (falling back to a built-in default list), which is why adding the NVIDIA device nodes to that list, as in the Solution section, makes them visible inside QEMU's namespace.
According to the official documentation, any connection to the QEMU driver through the qemu:///system URI runs in privileged mode, so privileged mode itself is not something we can easily change. Is there some other configuration knob?
Of course there is: the /etc/libvirt/qemu.conf file from the previous section. The comments in that file describe the relevant options clearly enough, so we won't repeat them here.
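For completeness, qemu.conf also exposes the namespace behavior directly through the namespaces option; setting it to an empty list should disable the mount namespace altogether. This is a blunter instrument than extending cgroup_device_acl, since it also drops the isolation libvirt intended:
# /etc/libvirt/qemu.conf
# Empty list = do not create a private mount namespace for QEMU.
namespaces = []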