Skip to content

QEMU egl-headless显示器与NVIDIA显卡的适配问题

主要碰到了两个问题:

  1. 虚拟机启动后,在物理机使用nvidia-smi查看,发现QEMU并未调用NVIDIA驱动;
  2. 使用virt-manager启动虚拟机,初始化egl-headless显示器时崩溃。

问题1

目前,QEMU egl-headless显示器使用的是EGL_MESA_platform_gbm(也即EGL_KHR_platform_gbm),而NVIDIA驱动从495.29.05版本才开始支持该platform,所以首先得保证驱动版本符合要求。相关信息参见:

驱动安装完成后,可以先在物理机上使用eglinfo确认一下“GBM platform”下的EGL vendor string是否为NVIDIA,如果是的话,应该就可以避免该问题。

$ eglinfo
EGL client extensions string:
    EGL_EXT_platform_base EGL_EXT_device_base EGL_EXT_device_enumeration
    EGL_EXT_device_query EGL_KHR_client_get_all_proc_addresses
    EGL_EXT_client_extensions EGL_KHR_debug EGL_KHR_platform_x11
    EGL_EXT_platform_x11 EGL_EXT_platform_device
    EGL_MESA_platform_surfaceless EGL_EXT_explicit_device
    EGL_KHR_platform_wayland EGL_EXT_platform_wayland
    EGL_KHR_platform_gbm EGL_MESA_platform_gbm EGL_MESA_platform_xcb

GBM platform:
EGL API version: 1.5
EGL vendor string: NVIDIA
EGL version string: 1.5
EGL client APIs: OpenGL_ES OpenGL
EGL extensions string:
    EGL_EXT_buffer_age EGL_EXT_client_sync
...

问题2

解决办法

修改/etc/libvirt/qemu.conf文件,取消cgroup_device_acl配置项的注释,并将NVIDIA设备节点全部加进去,然后重启系统。

# This is the basic set of devices allowed / required by
# all virtual machines.
#
# As well as this, any configured block backed disks,
# all sound device, and all PTY devices are allowed.
#
# This will only need setting if newer QEMU suddenly
# wants some device we don't already know about.
#
cgroup_device_acl = [
    "/dev/null", "/dev/full", "/dev/zero",
    "/dev/random", "/dev/urandom",
    "/dev/ptmx", "/dev/kvm",
    "/dev/nvidia0",  "/dev/nvidiactl",  "/dev/nvidia-modeset",  "/dev/nvidia-uvm",  "/dev/nvidia-uvm-tools"
]

原因分析

经过一系列尝试,发现通过终端手动启动QEMU是正常的,但通过virsh/virt-manager启动就会导致崩溃。于是用strace工具对QEMU进行分析,发现进程是被SIGSYS信号终止的(SIGSYS通常由seccomp过滤器触发;从日志看,很可能是NVIDIA驱动库找不到设备节点后,试图创建子进程执行nvidia-modprobe,被QEMU的seccomp沙箱拦截所致):

9205  openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 33
9205  newfstatat(33, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
9205  read(33, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 878
9205  close(33)                         = 0
9205  stat("/dev/nvidiactl", 0x7ffd33310e90) = -1 ENOENT (No such file or directory)
9205  mknodat(AT_FDCWD, "/dev/nvidiactl", S_IFCHR|0666, makedev(0xc3, 0xff)) = -1 EACCES (Permission denied)
9205  stat("/usr/bin/nvidia-modprobe", {st_mode=S_IFREG|S_ISUID|0755, st_size=39232, ...}) = 0
9205  geteuid()                         = 107
9205  clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f52c2b05150) = 56
9205  +++ killed by SIGSYS (core dumped) +++

与正常日志对比:

openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 878
close(4)                                = 0
stat("/dev/nvidiactl", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0xff), ...}) = 0
openat(AT_FDCWD, "/dev/nvidiactl", O_RDWR) = 4
fcntl(4, F_SETFD, FD_CLOEXEC)           = 0
ioctl(4, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0xd2, 0x48), 0x7ffc67a5c750) = 0
openat(AT_FDCWD, "/sys/devices/system/memory/block_size_bytes", O_RDONLY) = 5
read(5, "8000000\n", 99)                = 8
close(5)                                = 0

可见差异主要在/dev/nvidiactl的访问上,问题日志中报告了ENOENT错误,但显然物理机上是存在这个设备节点的:

# ls -l /dev/nvidia*
crw-rw-rw- 1 root root 195,   0 Jan  4 11:22 /dev/nvidia0
crw-rw-rw- 1 root root 195, 255 Jan  4 11:22 /dev/nvidiactl
crw-rw-rw- 1 root root 195, 254 Jan  4 11:22 /dev/nvidia-modeset

进入QEMU进程的/proc/<pid>/root目录查看,确实没有上述设备,难道libvirt还为QEMU设置了namespace?

/* libvirt/src/qemu/qemu_namespace.c */

/*
 * Record the path reported by virDomainGraphicsGetRenderNode() for @gfx
 * on the @paths list.
 *
 * When no such path is reported, @paths is left untouched. The list
 * receives its own copy of the string (g_strdup).
 *
 * Returns: always 0.
 */
static int
qemuDomainSetupGraphics(virDomainGraphicsDef *gfx,
                        GSList **paths)
{
    const char *node = virDomainGraphicsGetRenderNode(gfx);

    if (node)
        *paths = g_slist_prepend(*paths, g_strdup(node));

    return 0;
}


/*
 * Run qemuDomainSetupGraphics() over every graphics device in the
 * definition of @vm, accumulating results in @paths.
 *
 * Returns: 0 once all devices were processed, -1 as soon as one of the
 * per-device calls reports a failure.
 */
static int
qemuDomainSetupAllGraphics(virDomainObj *vm,
                           GSList **paths)
{
    size_t idx = 0;

    VIR_DEBUG("Setting up graphics");

    while (idx < vm->def->ngraphics) {
        if (qemuDomainSetupGraphics(vm->def->graphics[idx], paths) < 0)
            return -1;
        idx++;
    }

    VIR_DEBUG("Setup all graphics");
    return 0;
}


/*
 * Report whether namespace @ns is flagged in the private data of @vm.
 *
 * Returns: false when the domain has no namespaces bitmap at all,
 * otherwise whether the bit for @ns is set in that bitmap.
 */
bool
qemuDomainNamespaceEnabled(virDomainObj *vm,
                           qemuDomainNamespace ns)
{
    qemuDomainObjPrivate *priv = vm->privateData;

    /* No bitmap means no namespace has been enabled. */
    if (!priv->namespaces)
        return false;

    return virBitmapIsBitSet(priv->namespaces, ns);
}


/*
 * Gather the paths needed by the domain @vm and hand the resulting list
 * to qemuNamespaceMknodPaths().
 *
 * If QEMU_DOMAIN_NS_MOUNT is not enabled for the domain, this only emits
 * a debug message and succeeds without collecting anything.
 *
 * The collectors run in a fixed order; the first one that fails stops
 * the sequence. The list is released automatically on return
 * (g_autoptr).
 *
 * Returns: 0 on success, -1 if any step fails.
 */
int
qemuDomainBuildNamespace(virQEMUDriverConfig *cfg,
                         virDomainObj *vm)
{
    g_autoptr(virGSListString) paths = NULL;

    if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) {
        VIR_DEBUG("namespaces disabled for domain %s", vm->def->name);
        return 0;
    }

    /* Short-circuit evaluation keeps the original call order and stops
     * at the first failing step. */
    if (qemuDomainPopulateDevices(cfg, &paths) < 0 ||
        qemuDomainSetupAllDisks(vm, &paths) < 0 ||
        qemuDomainSetupAllHostdevs(vm, &paths) < 0 ||
        qemuDomainSetupAllMemories(vm, &paths) < 0 ||
        qemuDomainSetupAllChardevs(vm, &paths) < 0 ||
        qemuDomainSetupAllTPMs(vm, &paths) < 0 ||
        qemuDomainSetupAllGraphics(vm, &paths) < 0 ||
        qemuDomainSetupAllInputs(vm, &paths) < 0 ||
        qemuDomainSetupAllRNGs(vm, &paths) < 0 ||
        qemuDomainSetupLoader(vm, &paths) < 0 ||
        qemuDomainSetupLaunchSecurity(vm, &paths) < 0 ||
        qemuNamespaceMknodPaths(vm, paths, NULL) < 0) {
        return -1;
    }

    return 0;
}

可见确实是会设置namespace。

/* libvirt/src/qemu/qemu_conf.c */

/*
 * Excerpt of the driver config constructor; the `...` lines stand for
 * code omitted from this quotation.
 */
virQEMUDriverConfig *virQEMUDriverConfigNew(bool privileged,
                                              const char *root)
{
    ...

    /* When running privileged and the mount namespace is available, set
     * the QEMU_DOMAIN_NS_MOUNT bit in cfg->namespaces; return NULL if
     * setting the bit fails. */
    if (privileged &&
        qemuDomainNamespaceAvailable(QEMU_DOMAIN_NS_MOUNT) &&
        virBitmapSetBit(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT) < 0)
        return NULL;

    ...
}

由上可知,当为特权模式且系统支持namespace时,就会设置QEMU_DOMAIN_NS_MOUNT标志位,从而导致QEMU被隔离在独立的mount namespace中;就显卡而言,其/dev下只会创建DRI render node节点,而不包含NVIDIA的设备节点。

根据官方文档的介绍,凡是以qemu:///system的URI连接QEMU driver,都为特权模式。所以这个特权模式是不好调整的,那有没有其它的配置接口呢?

当然是有的,那就是上一节提到的/etc/libvirt/qemu.conf,具体配置方法该文件的注释写得比较清楚,不再赘述。


Last update: 2023-01-11