如何将Linux内核缓冲区映射到用户空间?
假设缓冲区是使用基于页面的方案分配的。实现mmap的一种方法是使用remap_pfn_range，但LDD3说这对传统（常规）内存不起作用。看来我们可以通过用SetPageReserved把页面标记为保留来解决这个问题，使它被锁定在内存中。但是，难道不是所有的内核内存本来就不可交换、即已经是"保留"的吗？为什么还需要显式地设置保留位？如何将Linux内核缓冲区映射到用户空间？
这是否与从HIGH_MEM分配的页面有关?
在mmap方法中，从内核映射一组页面到用户空间的最简单方法，是使用缺页（fault）处理程序来按需映射页面。基本上你最终会得到类似下面的代码：
/*
 * mmap file operation: install our vm_operations on the VMA so that page
 * faults on the mapped range are served lazily by my_fault().  No pages
 * are mapped eagerly here.
 */
static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
	/* All real work happens on first access, in the fault handler. */
	vma->vm_ops = &my_vm_ops;
	return 0;
}
/*
 * File operations: only .mmap is custom here.  nonseekable_open()/no_llseek
 * mark the file as non-seekable, which is appropriate for a pure mmap device.
 */
static const struct file_operations my_fops = {
.owner = THIS_MODULE,
.open = nonseekable_open,
.mmap = my_mmap,
.llseek = no_llseek,
};
（其余的文件操作按你的模块的需要填写即可）。同样，在my_mmap中，你还需要做必要的范围检查等，来验证mmap的参数。
然后vm_ops
样子:
/*
 * Fault handler: resolve the struct page backing the faulting file offset
 * and hand it to the core VM.  Returning 0 with vmf->page set makes the
 * kernel insert that page into the user mapping.
 */
static int my_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = my_page_at_index(vmf->pgoff);

	/* Take a reference; the core VM drops it when the mapping is torn down. */
	get_page(page);
	vmf->page = page;
	return 0;
}
/*
 * VMA callbacks: only .fault is needed for lazy, page-at-a-time mapping.
 * Note: the initializer must be terminated with ';' (missing in the
 * original snippet, which would not compile).
 */
static const struct vm_operations_struct my_vm_ops = {
	.fault = my_fault,
};
。接下来你只需要弄清楚：对于传入fault函数的给定VMA/vm_fault，应该把哪个页面映射到用户空间。这取决于你的模块是如何工作的。举例来说，如果你执行了
my_buf = vmalloc_user(MY_BUF_SIZE);
,那么你使用的页面会像
vmalloc_to_page(my_buf + (vmf->pgoff << PAGE_SHIFT));
但是你也可以很容易地创建一个页面数组：为其中的每个条目用kmalloc或任何其他方式分配页面。
[顺便一提：my_fault 对一个函数来说是个略显有趣的名字]
谢谢。这非常有帮助。不过,我们是否需要在故障处理程序中调用vm_insert_page?另外,谁将撤销get_page以允许页面稍后被释放?我想,一旦用户空间做了munmap,我们就可以从vma_close中得到一些代码,我们可以在这个代码中为所有发生故障的页面put_page。这是正确的方法吗? – ravi
不,如果您设置了vmf->页面,则不需要执行vm_insert_page。如果你在映射非页面支持的设备内存方面做了很多漂亮的工作,那么你可能需要vm_insert_pfn(),但实际上你可能不需要担心这一点。 put_page()在映射被拆除时由核心虚拟机代码处理。真的,对于将内核内存映射到用户空间的简单驱动程序,我向你展示了几乎所有你需要的东西。 – Roland
你好。如果无法用vmalloc()一次性分配my_buf缓冲区（因为太大），而要按需一页一页地分配，那么my_fault()方法的主体应该是什么样子？ – user1284631
虽然网页是通过内核驱动程序保留的,它是指通过用户空间进行访问。因此,PTE(页表条目)不知道pfn是否属于用户空间或内核空间(即使它们是通过内核驱动程序分配的)。
这就是为什么他们被标记为SetPageReserved
。
最小可运行的例子（含用户态测试程序）
#include <asm/uaccess.h> /* copy_from_user */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h> /* min */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
/* Name of the /proc entry this module creates. */
static const char *filename = "lkmc_mmap";
/* Meaningful payload size in bytes (a whole page is allocated; only the
 * first BUFFER_SIZE bytes are used by read/write). */
enum { BUFFER_SIZE = 4 };
/* Per-open state: one page of kernel memory shared with userspace. */
struct mmap_info {
/* Page-aligned buffer from get_zeroed_page(); mapped to userspace on fault. */
char *data;
};
/* VMA .close hook: called after userspace munmap()s the region. */
static void vm_close(struct vm_area_struct *vma)
{
pr_info("vm_close\n");
}
/* First page access. */
static int vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
pr_info("vm_fault\n");
info = (struct mmap_info *)vma->vm_private_data;
if (info->data) {
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
}
return 0;
}
/* VMA .open hook: called when the VMA is duplicated (e.g. fork), and
 * explicitly from our mmap handler below.  TODO: under what circumstances
 * does the core call this at a time other than mmap? */
static void vm_open(struct vm_area_struct *vma)
{
pr_info("vm_open\n");
}
/* VMA callbacks: fault pages in lazily; log open/close for observability. */
static struct vm_operations_struct vm_ops =
{
.close = vm_close,
.fault = vm_fault,
.open = vm_open,
};
/*
 * mmap file operation: register the VMA callbacks; pages are supplied
 * lazily by vm_fault() on first access.
 */
static int mmap(struct file *filp, struct vm_area_struct *vma)
{
	pr_info("mmap\n");

	vma->vm_ops = &vm_ops;
	/* Fixed-size kernel buffer: forbid growing the mapping via mremap()
	 * and keep it out of core dumps. */
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	/* Make the per-open buffer reachable from the fault handler. */
	vma->vm_private_data = filp->private_data;
	/* The core does not call .open for the initial mmap, so do it here. */
	vm_open(vma);

	return 0;
}
static int open(struct inode *inode, struct file *filp)
{
struct mmap_info *info;
pr_info("open\n");
info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info));
info->data = (char *)get_zeroed_page(GFP_KERNEL);
memcpy(info->data, "asdf", BUFFER_SIZE);
filp->private_data = info;
return 0;
}
/*
 * read file operation: copy the kernel buffer to userspace, honouring the
 * file offset so readers eventually see EOF.  The original ignored *off and
 * always returned BUFFER_SIZE bytes, making e.g. `cat` loop forever.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *info = filp->private_data;
	ssize_t ret;

	pr_info("read\n");
	if (*off >= BUFFER_SIZE)
		return 0; /* EOF */
	ret = min(len, (size_t)(BUFFER_SIZE - *off));
	if (copy_to_user(buf, info->data + *off, ret))
		return -EFAULT;
	*off += ret;
	return ret;
}
/*
 * write file operation: copy at most BUFFER_SIZE bytes from userspace into
 * the kernel buffer.  Deliberately claims the full len was consumed even
 * when truncated, so writers do not loop retrying the tail.
 */
static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *info = filp->private_data;
	size_t n = min(len, (size_t)BUFFER_SIZE);

	pr_info("write\n");
	return copy_from_user(info->data, buf, n) ? -EFAULT : (ssize_t)len;
}
/*
 * release file operation: tear down the per-open state created in open()
 * (free the data page, then the mmap_info itself).
 */
static int release(struct inode *inode, struct file *filp)
{
	struct mmap_info *info = filp->private_data;

	pr_info("release\n");
	free_page((unsigned long)info->data);
	kfree(info);
	/* Defensive: drop the dangling pointer. */
	filp->private_data = NULL;
	return 0;
}
/* File operations for the /proc entry: mmap plus plain read/write access
 * to the same underlying buffer. */
static const struct file_operations fops = {
.mmap = mmap,
.open = open,
.release = release,
.read = read,
.write = write,
};
static int myinit(void)
{
proc_create(filename, 0, NULL, &fops);
return 0;
}
/* Module exit: remove the /proc entry created in myinit(). */
static void myexit(void)
{
remove_proc_entry(filename, NULL);
}
module_init(myinit)
module_exit(myexit)
MODULE_LICENSE("GPL");
#define _XOPEN_SOURCE 700
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h> /* uintmax_t */
#include <string.h>
#include <sys/mman.h>
#include <unistd.h> /* sysconf */
#include "common.h" /* virt_to_phys_user */
enum { BUFFER_SIZE = 4 };

/*
 * Userland test: mmap the kernel-provided /proc file twice and verify that
 * both mappings, as well as read()/write() on the fd, all observe the same
 * underlying physical page.
 */
int main(int argc, char **argv)
{
	int fd;
	long page_size;
	char *address1, *address2;
	char buf[BUFFER_SIZE];
	uintptr_t paddr;

	if (argc < 2) {
		printf("Usage: %s <mmap_file>\n", argv[0]);
		return EXIT_FAILURE;
	}
	page_size = sysconf(_SC_PAGE_SIZE);
	printf("open pathname = %s\n", argv[1]);
	fd = open(argv[1], O_RDWR | O_SYNC);
	if (fd < 0) {
		perror("open");
		assert(0);
	}
	printf("fd = %d\n", fd);

	/* mmap twice for double fun. */
	puts("mmap 1");
	address1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (address1 == MAP_FAILED) {
		perror("mmap");
		assert(0);
	}
	puts("mmap 2");
	address2 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (address2 == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	assert(address1 != address2);

	/* Read and modify memory; each first touch triggers vm_fault in the
	 * kernel module. */
	puts("access 1");
	assert(!strcmp(address1, "asdf"));
	puts("access 2");
	assert(!strcmp(address2, "asdf"));
	strcpy(address1, "qwer");
	/* Also modified: both virtual addresses alias the same physical page. */
	assert(!strcmp(address2, "qwer"));

	/* Check that the physical addresses are the same.
	 * They are, but TODO why virt_to_phys on kernel gives a different value? */
	assert(!virt_to_phys_user(&paddr, getpid(), (uintptr_t)address1));
	printf("paddr1 = 0x%jx\n", (uintmax_t)paddr);
	assert(!virt_to_phys_user(&paddr, getpid(), (uintptr_t)address2));
	printf("paddr2 = 0x%jx\n", (uintmax_t)paddr);

	/* Modifications made from userland are visible through read().
	 * The original ignored the syscall return values; assert on them. */
	assert(read(fd, buf, BUFFER_SIZE) == BUFFER_SIZE);
	assert(!memcmp(buf, "qwer", BUFFER_SIZE));

	/* Modify the data from the kernel side via write(), and check the
	 * change is visible from userland through both mappings. */
	assert(write(fd, "zxcv", BUFFER_SIZE) == BUFFER_SIZE);
	assert(!strcmp(address1, "zxcv"));
	assert(!strcmp(address2, "zxcv"));

	/* Cleanup. */
	puts("munmap 1");
	if (munmap(address1, page_size)) {
		perror("munmap");
		assert(0);
	}
	puts("munmap 2");
	if (munmap(address2, page_size)) {
		perror("munmap");
		assert(0);
	}
	puts("close");
	close(fd);
	return EXIT_SUCCESS;
}
不知道这是否有帮助，但据我所知，内核中的[perf](http://lxr.free-electrons.com/source/tools/perf/design.txt)子系统提供了一组来自内核内存的页面（实际上是一个环形缓冲区），可以由用户空间应用程序mmap。它的实现可能会给你的问题提供一些提示，值得看看它的源代码。 – Eugene