如何将Linux内核缓冲区映射到用户空间?
假设缓冲区是使用基于页面的方案分配的。实现mmap的一种方法是使用remap_pfn_range，但LDD3说这对传统（常规）内存不起作用。看来我们可以通过用SetPageReserved把页面标记为保留来解决这个问题，使它被锁定在内存中。但是，难道不是所有的内核内存本来就不可交换、即已经是"保留"的吗？为什么还需要显式地设置保留位？如何将Linux内核缓冲区映射到用户空间？
这是否与从HIGH_MEM分配的页面有关?
在mmap方法中，从内核映射一组页面到用户空间的最简单方法，是使用缺页（fault）处理程序来按需映射页面。基本上你最终会得到类似下面的代码：
/*
 * mmap file operation: install our vm_operations on the VMA so that page
 * faults on the mapped range are served lazily by my_fault().  No pages
 * are mapped eagerly here.
 */
static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
	/* All real work happens on first access, in the fault handler. */
	vma->vm_ops = &my_vm_ops;
	return 0;
}
/*
 * File operations: only .mmap is custom here.  nonseekable_open()/no_llseek
 * mark the file as non-seekable, which is appropriate for a pure mmap device.
 */
static const struct file_operations my_fops = {
.owner = THIS_MODULE,
.open = nonseekable_open,
.mmap = my_mmap,
.llseek = no_llseek,
};
（其余的文件操作按你的模块的需要填写即可）。同样，在my_mmap中，你还需要做必要的范围检查等，来验证mmap的参数。
然后vm_ops
样子:
/*
 * Fault handler: resolve the struct page backing the faulting file offset
 * and hand it to the core VM.  Returning 0 with vmf->page set makes the
 * kernel insert that page into the user mapping.
 */
static int my_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = my_page_at_index(vmf->pgoff);

	/* Take a reference; the core VM drops it when the mapping is torn down. */
	get_page(page);
	vmf->page = page;
	return 0;
}
/*
 * VMA callbacks: only .fault is needed for lazy, page-at-a-time mapping.
 * Note: the initializer must be terminated with ';' (missing in the
 * original snippet, which would not compile).
 */
static const struct vm_operations_struct my_vm_ops = {
	.fault = my_fault,
};
。接下来你只需要弄清楚：对于传入fault函数的给定VMA/vm_fault，应该把哪个页面映射到用户空间。这取决于你的模块是如何工作的。举例来说，如果你执行了
my_buf = vmalloc_user(MY_BUF_SIZE);
,那么你使用的页面会像
vmalloc_to_page(my_buf + (vmf->pgoff << PAGE_SHIFT));
但是你也可以很容易地创建一个页面数组：为其中的每个条目用kmalloc或任何其他方式分配页面。
[顺便一提：my_fault 对一个函数来说是个略显有趣的名字]
谢谢。这非常有帮助。不过,我们是否需要在故障处理程序中调用vm_insert_page?另外,谁将撤销get_page以允许页面稍后被释放?我想,一旦用户空间做了munmap,我们就可以从vma_close中得到一些代码,我们可以在这个代码中为所有发生故障的页面put_page。这是正确的方法吗? – ravi
不,如果您设置了vmf->页面,则不需要执行vm_insert_page。如果你在映射非页面支持的设备内存方面做了很多漂亮的工作,那么你可能需要vm_insert_pfn(),但实际上你可能不需要担心这一点。 put_page()在映射被拆除时由核心虚拟机代码处理。真的,对于将内核内存映射到用户空间的简单驱动程序,我向你展示了几乎所有你需要的东西。 – Roland
你好。如果无法用vmalloc()一次性分配my_buf缓冲区（因为太大），而要按需一页一页地分配，那么my_fault()方法的主体应该是什么样子？ – user1284631
虽然网页是通过内核驱动程序保留的,它是指通过用户空间进行访问。因此,PTE(页表条目)不知道pfn是否属于用户空间或内核空间(即使它们是通过内核驱动程序分配的)。
这就是为什么他们被标记为SetPageReserved
。
最小可运行的例子（含用户态测试程序）
#include <asm/uaccess.h> /* copy_from_user */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h> /* min */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
/* Name of the /proc entry this module creates. */
static const char *filename = "lkmc_mmap";
/* Meaningful payload size in bytes (a whole page is allocated; only the
 * first BUFFER_SIZE bytes are used by read/write). */
enum { BUFFER_SIZE = 4 };
/* Per-open state: one page of kernel memory shared with userspace. */
struct mmap_info {
/* Page-aligned buffer from get_zeroed_page(); mapped to userspace on fault. */
char *data;
};
/* VMA .close hook: called after userspace munmap()s the region. */
static void vm_close(struct vm_area_struct *vma)
{
pr_info("vm_close\n");
}
/* First page access. */
static int vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page;
struct mmap_info *info;
pr_info("vm_fault\n");
info = (struct mmap_info *)vma->vm_private_data;
if (info->data) {
page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
}
return 0;
}
/* VMA .open hook: called when the VMA is duplicated (e.g. fork), and
 * explicitly from our mmap handler below.  TODO: under what circumstances
 * does the core call this at a time other than mmap? */
static void vm_open(struct vm_area_struct *vma)
{
pr_info("vm_open\n");
}
/* VMA callbacks: fault pages in lazily; log open/close for observability. */
static struct vm_operations_struct vm_ops =
{
.close = vm_close,
.fault = vm_fault,
.open = vm_open,
};
/*
 * mmap file operation: register the VMA callbacks; pages are supplied
 * lazily by vm_fault() on first access.
 */
static int mmap(struct file *filp, struct vm_area_struct *vma)
{
	pr_info("mmap\n");

	vma->vm_ops = &vm_ops;
	/* Fixed-size kernel buffer: forbid growing the mapping via mremap()
	 * and keep it out of core dumps. */
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	/* Make the per-open buffer reachable from the fault handler. */
	vma->vm_private_data = filp->private_data;
	/* The core does not call .open for the initial mmap, so do it here. */
	vm_open(vma);

	return 0;
}
static int open(struct inode *inode, struct file *filp)
{
struct mmap_info *info;
pr_info("open\n");
info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info));
info->data = (char *)get_zeroed_page(GFP_KERNEL);
memcpy(info->data, "asdf", BUFFER_SIZE);
filp->private_data = info;
return 0;
}
/*
 * read file operation: copy the kernel buffer to userspace, honouring the
 * file offset so readers eventually see EOF.  The original ignored *off and
 * always returned BUFFER_SIZE bytes, making e.g. `cat` loop forever.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *info = filp->private_data;
	ssize_t ret;

	pr_info("read\n");
	if (*off >= BUFFER_SIZE)
		return 0; /* EOF */
	ret = min(len, (size_t)(BUFFER_SIZE - *off));
	if (copy_to_user(buf, info->data + *off, ret))
		return -EFAULT;
	*off += ret;
	return ret;
}
/*
 * write file operation: copy at most BUFFER_SIZE bytes from userspace into
 * the kernel buffer.  Deliberately claims the full len was consumed even
 * when truncated, so writers do not loop retrying the tail.
 */
static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *info = filp->private_data;
	size_t n = min(len, (size_t)BUFFER_SIZE);

	pr_info("write\n");
	return copy_from_user(info->data, buf, n) ? -EFAULT : (ssize_t)len;
}
/*
 * release file operation: tear down the per-open state created in open()
 * (free the data page, then the mmap_info itself).
 */
static int release(struct inode *inode, struct file *filp)
{
	struct mmap_info *info = filp->private_data;

	pr_info("release\n");
	free_page((unsigned long)info->data);
	kfree(info);
	/* Defensive: drop the dangling pointer. */
	filp->private_data = NULL;
	return 0;
}
/* File operations for the /proc entry: mmap plus plain read/write access
 * to the same underlying buffer. */
static const struct file_operations fops = {
.mmap = mmap,
.open = open,
.release = release,
.read = read,
.write = write,
};
static int myinit(void)
{
proc_create(filename, 0, NULL, &fops);
return 0;
}
/* Module exit: remove the /proc entry created in myinit(). */
static void myexit(void)
{
remove_proc_entry(filename, NULL);
}
module_init(myinit)
module_exit(myexit)
MODULE_LICENSE("GPL");
#define _XOPEN_SOURCE 700
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h> /* uintmax_t */
#include <string.h>
#include <sys/mman.h>
#include <unistd.h> /* sysconf */
#include "common.h" /* virt_to_phys_user */
enum { BUFFER_SIZE = 4 };

/*
 * Userland test: mmap the kernel-provided /proc file twice and verify that
 * both mappings, as well as read()/write() on the fd, all observe the same
 * underlying physical page.
 */
int main(int argc, char **argv)
{
	int fd;
	long page_size;
	char *address1, *address2;
	char buf[BUFFER_SIZE];
	uintptr_t paddr;

	if (argc < 2) {
		printf("Usage: %s <mmap_file>\n", argv[0]);
		return EXIT_FAILURE;
	}
	page_size = sysconf(_SC_PAGE_SIZE);
	printf("open pathname = %s\n", argv[1]);
	fd = open(argv[1], O_RDWR | O_SYNC);
	if (fd < 0) {
		perror("open");
		assert(0);
	}
	printf("fd = %d\n", fd);

	/* mmap twice for double fun. */
	puts("mmap 1");
	address1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (address1 == MAP_FAILED) {
		perror("mmap");
		assert(0);
	}
	puts("mmap 2");
	address2 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (address2 == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	assert(address1 != address2);

	/* Read and modify memory; each first touch triggers vm_fault in the
	 * kernel module. */
	puts("access 1");
	assert(!strcmp(address1, "asdf"));
	puts("access 2");
	assert(!strcmp(address2, "asdf"));
	strcpy(address1, "qwer");
	/* Also modified: both virtual addresses alias the same physical page. */
	assert(!strcmp(address2, "qwer"));

	/* Check that the physical addresses are the same.
	 * They are, but TODO why virt_to_phys on kernel gives a different value? */
	assert(!virt_to_phys_user(&paddr, getpid(), (uintptr_t)address1));
	printf("paddr1 = 0x%jx\n", (uintmax_t)paddr);
	assert(!virt_to_phys_user(&paddr, getpid(), (uintptr_t)address2));
	printf("paddr2 = 0x%jx\n", (uintmax_t)paddr);

	/* Modifications made from userland are visible through read().
	 * The original ignored the syscall return values; assert on them. */
	assert(read(fd, buf, BUFFER_SIZE) == BUFFER_SIZE);
	assert(!memcmp(buf, "qwer", BUFFER_SIZE));

	/* Modify the data from the kernel side via write(), and check the
	 * change is visible from userland through both mappings. */
	assert(write(fd, "zxcv", BUFFER_SIZE) == BUFFER_SIZE);
	assert(!strcmp(address1, "zxcv"));
	assert(!strcmp(address2, "zxcv"));

	/* Cleanup. */
	puts("munmap 1");
	if (munmap(address1, page_size)) {
		perror("munmap");
		assert(0);
	}
	puts("munmap 2");
	if (munmap(address2, page_size)) {
		perror("munmap");
		assert(0);
	}
	puts("close");
	close(fd);
	return EXIT_SUCCESS;
}
不知道这是否有帮助，但据我所知，内核中的[perf](http://lxr.free-electrons.com/source/tools/perf/design.txt)子系统提供了一组来自内核内存的页面（实际上是一个环形缓冲区），可以由用户空间应用程序mmap。它的实现可能会给你的问题提供一些提示，值得看看它的源代码。 – Eugene