Write Protect (bit 16 of CR0) — When set, inhibits supervisor-level procedures from writing into read-only pages; when clear, allows supervisor-level procedures to write into read-only pages (regardless of the U/S bit setting; see Section 4.1.3 and Section 4.6).
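As a rough, illustrative sketch only (assuming an x86 kernel-module context, not taken from the code analyzed here), the bit can be inspected with the real helpers read_cr0() and the X86_CR0_WP mask; whether it is set decides if ring-0 code faults when it writes a read-only PTE:

#include <linux/module.h>
#include <asm/processor-flags.h>
#include <asm/special_insns.h>

static int __init wp_probe_init(void)
{
	unsigned long cr0 = read_cr0();	/* current CR0 value */

	pr_info("CR0.WP (bit 16) is %s\n",
		(cr0 & X86_CR0_WP) ? "set: ring 0 honours read-only pages"
				   : "clear: ring 0 may write read-only pages");
	return 0;
}

static void __exit wp_probe_exit(void)
{
}

module_init(wp_probe_init);
module_exit(wp_probe_exit);
MODULE_LICENSE("GPL");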
access_remote_vm -> __access_remote_vm

/*
 * Access another process' address space as given in mm.  If non-NULL, use the
 * given task for page fault accounting.
 * @tsk:   as the caller shows, this is actually NULL here
 * @mm:    taken from file->private_data
 * @addr:  the virtual address that will be written
 * @buf:   the kernel virtual address of the fresh page mem_rw() just allocated
 *         with __get_free_pages()
 * @len:   the length about to be written
 * @write: as above, 1
 */
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long addr, void *buf, int len, int write)
{
	/*
	 * This struct defines a memory VMM memory area. There is one of these
	 * per VM-area/task. A VM area is any part of the process virtual memory
	 * space that has a special rule for the page-fault handlers (ie a shared
	 * library, the executable area etc).
	 * (the comment above is taken from struct vm_area_struct)
	 */
	struct vm_area_struct *vma;
	void *old_buf = buf;

	/* lock for reading */
	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		/*
		 * Walk the page tables to get the physical page (struct page)
		 * backing addr; the vma it belongs to is returned through &vma
		 * as well.
		 */
		ret = get_user_pages(tsk, mm, addr, 1,
				write, 1, &page, &vma);
		if (ret <= 0) {
#ifndef CONFIG_HAVE_IOREMAP_PROT
			break;
#else
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
			vma = find_vma(mm, addr);
			if (!vma || vma->vm_start > addr)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
				break;
			bytes = ret;
#endif
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page);
	...

	return buf - old_buf;
}
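For context, a minimal user-space sketch (my own illustration, not from the kernel sources) of how this path is reached: writing /proc/self/mem lands in mem_rw(), which ends up here with write == 1 and force == 1 (FOLL_FORCE semantics), so even a PROT_READ private mapping of our own process can be written through it on kernels where /proc/<pid>/mem is writable (self-access is):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* a private, read-only, anonymous page */
	char *p = mmap(NULL, 4096, PROT_READ,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int fd = open("/proc/self/mem", O_RDWR);

	if (p == MAP_FAILED || fd < 0)
		return 1;

	/* the file offset of /proc/<pid>/mem is the target virtual address;
	 * this pwrite() is what drives mem_rw() -> __access_remote_vm() */
	if (pwrite(fd, "hello", 5, (off_t)(uintptr_t)p) != 5)
		perror("pwrite");

	printf("read back through the read-only mapping: %.5s\n", p);
	close(fd);
	return 0;
}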
static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
{
	vm_flags_t vm_flags = vma->vm_flags;

	if (vm_flags & (VM_IO | VM_PFNMAP))
		return -EFAULT;

	if (gup_flags & FOLL_WRITE) {
		if (!(vm_flags & VM_WRITE)) {
			if (!(gup_flags & FOLL_FORCE))
				return -EFAULT;
			/*
			 * We used to let the write,force case do COW in a
			 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
			 * set a breakpoint in a read-only mapping of an
			 * executable, without corrupting the file (yet only
			 * when that file had been opened for writing!).
			 * Anon pages in shared mappings are surprising: now
			 * just reject it.
			 */
			if (!is_cow_mapping(vm_flags)) {
				WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
				return -EFAULT;
			}
	...
	return 0;
}
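For reference, is_cow_mapping() is an mm-internal helper; reproduced here from memory, so treat it as a sketch. It simply asks for a private mapping that could in principle become writable, which is exactly what a MAP_PRIVATE mapping of a read-only file (or a read-only anonymous mapping) is:

static inline bool is_cow_mapping(vm_flags_t flags)
{
	/* private (not VM_SHARED) and allowed to be made writable */
	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}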
retry:
	/*
	 * If we have a pending SIGKILL, don't keep faulting pages and
	 * potentially allocating memory.
	 */
	if (unlikely(fatal_signal_pending(current)))
		return i ? i : -ERESTARTSYS;
	cond_resched();
	page = follow_page_mask(vma, start, foll_flags, &page_mask);
	if (!page) {
		int ret;

		ret = faultin_page(tsk, vma, start, &foll_flags,
				nonblocking);
	.......
If page comes back NULL, faultin_page() is called:
/*
 * mmap_sem must be held on entry.  If @nonblocking != NULL and
 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
 */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned int fault_flags = 0;
	int ret;
	......
	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	......

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	......
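With FAULT_FLAG_WRITE set, a write fault on a present but write-protected PTE is handled by do_wp_page(); when the page is already exclusively ours (for example an anonymous page that was COWed on an earlier pass), the reuse path simply keeps it: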
	if (reuse_swap_page(old_page)) {
		/*
		 * The page is all ours. Move it to our anon_vma so
		 * the rmap code will not search our parent or siblings.
		 * Protected against the rmap code by the page lock.
		 */
		page_move_anon_rmap(old_page, vma, address);
		unlock_page(old_page);
		return wp_page_reuse(mm, vma, address, page_table, ptl,
				     orig_pte, old_page, 0, 0);
	}
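In this kernel series wp_page_reuse() reports the completed fault with VM_FAULT_WRITE set in its return value, and back in faultin_page() that bit is exactly what the final check keys off: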
	........
	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags &= ~FOLL_WRITE;
	return 0;
}
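Putting the pieces together, here is a hedged user-space sketch of the effect that comment describes (it complements the earlier /proc/self/mem example; the choice of mapping the program's own executable is only for illustration): forcing a write into a read-only MAP_PRIVATE file mapping makes do_wp_page() break COW into a private anonymous copy, VM_FAULT_WRITE is reported, FOLL_WRITE is dropped, and the write lands in the copy while the file on disk stays untouched — the ptrace-breakpoint scenario from the check_vma_flags() comment above.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* a read-only, private mapping of our own executable */
	int fd = open(argv[0], O_RDONLY);
	char *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
	int mem = open("/proc/self/mem", O_RDWR);

	if (fd < 0 || p == MAP_FAILED || mem < 0)
		return 1;

	/* forced write: the kernel breaks COW for us, the mapping now points
	 * at a private copy, the underlying file is not modified */
	pwrite(mem, "X", 1, (off_t)(uintptr_t)p);

	printf("first byte of the mapping: %c (file on disk unchanged)\n", p[0]);
	return 0;
}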