From 0959e168678d2d95648317e1e5e46bcb358272eb Mon Sep 17 00:00:00 2001
From: Janosch Frank
Date: Tue, 17 Jul 2018 13:21:22 +0100
Subject: s390/mm: Add huge page dirty sync support

To do dirty logging with huge pages, we protect huge pmds in the
gmap. When they are written to, we unprotect them and mark them dirty.

We introduce the function gmap_test_and_clear_dirty_pmd, which handles
dirty sync for huge pages.

Signed-off-by: Janosch Frank
Acked-by: David Hildenbrand
---
 arch/s390/kvm/kvm-s390.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'arch/s390/kvm')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3b7a5151b6a5..4cff5e31ca36 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -511,19 +511,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 }
 
 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
-				     struct kvm_memory_slot *memslot)
+				    struct kvm_memory_slot *memslot)
 {
+	int i;
 	gfn_t cur_gfn, last_gfn;
-	unsigned long address;
+	unsigned long gaddr, vmaddr;
 	struct gmap *gmap = kvm->arch.gmap;
+	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 
-	/* Loop over all guest pages */
+	/* Loop over all guest segments */
+	cur_gfn = memslot->base_gfn;
 	last_gfn = memslot->base_gfn + memslot->npages;
-	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
-		address = gfn_to_hva_memslot(memslot, cur_gfn);
+	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
+		gaddr = gfn_to_gpa(cur_gfn);
+		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
+		if (kvm_is_error_hva(vmaddr))
+			continue;
+
+		bitmap_zero(bitmap, _PAGE_ENTRIES);
+		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
+		for (i = 0; i < _PAGE_ENTRIES; i++) {
+			if (test_bit(i, bitmap))
+				mark_page_dirty(kvm, cur_gfn + i);
+		}
 
-		if (test_and_clear_guest_dirty(gmap->mm, address))
-			mark_page_dirty(kvm, cur_gfn);
 		if (fatal_signal_pending(current))
 			return;
 		cond_resched();
--
cgit v1.2.3


From bd096f6443194e57382686a3ac5f2ce4e82b55d7 Mon Sep 17 00:00:00 2001
From: Janosch Frank
Date: Wed, 18 Jul 2018 13:40:22 +0100
Subject: KVM: s390: Add skey emulation fault handling

When doing skey emulation for huge guests, we now need to fault in
pmds, as we don't have PGSTEs anymore to store them when we do not
have valid table entries.
Signed-off-by: Janosch Frank
---
 arch/s390/kvm/kvm-s390.c |  15 +++++--
 arch/s390/kvm/priv.c     | 105 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 83 insertions(+), 37 deletions(-)

(limited to 'arch/s390/kvm')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4cff5e31ca36..662f4d8046db 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1551,6 +1551,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 	uint8_t *keys;
 	uint64_t hva;
 	int srcu_idx, i, r = 0;
+	bool unlocked;
 
 	if (args->flags != 0)
 		return -EINVAL;
@@ -1575,9 +1576,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 	if (r)
 		goto out;
 
+	i = 0;
 	down_read(&current->mm->mmap_sem);
 	srcu_idx = srcu_read_lock(&kvm->srcu);
-	for (i = 0; i < args->count; i++) {
+	while (i < args->count) {
+		unlocked = false;
 		hva = gfn_to_hva(kvm, args->start_gfn + i);
 		if (kvm_is_error_hva(hva)) {
 			r = -EFAULT;
@@ -1591,8 +1594,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 		}
 
 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
-		if (r)
-			break;
+		if (r) {
+			r = fixup_user_fault(current, current->mm, hva,
+					     FAULT_FLAG_WRITE, &unlocked);
+			if (r)
+				break;
+		}
+		if (!r)
+			i++;
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	up_read(&current->mm->mmap_sem);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index eb0eb60c7be6..cfc5a62329f6 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -246,9 +246,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
 
 static int handle_iske(struct kvm_vcpu *vcpu)
 {
-	unsigned long addr;
+	unsigned long gaddr, vmaddr;
 	unsigned char key;
 	int reg1, reg2;
+	bool unlocked;
 	int rc;
 
 	vcpu->stat.instruction_iske++;
@@ -262,18 +263,28 @@ static int handle_iske(struct kvm_vcpu *vcpu)
 
 	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
 
-	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-	addr = kvm_s390_logical_to_effective(vcpu, addr);
-	addr = kvm_s390_real_to_abs(vcpu, addr);
-	addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
-	if (kvm_is_error_hva(addr))
+	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
+	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
+	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+	if (kvm_is_error_hva(vmaddr))
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
+retry:
+	unlocked = false;
 	down_read(&current->mm->mmap_sem);
-	rc = get_guest_storage_key(current->mm, addr, &key);
-	up_read(&current->mm->mmap_sem);
+	rc = get_guest_storage_key(current->mm, vmaddr, &key);
+
+	if (rc) {
+		rc = fixup_user_fault(current, current->mm, vmaddr,
+				      FAULT_FLAG_WRITE, &unlocked);
+		if (!rc) {
+			up_read(&current->mm->mmap_sem);
+			goto retry;
+		}
+	}
 	if (rc)
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	up_read(&current->mm->mmap_sem);
 	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
 	vcpu->run->s.regs.gprs[reg1] |= key;
 	return 0;
@@ -281,8 +292,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
 
 static int handle_rrbe(struct kvm_vcpu *vcpu)
 {
-	unsigned long addr;
+	unsigned long vmaddr, gaddr;
 	int reg1, reg2;
+	bool unlocked;
 	int rc;
 
 	vcpu->stat.instruction_rrbe++;
@@ -296,19 +308,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
 
 	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
 
-	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-	addr = kvm_s390_logical_to_effective(vcpu, addr);
-	addr = kvm_s390_real_to_abs(vcpu, addr);
-	addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
-	if (kvm_is_error_hva(addr))
+	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
+	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
+	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+	if (kvm_is_error_hva(vmaddr))
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
+retry:
+	unlocked = false;
 	down_read(&current->mm->mmap_sem);
-	rc = reset_guest_reference_bit(current->mm, addr);
-	up_read(&current->mm->mmap_sem);
+	rc = reset_guest_reference_bit(current->mm, vmaddr);
+	if (rc < 0) {
+		rc = fixup_user_fault(current, current->mm, vmaddr,
+				      FAULT_FLAG_WRITE, &unlocked);
+		if (!rc) {
+			up_read(&current->mm->mmap_sem);
+			goto retry;
+		}
+	}
 	if (rc < 0)
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
+	up_read(&current->mm->mmap_sem);
 	kvm_s390_set_psw_cc(vcpu, rc);
 	return 0;
 }
@@ -323,6 +343,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 	unsigned long start, end;
 	unsigned char key, oldkey;
 	int reg1, reg2;
+	bool unlocked;
 	int rc;
 
 	vcpu->stat.instruction_sske++;
@@ -355,19 +376,28 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 	}
 
 	while (start != end) {
-		unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+		unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+		unlocked = false;
 
-		if (kvm_is_error_hva(addr))
+		if (kvm_is_error_hva(vmaddr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 		down_read(&current->mm->mmap_sem);
-		rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
+		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
 						m3 & SSKE_NQ, m3 & SSKE_MR,
 						m3 & SSKE_MC);
-		up_read(&current->mm->mmap_sem);
-		if (rc < 0)
+
+		if (rc < 0) {
+			rc = fixup_user_fault(current, current->mm, vmaddr,
+					      FAULT_FLAG_WRITE, &unlocked);
+			rc = !rc ? -EAGAIN : rc;
+		}
+		if (rc == -EFAULT)
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		start += PAGE_SIZE;
+
+		up_read(&current->mm->mmap_sem);
+		if (rc >= 0)
+			start += PAGE_SIZE;
 	}
 
 	if (m3 & (SSKE_MC | SSKE_MR)) {
@@ -948,15 +978,16 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 	}
 
 	while (start != end) {
-		unsigned long useraddr;
+		unsigned long vmaddr;
+		bool unlocked = false;
 
 		/* Translate guest address to host address */
-		useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
-		if (kvm_is_error_hva(useraddr))
+		vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+		if (kvm_is_error_hva(vmaddr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 		if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-			if (clear_user((void __user *)useraddr, PAGE_SIZE))
+			if (clear_user((void __user *)vmaddr, PAGE_SIZE))
 				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		}
 
@@ -966,14 +997,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 			if (rc)
 				return rc;
 			down_read(&current->mm->mmap_sem);
-			rc = cond_set_guest_storage_key(current->mm, useraddr,
+			rc = cond_set_guest_storage_key(current->mm, vmaddr,
 							key, NULL, nq, mr, mc);
-			up_read(&current->mm->mmap_sem);
-			if (rc < 0)
+			if (rc < 0) {
+				rc = fixup_user_fault(current, current->mm, vmaddr,
+						      FAULT_FLAG_WRITE, &unlocked);
+				rc = !rc ? -EAGAIN : rc;
+			}
+			if (rc == -EFAULT)
 				return kvm_s390_inject_program_int(vcpu,
 								   PGM_ADDRESSING);
-		}
-		start += PAGE_SIZE;
+			up_read(&current->mm->mmap_sem);
+			if (rc >= 0)
+				start += PAGE_SIZE;
+		}
 	}
 	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
--
cgit v1.2.3


From a449938297e55e7e8958f8b48583f7d342da1930 Mon Sep 17 00:00:00 2001
From: Janosch Frank
Date: Fri, 13 Jul 2018 11:28:31 +0100
Subject: KVM: s390: Add huge page enablement control

General KVM huge page support on s390 has to be enabled via the
kvm.hpage module parameter. Either nested or hpage can be enabled, as
we currently do not support vSIE for huge backed guests. Once vSIE
support is added we will either drop the parameter or enable it by
default.

For a guest the feature has to be enabled through the new
KVM_CAP_S390_HPAGE_1M capability and the hpage module parameter.
Enabling it means that cmm can't be enabled for the vm and disables
pfmf and storage key interpretation.

This is due to the fact that in some cases, in upcoming patches, we
have to split huge pages in the guest mapping to be able to set more
granular memory protection on 4k pages. These split pages have fake
page tables that are not visible to the Linux memory management,
which subsequently will not manage their PGSTEs, while the SIE will.
Disabling these features lets us manage PGSTE data in a consistent
manner and solves that problem.

Signed-off-by: Janosch Frank
Reviewed-by: David Hildenbrand
---
 arch/s390/kvm/kvm-s390.c | 42 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

(limited to 'arch/s390/kvm')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 662f4d8046db..f9d90337e64a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -172,6 +172,10 @@ static int nested;
 module_param(nested, int, S_IRUGO);
 MODULE_PARM_DESC(nested, "Nested virtualization support");
 
+/* allow 1m huge page guest backing, if !nested */
+static int hpage;
+module_param(hpage, int, 0444);
+MODULE_PARM_DESC(hpage, "1m huge page backing support");
 
 /*
  * For now we handle at most 16 double words as this is what the s390 base
@@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_AIS_MIGRATION:
 		r = 1;
 		break;
+	case KVM_CAP_S390_HPAGE_1M:
+		r = 0;
+		if (hpage)
+			r = 1;
+		break;
 	case KVM_CAP_S390_MEM_OP:
 		r = MEM_OP_MAX_SIZE;
 		break;
@@ -678,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 			 r ? "(not available)" : "(success)");
 		break;
+	case KVM_CAP_S390_HPAGE_1M:
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus)
+			r = -EBUSY;
+		else if (!hpage || kvm->arch.use_cmma)
+			r = -EINVAL;
+		else {
+			r = 0;
+			kvm->mm->context.allow_gmap_hpage_1m = 1;
+			/*
+			 * We might have to create fake 4k page
+			 * tables. To avoid that the hardware works on
+			 * stale PGSTEs, we emulate these instructions.
+			 */
+			kvm->arch.use_skf = 0;
+			kvm->arch.use_pfmfi = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
"(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -725,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (!sclp.has_cmma) break; - ret = -EBUSY; VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); mutex_lock(&kvm->lock); - if (!kvm->created_vcpus) { + if (kvm->created_vcpus) + ret = -EBUSY; + else if (kvm->mm->context.allow_gmap_hpage_1m) + ret = -EINVAL; + else { kvm->arch.use_cmma = 1; /* Not compatible with cmma. */ kvm->arch.use_pfmfi = 0; @@ -4102,6 +4135,11 @@ static int __init kvm_s390_init(void) return -ENODEV; } + if (nested && hpage) { + pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently"); + return -EINVAL; + } + for (i = 0; i < 16; i++) kvm_s390_fac_base[i] |= S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); -- cgit v1.2.3