Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * guest access functions
0004  *
0005  * Copyright IBM Corp. 2014
0006  *
0007  */
0008 
0009 #include <linux/vmalloc.h>
0010 #include <linux/mm_types.h>
0011 #include <linux/err.h>
0012 #include <linux/pgtable.h>
0013 #include <linux/bitfield.h>
0014 
0015 #include <asm/gmap.h>
0016 #include "kvm-s390.h"
0017 #include "gaccess.h"
0018 #include <asm/switch_to.h>
0019 
/*
 * Address-space-control element (ASCE). The dt field selects the
 * designation type (top-level table), see the ASCE_TYPE_* enum below.
 */
union asce {
	unsigned long val;
	struct {
		unsigned long origin : 52; /* Region- or Segment-Table Origin */
		unsigned long    : 2;
		unsigned long g  : 1; /* Subspace Group Control */
		unsigned long p  : 1; /* Private Space Control */
		unsigned long s  : 1; /* Storage-Alteration-Event Control */
		unsigned long x  : 1; /* Space-Switch-Event Control */
		unsigned long r  : 1; /* Real-Space Control */
		unsigned long    : 1;
		unsigned long dt : 2; /* Designation-Type Control */
		unsigned long tl : 2; /* Region- or Segment-Table Length */
	};
};
0035 
/* Values of the asce.dt (designation-type) field */
enum {
	ASCE_TYPE_SEGMENT = 0,
	ASCE_TYPE_REGION3 = 1,
	ASCE_TYPE_REGION2 = 2,
	ASCE_TYPE_REGION1 = 3
};
0042 
/* Region-first-table entry: points to a region-second table. */
union region1_table_entry {
	unsigned long val;
	struct {
		unsigned long rto: 52;/* Region-Table Origin */
		unsigned long    : 2;
		unsigned long p  : 1; /* DAT-Protection Bit */
		unsigned long    : 1;
		unsigned long tf : 2; /* Region-Second-Table Offset */
		unsigned long i  : 1; /* Region-Invalid Bit */
		unsigned long    : 1;
		unsigned long tt : 2; /* Table-Type Bits */
		unsigned long tl : 2; /* Region-Second-Table Length */
	};
};
0057 
/* Region-second-table entry: points to a region-third table. */
union region2_table_entry {
	unsigned long val;
	struct {
		unsigned long rto: 52;/* Region-Table Origin */
		unsigned long    : 2;
		unsigned long p  : 1; /* DAT-Protection Bit */
		unsigned long    : 1;
		unsigned long tf : 2; /* Region-Third-Table Offset */
		unsigned long i  : 1; /* Region-Invalid Bit */
		unsigned long    : 1;
		unsigned long tt : 2; /* Table-Type Bits */
		unsigned long tl : 2; /* Region-Third-Table Length */
	};
};
0072 
/* Region-third-table entry, format control 0: points to a segment table. */
struct region3_table_entry_fc0 {
	unsigned long sto: 52;/* Segment-Table Origin */
	unsigned long    : 1;
	unsigned long fc : 1; /* Format-Control */
	unsigned long p  : 1; /* DAT-Protection Bit */
	unsigned long    : 1;
	unsigned long tf : 2; /* Segment-Table Offset */
	unsigned long i  : 1; /* Region-Invalid Bit */
	unsigned long cr : 1; /* Common-Region Bit */
	unsigned long tt : 2; /* Table-Type Bits */
	unsigned long tl : 2; /* Segment-Table Length */
};
0085 
/* Region-third-table entry, format control 1: maps a huge (2G) frame. */
struct region3_table_entry_fc1 {
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long    : 14;
	unsigned long av : 1; /* ACCF-Validity Control */
	unsigned long acc: 4; /* Access-Control Bits */
	unsigned long f  : 1; /* Fetch-Protection Bit */
	unsigned long fc : 1; /* Format-Control */
	unsigned long p  : 1; /* DAT-Protection Bit */
	unsigned long iep: 1; /* Instruction-Execution-Protection */
	unsigned long    : 2;
	unsigned long i  : 1; /* Region-Invalid Bit */
	unsigned long cr : 1; /* Common-Region Bit */
	unsigned long tt : 2; /* Table-Type Bits */
	unsigned long    : 2;
};
0101 
/*
 * Region-third-table entry. The anonymous struct exposes the fields
 * common to both formats; fc selects between the fc0/fc1 views.
 */
union region3_table_entry {
	unsigned long val;
	struct region3_table_entry_fc0 fc0;
	struct region3_table_entry_fc1 fc1;
	struct {
		unsigned long    : 53;
		unsigned long fc : 1; /* Format-Control */
		unsigned long    : 4;
		unsigned long i  : 1; /* Region-Invalid Bit */
		unsigned long cr : 1; /* Common-Region Bit */
		unsigned long tt : 2; /* Table-Type Bits */
		unsigned long    : 2;
	};
};
0116 
/* Segment-table entry, format control 0: points to a page table. */
struct segment_entry_fc0 {
	unsigned long pto: 53;/* Page-Table Origin */
	unsigned long fc : 1; /* Format-Control */
	unsigned long p  : 1; /* DAT-Protection Bit */
	unsigned long    : 3;
	unsigned long i  : 1; /* Segment-Invalid Bit */
	unsigned long cs : 1; /* Common-Segment Bit */
	unsigned long tt : 2; /* Table-Type Bits */
	unsigned long    : 2;
};
0127 
/* Segment-table entry, format control 1: maps a large (1M) frame. */
struct segment_entry_fc1 {
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long    : 3;
	unsigned long av : 1; /* ACCF-Validity Control */
	unsigned long acc: 4; /* Access-Control Bits */
	unsigned long f  : 1; /* Fetch-Protection Bit */
	unsigned long fc : 1; /* Format-Control */
	unsigned long p  : 1; /* DAT-Protection Bit */
	unsigned long iep: 1; /* Instruction-Execution-Protection */
	unsigned long    : 2;
	unsigned long i  : 1; /* Segment-Invalid Bit */
	unsigned long cs : 1; /* Common-Segment Bit */
	unsigned long tt : 2; /* Table-Type Bits */
	unsigned long    : 2;
};
0143 
/*
 * Segment-table entry. The anonymous struct exposes the fields common
 * to both formats; fc selects between the fc0/fc1 views.
 */
union segment_table_entry {
	unsigned long val;
	struct segment_entry_fc0 fc0;
	struct segment_entry_fc1 fc1;
	struct {
		unsigned long    : 53;
		unsigned long fc : 1; /* Format-Control */
		unsigned long    : 4;
		unsigned long i  : 1; /* Segment-Invalid Bit */
		unsigned long cs : 1; /* Common-Segment Bit */
		unsigned long tt : 2; /* Table-Type Bits */
		unsigned long    : 2;
	};
};
0158 
/* Values of the tt (table-type) bits found in every table entry */
enum {
	TABLE_TYPE_SEGMENT = 0,
	TABLE_TYPE_REGION3 = 1,
	TABLE_TYPE_REGION2 = 2,
	TABLE_TYPE_REGION1 = 3
};
0165 
/* Page-table entry: maps a 4k page frame. */
union page_table_entry {
	unsigned long val;
	struct {
		unsigned long pfra : 52; /* Page-Frame Real Address */
		unsigned long z  : 1; /* Zero Bit */
		unsigned long i  : 1; /* Page-Invalid Bit */
		unsigned long p  : 1; /* DAT-Protection Bit */
		unsigned long iep: 1; /* Instruction-Execution-Protection */
		unsigned long    : 8;
	};
};
0177 
/*
 * vaddress union in order to easily decode a virtual address into its
 * region first index, region second index etc. parts.
 *
 * The second anonymous struct exposes only the two most significant
 * bits of each index (rfx01, rsx01, ...), used for table-length checks.
 */
union vaddress {
	unsigned long addr;
	struct {
		unsigned long rfx : 11;
		unsigned long rsx : 11;
		unsigned long rtx : 11;
		unsigned long sx  : 11;
		unsigned long px  : 8;
		unsigned long bx  : 12;
	};
	struct {
		unsigned long rfx01 : 2;
		unsigned long       : 9;
		unsigned long rsx01 : 2;
		unsigned long       : 9;
		unsigned long rtx01 : 2;
		unsigned long       : 9;
		unsigned long sx01  : 2;
		unsigned long       : 29;
	};
};
0203 
/*
 * raddress union which will contain the result (real or absolute address)
 * after a page table walk. The rfaa, sfaa and pfra members are used to
 * simply assign them the value of a region, segment or page table entry.
 */
union raddress {
	unsigned long addr;
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long pfra : 52; /* Page-Frame Real Address */
};
0215 
/* Access-list-entry token, taken from an access register. */
union alet {
	u32 val;
	struct {
		u32 reserved : 7;
		u32 p        : 1;
		u32 alesn    : 8;
		u32 alen     : 16;
	};
};
0225 
/* Access-list designation: origin and length of the access list. */
union ald {
	u32 val;
	struct {
		u32     : 1;
		u32 alo : 24;
		u32 all : 7;
	};
};
0234 
/* Access-list entry, looked up via the ALET's alen index. */
struct ale {
	unsigned long i      : 1; /* ALEN-Invalid Bit */
	unsigned long        : 5;
	unsigned long fo     : 1; /* Fetch-Only Bit */
	unsigned long p      : 1; /* Private Bit */
	unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
	unsigned long        : 32;
	unsigned long        : 1;
	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
	unsigned long        : 6;
	unsigned long astesn : 32; /* ASTE Sequence Number */
};
0248 
/* ASN-second-table entry; ultimately provides the ASCE for AR mode. */
struct aste {
	unsigned long i      : 1; /* ASX-Invalid Bit */
	unsigned long ato    : 29; /* Authority-Table Origin */
	unsigned long        : 1;
	unsigned long b      : 1; /* Base-Space Bit */
	unsigned long ax     : 16; /* Authorization Index */
	unsigned long atl    : 12; /* Authority-Table Length */
	unsigned long        : 2;
	unsigned long ca     : 1; /* Controlled-ASN Bit */
	unsigned long ra     : 1; /* Reusable-ASN Bit */
	unsigned long asce   : 64; /* Address-Space-Control Element */
	unsigned long ald    : 32;
	unsigned long astesn : 32;
	/* .. more fields there */
};
0264 
0265 int ipte_lock_held(struct kvm *kvm)
0266 {
0267     if (sclp.has_siif) {
0268         int rc;
0269 
0270         read_lock(&kvm->arch.sca_lock);
0271         rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
0272         read_unlock(&kvm->arch.sca_lock);
0273         return rc;
0274     }
0275     return kvm->arch.ipte_lock_count != 0;
0276 }
0277 
/*
 * Take the IPTE lock when the guest has no SIIF support: a nesting
 * count under ipte_mutex, plus the "k" bit in the shared ipte control
 * set via a cmpxchg loop.
 */
static void ipte_lock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count++;
	if (kvm->arch.ipte_lock_count > 1)
		goto out;	/* already locked on behalf of an earlier caller */
retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		if (old.k) {
			/* someone else holds the bit; drop the lock and retry */
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}
0303 
/*
 * Release the IPTE lock taken by ipte_lock_simple(). Only the last
 * unlocker clears the "k" bit and wakes up any waiters.
 */
static void ipte_unlock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count--;
	if (kvm->arch.ipte_lock_count)
		goto out;	/* still held by other callers */
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		new = old;
		new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
	wake_up(&kvm->arch.ipte_wq);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}
0324 
/*
 * Take the IPTE lock when SIIF is available: the hardware shares the
 * ipte control, so the nesting count (kh) is kept there and updated
 * with a cmpxchg loop instead of a mutex.
 */
static void ipte_lock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		if (old.kg) {
			/* guest holds the lock; back off and retry */
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
		new.kh++;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
}
0345 
/*
 * Release the IPTE lock taken by ipte_lock_siif(): decrement the kh
 * nesting count and clear "k" when it reaches zero, then wake waiters.
 */
static void ipte_unlock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	do {
		old = READ_ONCE(*ic);
		new = old;
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
	if (!new.kh)
		wake_up(&kvm->arch.ipte_wq);
}
0363 
0364 void ipte_lock(struct kvm *kvm)
0365 {
0366     if (sclp.has_siif)
0367         ipte_lock_siif(kvm);
0368     else
0369         ipte_lock_simple(kvm);
0370 }
0371 
0372 void ipte_unlock(struct kvm *kvm)
0373 {
0374     if (sclp.has_siif)
0375         ipte_unlock_siif(kvm);
0376     else
0377         ipte_unlock_simple(kvm);
0378 }
0379 
/*
 * ar_translation() - perform access-register translation for @ar.
 * @vcpu: virtual cpu
 * @asce: output argument, receives the resulting ASCE
 * @ar: access register number to translate
 * @mode: access mode (store accesses are refused for fetch-only ALEs)
 *
 * Walks ALET -> access list -> ALE -> ASTE as architecturally defined.
 * Returns 0 on success, a PGM_* program interruption code on an access
 * exception, or a negative error code if guest memory could not be read.
 */
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
			  enum gacc_mode mode)
{
	union alet alet;
	struct ale ale;
	struct aste aste;
	unsigned long ald_addr, authority_table_addr;
	union ald ald;
	int eax, rc;
	u8 authority_table;

	if (ar >= NUM_ACRS)
		return -EINVAL;

	/* make sure the guest's access registers are up to date */
	save_access_regs(vcpu->run->s.regs.acrs);
	alet.val = vcpu->run->s.regs.acrs[ar];

	/* special ALET values 0 and 1 designate primary/secondary space */
	if (ar == 0 || alet.val == 0) {
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	} else if (alet.val == 1) {
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	}

	if (alet.reserved)
		return PGM_ALET_SPECIFICATION;

	/* p selects the DUALD (CR5) or PSALD (CR2) access-list designation */
	if (alet.p)
		ald_addr = vcpu->arch.sie_block->gcr[5];
	else
		ald_addr = vcpu->arch.sie_block->gcr[2];
	ald_addr &= 0x7fffffc0;

	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
	if (rc)
		return rc;

	/* ALEN must be within the access-list length */
	if (alet.alen / 8 > ald.all)
		return PGM_ALEN_TRANSLATION;

	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
		return PGM_ADDRESSING;

	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
			     sizeof(struct ale));
	if (rc)
		return rc;

	if (ale.i == 1)
		return PGM_ALEN_TRANSLATION;
	if (ale.alesn != alet.alesn)
		return PGM_ALE_SEQUENCE;

	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
	if (rc)
		return rc;

	if (aste.i)
		return PGM_ASTE_VALIDITY;
	if (aste.astesn != ale.astesn)
		return PGM_ASTE_SEQUENCE;

	/* private ALE: check the extended authorization index */
	if (ale.p == 1) {
		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
		if (ale.aleax != eax) {
			if (eax / 16 > aste.atl)
				return PGM_EXTENDED_AUTHORITY;

			authority_table_addr = aste.ato * 4 + eax / 4;

			rc = read_guest_real(vcpu, authority_table_addr,
					     &authority_table,
					     sizeof(u8));
			if (rc)
				return rc;

			/* test the secondary-authority bit for this eax */
			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
				return PGM_EXTENDED_AUTHORITY;
		}
	}

	/* fetch-only ALEs forbid stores */
	if (ale.fo == 1 && mode == GACC_STORE)
		return PGM_PROTECTION;

	asce->val = aste.asce;
	return 0;
}
0468 
/* Layout of the translation-exception identification (TEID). */
struct trans_exc_code_bits {
	unsigned long addr : 52; /* Translation-exception Address */
	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
	unsigned long      : 2;
	unsigned long b56  : 1;
	unsigned long      : 3;
	unsigned long b60  : 1;
	unsigned long b61  : 1;
	unsigned long as   : 2;  /* ASCE Identifier */
};
0479 
/* Values of the TEID fetch/store-indication (fsi) field */
enum {
	FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
	FSI_STORE   = 1, /* Exception was due to store operation */
	FSI_FETCH   = 2  /* Exception was due to fetch operation */
};
0485 
/* Kind of protection that caused a PGM_PROTECTION exception */
enum prot_type {
	PROT_TYPE_LA   = 0,	/* low-address protection */
	PROT_TYPE_KEYC = 1,	/* key-controlled protection */
	PROT_TYPE_ALC  = 2,	/* access-list-controlled protection */
	PROT_TYPE_DAT  = 3,	/* DAT protection */
	PROT_TYPE_IEP  = 4,	/* instruction-execution protection */
	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
	PROT_NONE,
};
0495 
/*
 * trans_exc_ending() - fill in vcpu->arch.pgm for an access exception.
 * @vcpu: virtual cpu
 * @code: PGM_* interruption code of the exception
 * @gva: guest virtual address that caused the exception
 * @ar: access register used for the access
 * @mode: access mode, determines the fetch/store indication
 * @prot: protection kind, selects the TEID bits for PGM_PROTECTION
 * @terminate: if true, suppress the b56/b60/b61 TEID bits
 *
 * Prepares the program interruption information such that injecting it
 * afterwards yields an architecturally correct exception. Returns @code.
 * Note the deliberate switch fallthroughs: each case adds the fields
 * required by its class of exception and falls into the less specific one.
 */
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
			    enum gacc_mode mode, enum prot_type prot, bool terminate)
{
	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
	struct trans_exc_code_bits *tec;

	memset(pgm, 0, sizeof(*pgm));
	pgm->code = code;
	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;

	switch (code) {
	case PGM_PROTECTION:
		switch (prot) {
		case PROT_NONE:
			/* We should never get here, acts like termination */
			WARN_ON_ONCE(1);
			break;
		case PROT_TYPE_IEP:
			tec->b61 = 1;
			fallthrough;
		case PROT_TYPE_LA:
			tec->b56 = 1;
			break;
		case PROT_TYPE_KEYC:
			tec->b60 = 1;
			break;
		case PROT_TYPE_ALC:
			tec->b60 = 1;
			fallthrough;
		case PROT_TYPE_DAT:
			tec->b61 = 1;
			break;
		}
		if (terminate) {
			tec->b56 = 0;
			tec->b60 = 0;
			tec->b61 = 0;
		}
		fallthrough;
	case PGM_ASCE_TYPE:
	case PGM_PAGE_TRANSLATION:
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
	case PGM_SEGMENT_TRANSLATION:
		/*
		 * op_access_id only applies to MOVE_PAGE -> set bit 61
		 * exc_access_id has to be set to 0 for some instructions. Both
		 * cases have to be handled by the caller.
		 */
		tec->addr = gva >> PAGE_SHIFT;
		tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
		tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
		fallthrough;
	case PGM_ALEN_TRANSLATION:
	case PGM_ALE_SEQUENCE:
	case PGM_ASTE_VALIDITY:
	case PGM_ASTE_SEQUENCE:
	case PGM_EXTENDED_AUTHORITY:
		/*
		 * We can always store exc_access_id, as it is
		 * undefined for non-ar cases. It is undefined for
		 * most DAT protection exceptions.
		 */
		pgm->exc_access_id = ar;
		break;
	}
	return code;
}
0565 
/*
 * Convenience wrapper around trans_exc_ending() for the common,
 * non-terminating case.
 */
static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
		     enum gacc_mode mode, enum prot_type prot)
{
	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
}
0571 
/*
 * get_vcpu_asce() - determine the ASCE to use for an access.
 * @vcpu: virtual cpu
 * @asce: output argument, receives the effective ASCE
 * @ga: guest address, used for exception injection in AR mode
 * @ar: access register number, used in access-register mode
 * @mode: access mode
 *
 * With DAT off a real-space ASCE is returned. Otherwise the ASCE is
 * picked from the control registers according to the PSW address-space
 * control; instruction fetches are always done from primary or home
 * space. Returns 0 on success, a PGM_* code or negative error otherwise.
 */
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
			 unsigned long ga, u8 ar, enum gacc_mode mode)
{
	int rc;
	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);

	if (!psw.dat) {
		/* no translation: use a real-space designation */
		asce->val = 0;
		asce->r = 1;
		return 0;
	}

	/* instruction fetches are never done from secondary/AR space */
	if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
		psw.as = PSW_BITS_AS_PRIMARY;

	switch (psw.as) {
	case PSW_BITS_AS_PRIMARY:
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	case PSW_BITS_AS_SECONDARY:
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	case PSW_BITS_AS_HOME:
		asce->val = vcpu->arch.sie_block->gcr[13];
		return 0;
	case PSW_BITS_AS_ACCREG:
		rc = ar_translation(vcpu, asce, ar, mode);
		if (rc > 0)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
		return rc;
	}
	return 0;
}
0605 
/* Read one 8-byte DAT table entry at guest physical address @gpa. */
static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
}
0610 
0611 /**
0612  * guest_translate - translate a guest virtual into a guest absolute address
0613  * @vcpu: virtual cpu
0614  * @gva: guest virtual address
0615  * @gpa: points to where guest physical (absolute) address should be stored
0616  * @asce: effective asce
0617  * @mode: indicates the access mode to be used
0618  * @prot: returns the type for protection exceptions
0619  *
0620  * Translate a guest virtual address into a guest absolute address by means
0621  * of dynamic address translation as specified by the architecture.
0622  * If the resulting absolute address is not available in the configuration
0623  * an addressing exception is indicated and @gpa will not be changed.
0624  *
0625  * Returns: - zero on success; @gpa contains the resulting absolute address
0626  *      - a negative value if guest access failed due to e.g. broken
0627  *        guest mapping
0628  *      - a positive value if an access exception happened. In this case
0629  *        the returned value is the program interruption code as defined
0630  *        by the architecture
0631  */
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
				     unsigned long *gpa, const union asce asce,
				     enum gacc_mode mode, enum prot_type *prot)
{
	union vaddress vaddr = {.addr = gva};
	union raddress raddr = {.addr = gva};
	union page_table_entry pte;
	int dat_protection = 0;
	int iep_protection = 0;
	union ctlreg0 ctlreg0;
	unsigned long ptr;
	int edat1, edat2, iep;

	/* EDAT/IEP only apply if enabled in CR0 and offered as a facility */
	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
	if (asce.r)
		goto real_address;	/* real-space ASCE: no table walk */
	ptr = asce.origin * PAGE_SIZE;
	/* validate the address against the top-level table's type/length */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		ptr += vaddr.rfx * 8;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		ptr += vaddr.rsx * 8;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		ptr += vaddr.rtx * 8;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr += vaddr.sx * 8;
		break;
	}
	/*
	 * Walk the tables; each case handles one level and deliberately
	 * falls through to the next lower one.
	 */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (kvm_is_error_gpa(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rfte.val))
			return -EFAULT;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (edat1)
			dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (kvm_is_error_gpa(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rste.val))
			return -EFAULT;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (edat1)
			dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (kvm_is_error_gpa(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rtte.val))
			return -EFAULT;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && edat2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && edat2) {
			/* format control 1: 2G frame, walk ends here */
			dat_protection |= rtte.fc1.p;
			iep_protection = rtte.fc1.iep;
			raddr.rfaa = rtte.fc1.rfaa;
			goto absolute_address;
		}
		if (vaddr.sx01 < rtte.fc0.tf)
			return PGM_SEGMENT_TRANSLATION;
		if (vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (edat1)
			dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (kvm_is_error_gpa(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &ste.val))
			return -EFAULT;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		if (ste.fc && edat1) {
			/* format control 1: 1M frame, walk ends here */
			dat_protection |= ste.fc1.p;
			iep_protection = ste.fc1.iep;
			raddr.sfaa = ste.fc1.sfaa;
			goto absolute_address;
		}
		dat_protection |= ste.fc0.p;
		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
	}
	}
	/* final level: the page table */
	if (kvm_is_error_gpa(vcpu->kvm, ptr))
		return PGM_ADDRESSING;
	if (deref_table(vcpu->kvm, ptr, &pte.val))
		return -EFAULT;
	if (pte.i)
		return PGM_PAGE_TRANSLATION;
	if (pte.z)
		return PGM_TRANSLATION_SPEC;
	dat_protection |= pte.p;
	iep_protection = pte.iep;
	raddr.pfra = pte.pfra;
real_address:
	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
	if (mode == GACC_STORE && dat_protection) {
		*prot = PROT_TYPE_DAT;
		return PGM_PROTECTION;
	}
	if (mode == GACC_IFETCH && iep_protection && iep) {
		*prot = PROT_TYPE_IEP;
		return PGM_PROTECTION;
	}
	if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
		return PGM_ADDRESSING;
	*gpa = raddr.addr;
	return 0;
}
0795 
/*
 * Low-address protection covers the first 512 bytes of the first two
 * 4k pages of each address space: 0..511 and 4096..4607.
 */
static inline int is_low_address(unsigned long ga)
{
	return ga < 512 || (ga >= 4096 && ga < 4608);
}
0801 
0802 static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
0803                       const union asce asce)
0804 {
0805     union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
0806     psw_t *psw = &vcpu->arch.sie_block->gpsw;
0807 
0808     if (!ctlreg0.lap)
0809         return 0;
0810     if (psw_bits(*psw).dat && asce.p)
0811         return 0;
0812     return 1;
0813 }
0814 
0815 static int vm_check_access_key(struct kvm *kvm, u8 access_key,
0816                    enum gacc_mode mode, gpa_t gpa)
0817 {
0818     u8 storage_key, access_control;
0819     bool fetch_protected;
0820     unsigned long hva;
0821     int r;
0822 
0823     if (access_key == 0)
0824         return 0;
0825 
0826     hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
0827     if (kvm_is_error_hva(hva))
0828         return PGM_ADDRESSING;
0829 
0830     mmap_read_lock(current->mm);
0831     r = get_guest_storage_key(current->mm, hva, &storage_key);
0832     mmap_read_unlock(current->mm);
0833     if (r)
0834         return r;
0835     access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
0836     if (access_control == access_key)
0837         return 0;
0838     fetch_protected = storage_key & _PAGE_FP_BIT;
0839     if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
0840         return 0;
0841     return PGM_PROTECTION;
0842 }
0843 
0844 static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
0845                        union asce asce)
0846 {
0847     psw_t *psw = &vcpu->arch.sie_block->gpsw;
0848     unsigned long override;
0849 
0850     if (mode == GACC_FETCH || mode == GACC_IFETCH) {
0851         /* check if fetch protection override enabled */
0852         override = vcpu->arch.sie_block->gcr[0];
0853         override &= CR0_FETCH_PROTECTION_OVERRIDE;
0854         /* not applicable if subject to DAT && private space */
0855         override = override && !(psw_bits(*psw).dat && asce.p);
0856         return override;
0857     }
0858     return false;
0859 }
0860 
/* Fetch protection override only covers accesses fully within 0..2047. */
static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
{
	if (ga >= 2048)
		return false;
	return len <= 2048 - ga;
}
0865 
0866 static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
0867 {
0868     /* check if storage protection override enabled */
0869     return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
0870 }
0871 
/* Check whether the page's access-control value permits the override. */
static bool storage_prot_override_applies(u8 access_control)
{
	/* matches special storage protection override key (9) -> allow */
	return access_control == PAGE_SPO_ACC;
}
0877 
/*
 * vcpu_check_access_key() - check key-controlled protection for one page
 * @vcpu: virtual cpu
 * @access_key: access key to match against the page's storage key
 * @mode: access mode (fetch/store/ifetch)
 * @asce: address-space-control element used for the translation
 * @gpa: guest physical address of the accessed page
 * @ga: guest logical address of the access (used for override checks)
 * @len: length of the access in bytes
 *
 * Return: 0 if the access is allowed, PGM_PROTECTION if key-controlled
 * protection forbids it, <0 if the storage key could not be read.
 */
static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
				 enum gacc_mode mode, union asce asce, gpa_t gpa,
				 unsigned long ga, unsigned int len)
{
	u8 storage_key, access_control;
	unsigned long hva;
	int r;

	/* access key 0 matches any storage key -> allow */
	if (access_key == 0)
		return 0;
	/*
	 * caller needs to ensure that gfn is accessible, so we can
	 * assume that this cannot fail
	 */
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
	/* the storage key lives in the host page tables; read it under the lock */
	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	/* extract the access-control bits from the storage key */
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	/* access key matches storage key -> allow */
	if (access_control == access_key)
		return 0;
	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* it is a fetch and fetch protection is off -> allow */
		if (!(storage_key & _PAGE_FP_BIT))
			return 0;
		/* fetch protection override may still allow reading low core */
		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
		    fetch_prot_override_applies(ga, len))
			return 0;
	}
	/* storage protection override may still allow accessing key-9 pages */
	if (storage_prot_override_applicable(vcpu) &&
	    storage_prot_override_applies(access_control))
		return 0;
	return PGM_PROTECTION;
}
0916 
0917 /**
0918  * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
0919  * covering a logical range
0920  * @vcpu: virtual cpu
0921  * @ga: guest address, start of range
0922  * @ar: access register
0923  * @gpas: output argument, may be NULL
0924  * @len: length of range in bytes
0925  * @asce: address-space-control element to use for translation
0926  * @mode: access mode
 * @access_key: access key to match the range's storage keys against
0928  *
0929  * Translate a logical range to a series of guest absolute addresses,
0930  * such that the concatenation of page fragments starting at each gpa make up
0931  * the whole range.
0932  * The translation is performed as if done by the cpu for the given @asce, @ar,
0933  * @mode and state of the @vcpu.
0934  * If the translation causes an exception, its program interruption code is
0935  * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
0936  * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
0937  * a correct exception into the guest.
0938  * The resulting gpas are stored into @gpas, unless it is NULL.
0939  *
0940  * Note: All fragments except the first one start at the beginning of a page.
0941  *   When deriving the boundaries of a fragment from a gpa, all but the last
0942  *   fragment end at the end of the page.
0943  *
0944  * Return:
0945  * * 0      - success
0946  * * <0     - translation could not be performed, for example if  guest
0947  *        memory could not be accessed
0948  * * >0     - an access exception occurred. In this case the returned value
0949  *        is the program interruption code and the contents of pgm may
0950  *        be used to inject an exception into the guest.
0951  */
static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			       unsigned long *gpas, unsigned long len,
			       const union asce asce, enum gacc_mode mode,
			       u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned int offset = offset_in_page(ga);
	unsigned int fragment_len;
	int lap_enabled, rc = 0;
	enum prot_type prot;
	unsigned long gpa;

	lap_enabled = low_address_protection_enabled(vcpu, asce);
	while (min(PAGE_SIZE - offset, len) > 0) {
		/* a fragment never crosses a page boundary */
		fragment_len = min(PAGE_SIZE - offset, len);
		ga = kvm_s390_logical_to_effective(vcpu, ga);
		/* stores to low addresses are rejected while LAP is active */
		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
					 PROT_TYPE_LA);
		if (psw_bits(*psw).dat) {
			/* DAT on: walk the guest page tables */
			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
			if (rc < 0)
				return rc;
		} else {
			/* DAT off: real address, apply prefixing only */
			gpa = kvm_s390_real_to_abs(vcpu, ga);
			if (kvm_is_error_gpa(vcpu->kvm, gpa)) {
				rc = PGM_ADDRESSING;
				prot = PROT_NONE;
			}
		}
		/* rc > 0: set up pgm info so the caller can inject the exception */
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, prot);
		rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
					   fragment_len);
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
		if (gpas)
			*gpas++ = gpa;
		/* all fragments after the first start at a page boundary */
		offset = 0;
		ga += fragment_len;
		len -= fragment_len;
	}
	return 0;
}
0996 
0997 static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
0998                  void *data, unsigned int len)
0999 {
1000     const unsigned int offset = offset_in_page(gpa);
1001     const gfn_t gfn = gpa_to_gfn(gpa);
1002     int rc;
1003 
1004     if (mode == GACC_STORE)
1005         rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
1006     else
1007         rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
1008     return rc;
1009 }
1010 
1011 static int
1012 access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
1013                void *data, unsigned int len, u8 access_key)
1014 {
1015     struct kvm_memory_slot *slot;
1016     bool writable;
1017     gfn_t gfn;
1018     hva_t hva;
1019     int rc;
1020 
1021     gfn = gpa >> PAGE_SHIFT;
1022     slot = gfn_to_memslot(kvm, gfn);
1023     hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
1024 
1025     if (kvm_is_error_hva(hva))
1026         return PGM_ADDRESSING;
1027     /*
1028      * Check if it's a ro memslot, even tho that can't occur (they're unsupported).
1029      * Don't try to actually handle that case.
1030      */
1031     if (!writable && mode == GACC_STORE)
1032         return -EOPNOTSUPP;
1033     hva += offset_in_page(gpa);
1034     if (mode == GACC_STORE)
1035         rc = copy_to_user_key((void __user *)hva, data, len, access_key);
1036     else
1037         rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
1038     if (rc)
1039         return PGM_PROTECTION;
1040     if (mode == GACC_STORE)
1041         mark_page_dirty_in_slot(kvm, slot, gfn);
1042     return 0;
1043 }
1044 
1045 int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
1046                   unsigned long len, enum gacc_mode mode, u8 access_key)
1047 {
1048     int offset = offset_in_page(gpa);
1049     int fragment_len;
1050     int rc;
1051 
1052     while (min(PAGE_SIZE - offset, len) > 0) {
1053         fragment_len = min(PAGE_SIZE - offset, len);
1054         rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
1055         if (rc)
1056             return rc;
1057         offset = 0;
1058         len -= fragment_len;
1059         data += fragment_len;
1060         gpa += fragment_len;
1061     }
1062     return 0;
1063 }
1064 
/*
 * access_guest_with_key() - copy data to/from guest memory at a logical address
 * @vcpu: virtual cpu
 * @ga: guest logical address, start of the range
 * @ar: access register number
 * @data: source/destination buffer in kernel space
 * @len: length of the range in bytes
 * @mode: access mode (fetch/store/ifetch)
 * @access_key: access key the storage keys are matched against
 *
 * Translate the whole range up front, then perform the copy fragment by
 * fragment, leaving the key check to the key-checked user copy. Applies
 * the fetch and storage protection override mechanisms where allowed.
 *
 * Return: 0 on success, <0 on internal error (e.g. -ENOMEM), >0 (program
 * interruption code) if an access exception was set up for injection.
 */
int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			  void *data, unsigned long len, enum gacc_mode mode,
			  u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long nr_pages, idx;
	unsigned long gpa_array[2];
	unsigned int fragment_len;
	unsigned long *gpas;
	enum prot_type prot;
	int need_ipte_lock;
	union asce asce;
	bool try_storage_prot_override;
	bool try_fetch_prot_override;
	int rc;

	if (!len)
		return 0;
	ga = kvm_s390_logical_to_effective(vcpu, ga);
	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
	if (rc)
		return rc;
	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
	/* use the on-stack array for small accesses, vmalloc otherwise */
	gpas = gpa_array;
	if (nr_pages > ARRAY_SIZE(gpa_array))
		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
	if (!gpas)
		return -ENOMEM;
	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
	try_storage_prot_override = storage_prot_override_applicable(vcpu);
	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
	if (need_ipte_lock)
		ipte_lock(vcpu->kvm);
	/*
	 * Since we do the access further down ultimately via a move instruction
	 * that does key checking and returns an error in case of a protection
	 * violation, we don't need to do the check during address translation.
	 * Skip it by passing access key 0, which matches any storage key,
	 * obviating the need for any further checks. As a result the check is
	 * handled entirely in hardware on access, we only need to take care to
	 * forego key protection checking if fetch protection override applies or
	 * retry with the special key 9 in case of storage protection override.
	 */
	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
	if (rc)
		goto out_unlock;
	for (idx = 0; idx < nr_pages; idx++) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
		/* fetch protection override: access without key checking */
		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
			rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
					       data, fragment_len);
		} else {
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, access_key);
		}
		/* storage protection override: retry with the special key 9 */
		if (rc == PGM_PROTECTION && try_storage_prot_override)
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, PAGE_SPO_ACC);
		if (rc)
			break;
		len -= fragment_len;
		data += fragment_len;
		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
	}
	if (rc > 0) {
		/* a store that already copied some fragments must terminate */
		bool terminate = (mode == GACC_STORE) && (idx > 0);

		if (rc == PGM_PROTECTION)
			prot = PROT_TYPE_KEYC;
		else
			prot = PROT_NONE;
		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
	}
out_unlock:
	if (need_ipte_lock)
		ipte_unlock(vcpu->kvm);
	if (nr_pages > ARRAY_SIZE(gpa_array))
		vfree(gpas);
	return rc;
}
1145 
1146 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
1147               void *data, unsigned long len, enum gacc_mode mode)
1148 {
1149     unsigned int fragment_len;
1150     unsigned long gpa;
1151     int rc = 0;
1152 
1153     while (len && !rc) {
1154         gpa = kvm_s390_real_to_abs(vcpu, gra);
1155         fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
1156         rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
1157         len -= fragment_len;
1158         gra += fragment_len;
1159         data += fragment_len;
1160     }
1161     return rc;
1162 }
1163 
1164 /**
1165  * guest_translate_address_with_key - translate guest logical into guest absolute address
1166  * @vcpu: virtual cpu
1167  * @gva: Guest virtual address
1168  * @ar: Access register
1169  * @gpa: Guest physical address
1170  * @mode: Translation access mode
 * @access_key: access key to match the storage key with
1172  *
1173  * Parameter semantics are the same as the ones from guest_translate.
1174  * The memory contents at the guest address are not changed.
1175  *
1176  * Note: The IPTE lock is not taken during this function, so the caller
1177  * has to take care of this.
1178  */
1179 int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
1180                      unsigned long *gpa, enum gacc_mode mode,
1181                      u8 access_key)
1182 {
1183     union asce asce;
1184     int rc;
1185 
1186     gva = kvm_s390_logical_to_effective(vcpu, gva);
1187     rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
1188     if (rc)
1189         return rc;
1190     return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
1191                    access_key);
1192 }
1193 
1194 /**
1195  * check_gva_range - test a range of guest virtual addresses for accessibility
1196  * @vcpu: virtual cpu
1197  * @gva: Guest virtual address
1198  * @ar: Access register
1199  * @length: Length of test range
1200  * @mode: Translation access mode
 * @access_key: access key to match the storage keys with
1202  */
1203 int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
1204             unsigned long length, enum gacc_mode mode, u8 access_key)
1205 {
1206     union asce asce;
1207     int rc = 0;
1208 
1209     rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
1210     if (rc)
1211         return rc;
1212     ipte_lock(vcpu->kvm);
1213     rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
1214                  access_key);
1215     ipte_unlock(vcpu->kvm);
1216 
1217     return rc;
1218 }
1219 
1220 /**
1221  * check_gpa_range - test a range of guest physical addresses for accessibility
1222  * @kvm: virtual machine instance
1223  * @gpa: guest physical address
1224  * @length: length of test range
1225  * @mode: access mode to test, relevant for storage keys
 * @access_key: access key to match the storage keys with
1227  */
1228 int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
1229             enum gacc_mode mode, u8 access_key)
1230 {
1231     unsigned int fragment_len;
1232     int rc = 0;
1233 
1234     while (length && !rc) {
1235         fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
1236         rc = vm_check_access_key(kvm, access_key, mode, gpa);
1237         length -= fragment_len;
1238         gpa += fragment_len;
1239     }
1240     return rc;
1241 }
1242 
1243 /**
1244  * kvm_s390_check_low_addr_prot_real - check for low-address protection
1245  * @vcpu: virtual cpu
1246  * @gra: Guest real address
1247  *
1248  * Checks whether an address is subject to low-address protection and set
1249  * up vcpu->arch.pgm accordingly if necessary.
1250  *
1251  * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
1252  */
1253 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
1254 {
1255     union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
1256 
1257     if (!ctlreg0.lap || !is_low_address(gra))
1258         return 0;
1259     return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
1260 }
1261 
1262 /**
1263  * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
1264  * @sg: pointer to the shadow guest address space structure
1265  * @saddr: faulting address in the shadow gmap
1266  * @pgt: pointer to the beginning of the page table for the given address if
1267  *   successful (return value 0), or to the first invalid DAT entry in
1268  *   case of exceptions (return value > 0)
1269  * @dat_protection: referenced memory is write protected
1270  * @fake: pgt references contiguous guest memory block, not a pgtable
1271  */
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
				  unsigned long *pgt, int *dat_protection,
				  int *fake)
{
	struct gmap *parent;
	union asce asce;
	union vaddress vaddr;
	unsigned long ptr;
	int rc;

	*fake = 0;
	*dat_protection = 0;
	parent = sg->parent;
	vaddr.addr = saddr;
	asce.val = sg->orig_asce;
	ptr = asce.origin * PAGE_SIZE;
	if (asce.r) {
		/* real-space ASCE: walk "fake" region-first tables from 0 */
		*fake = 1;
		ptr = 0;
		asce.dt = ASCE_TYPE_REGION1;
	}
	/* check that the address is within reach of the ASCE's table type */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl && !*fake)
			return PGM_REGION_FIRST_TRANS;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		break;
	}

	/* walk the guest tables top-down, shadowing each level on the way */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (*fake) {
			/* fake entry: offset into a contiguous block, no table read */
			ptr += vaddr.rfx * _REGION1_SIZE;
			rfte.val = ptr;
			goto shadow_r2t;
		}
		/* record the entry address in case the walk faults */
		*pgt = ptr + vaddr.rfx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
		if (rc)
			return rc;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		/* region-level protection bits only exist with EDAT1 */
		if (sg->edat_level >= 1)
			*dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE;
shadow_r2t:
		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
		if (rc)
			return rc;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (*fake) {
			/* fake entry: offset into a contiguous block, no table read */
			ptr += vaddr.rsx * _REGION2_SIZE;
			rste.val = ptr;
			goto shadow_r3t;
		}
		*pgt = ptr + vaddr.rsx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
		if (rc)
			return rc;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE;
shadow_r3t:
		/* propagate the accumulated protection into the shadowed entry */
		rste.p |= *dat_protection;
		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
		if (rc)
			return rc;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (*fake) {
			/* fake entry: offset into a contiguous block, no table read */
			ptr += vaddr.rtx * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		*pgt = ptr + vaddr.rtx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
		if (rc)
			return rc;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && sg->edat_level >= 2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && sg->edat_level >= 2) {
			/* large (2G) frame: treat it as a fake segment table */
			*dat_protection |= rtte.fc0.p;
			*fake = 1;
			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (sg->edat_level >= 1)
			*dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE;
shadow_sgt:
		rtte.fc0.p |= *dat_protection;
		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
		if (rc)
			return rc;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (*fake) {
			/* fake entry: offset into a contiguous block, no table read */
			ptr += vaddr.sx * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		*pgt = ptr + vaddr.sx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
		if (rc)
			return rc;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		*dat_protection |= ste.fc0.p;
		if (ste.fc && sg->edat_level >= 1) {
			/* large (1M) frame: treat it as a fake page table */
			*fake = 1;
			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
shadow_pgt:
		ste.fc0.p |= *dat_protection;
		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
		if (rc)
			return rc;
	}
	}
	/* Return the parent address of the page table */
	*pgt = ptr;
	return 0;
}
1448 
1449 /**
1450  * kvm_s390_shadow_fault - handle fault on a shadow page table
1451  * @vcpu: virtual cpu
1452  * @sg: pointer to the shadow guest address space structure
1453  * @saddr: faulting address in the shadow gmap
1454  * @datptr: will contain the address of the faulting DAT table entry, or of
1455  *      the valid leaf, plus some flags
1456  *
1457  * Returns: - 0 if the shadow fault was successfully resolved
1458  *      - > 0 (pgm exception code) on exceptions while faulting
1459  *      - -EAGAIN if the caller can retry immediately
1460  *      - -EFAULT when accessing invalid guest addresses
1461  *      - -ENOMEM if out of memory
1462  */
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
			  unsigned long saddr, unsigned long *datptr)
{
	union vaddress vaddr;
	union page_table_entry pte;
	unsigned long pgt = 0;
	int dat_protection, fake;
	int rc;

	mmap_read_lock(sg->mm);
	/*
	 * We don't want any guest-2 tables to change - so the parent
	 * tables/pointers we read stay valid - unshadowing is however
	 * always possible - only guest_table_lock protects us.
	 */
	ipte_lock(vcpu->kvm);

	/* fast path: the page table may already be shadowed */
	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	if (rc)
		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
					    &fake);

	vaddr.addr = saddr;
	if (fake) {
		/* pgt references a contiguous memory block, not a pgtable */
		pte.val = pgt + vaddr.px * PAGE_SIZE;
		goto shadow_page;
	}

	switch (rc) {
	/* translation exceptions: pgt points at the invalid DAT entry */
	case PGM_SEGMENT_TRANSLATION:
	case PGM_REGION_THIRD_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_FIRST_TRANS:
		pgt |= PEI_NOT_PTE;
		break;
	case 0:
		/* walk succeeded: read the pte from the parent gmap */
		pgt += vaddr.px * 8;
		rc = gmap_read_table(sg->parent, pgt, &pte.val);
	}
	/* report the DAT entry address plus flags to the caller, if requested */
	if (datptr)
		*datptr = pgt | dat_protection * PEI_DAT_PROT;
	if (!rc && pte.i)
		rc = PGM_PAGE_TRANSLATION;
	if (!rc && pte.z)
		rc = PGM_TRANSLATION_SPEC;
shadow_page:
	/* propagate the accumulated protection into the shadow pte */
	pte.p |= dat_protection;
	if (!rc)
		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
	ipte_unlock(vcpu->kvm);
	mmap_read_unlock(sg->mm);
	return rc;
}