Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * store hypervisor information instruction emulation functions.
0004  *
0005  * Copyright IBM Corp. 2016
0006  * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
0007  */
0008 #include <linux/errno.h>
0009 #include <linux/pagemap.h>
0010 #include <linux/vmalloc.h>
0011 #include <linux/syscalls.h>
0012 #include <linux/mutex.h>
0013 #include <asm/asm-offsets.h>
0014 #include <asm/sclp.h>
0015 #include <asm/diag.h>
0016 #include <asm/sysinfo.h>
0017 #include <asm/ebcdic.h>
0018 #include <asm/facility.h>
0019 #include <asm/sthyi.h>
0020 #include "entry.h"
0021 
/* diag204 reports this weight for a dedicated cpu. */
#define DED_WEIGHT 0xffff
/*
 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
 * as they are justified with spaces.
 */
#define CP  0xc3d7404040404040UL
#define IFL 0xc9c6d34040404040UL
0029 
/* infhflg1 bits: report which parts of the response are unavailable. */
enum hdr_flags {
	HDR_NOT_LPAR   = 0x10,
	HDR_STACK_INCM = 0x20,
	HDR_STSI_UNAV  = 0x40,
	HDR_PERF_UNAV  = 0x80,	/* set when global performance data is missing */
};
0036 
/* infmval1 bits: which parts of the machine section are valid. */
enum mac_validity {
	MAC_NAME_VLD = 0x20,	/* machine name */
	MAC_ID_VLD   = 0x40,	/* type/manufacturer/sequence/plant */
	MAC_CNT_VLD  = 0x80,	/* cpu counts */
};
0042 
/* infpflg1 bits. */
enum par_flag {
	PAR_MT_EN = 0x80,	/* set when the partition block reports an mtid */
};
0046 
/* infpval1 bits: which parts of the partition section are valid. */
enum par_validity {
	PAR_GRP_VLD  = 0x08,	/* group name and group caps */
	PAR_ID_VLD   = 0x10,	/* partition number and name */
	PAR_ABS_VLD  = 0x20,	/* absolute caps */
	PAR_WGHT_VLD = 0x40,	/* weight-based capacities */
	PAR_PCNT_VLD = 0x80,	/* cpu counts */
};
0054 
/*
 * Header section of the response buffer: validity flags plus the
 * lengths and offsets of the machine and partition sections.
 */
struct hdr_sctn {
	u8 infhflg1;	/* hdr_flags bits */
	u8 infhflg2; /* reserved */
	u8 infhval1; /* reserved */
	u8 infhval2; /* reserved */
	u8 reserved[3];
	u8 infhygct;
	u16 infhtotl;	/* total length of all sections */
	u16 infhdln;	/* length of this header */
	u16 infmoff;	/* offset of the machine section */
	u16 infmlen;	/* length of the machine section */
	u16 infpoff;	/* offset of the partition section */
	u16 infplen;	/* length of the partition section */
	u16 infhoff1;
	u16 infhlen1;
	u16 infgoff1;
	u16 infglen1;
	u16 infhoff2;
	u16 infhlen2;
	u16 infgoff2;
	u16 infglen2;
	u16 infhoff3;
	u16 infhlen3;
	u16 infgoff3;
	u16 infglen3;
	u8 reserved2[4];
} __packed;
0082 
/* Machine section: machine identification and physical cpu counts. */
struct mac_sctn {
	u8 infmflg1; /* reserved */
	u8 infmflg2; /* reserved */
	u8 infmval1;		/* mac_validity bits */
	u8 infmval2; /* reserved */
	u16 infmscps;		/* shared CPs */
	u16 infmdcps;		/* dedicated CPs */
	u16 infmsifl;		/* shared IFLs */
	u16 infmdifl;		/* dedicated IFLs */
	char infmname[8];	/* CPC name (from SCLP) */
	char infmtype[4];	/* machine type (from STSI 1.1.1) */
	char infmmanu[16];	/* manufacturer (from STSI 1.1.1) */
	char infmseq[16];	/* sequence code (from STSI 1.1.1) */
	char infmpman[4];	/* plant (from STSI 1.1.1) */
	u8 reserved[4];
} __packed;
0099 
/* Partition section: data about the calling LPAR. */
struct par_sctn {
	u8 infpflg1;		/* par_flag bits */
	u8 infpflg2; /* reserved */
	u8 infpval1;		/* par_validity bits */
	u8 infpval2; /* reserved */
	u16 infppnum;		/* partition number (from STSI 2.2.2) */
	u16 infpscps;		/* shared CPs */
	u16 infpdcps;		/* dedicated CPs */
	u16 infpsifl;		/* shared IFLs */
	u16 infpdifl;		/* dedicated IFLs */
	u16 reserved;
	char infppnam[8];	/* partition name (from STSI 2.2.2) */
	u32 infpwbcp;		/* weight-based capacity, CPs */
	u32 infpabcp;		/* absolute capacity, CPs */
	u32 infpwbif;		/* weight-based capacity, IFLs */
	u32 infpabif;		/* absolute capacity, IFLs */
	char infplgnm[8];	/* hardware group name */
	u32 infplgcp;		/* group capping, CPs */
	u32 infplgif;		/* group capping, IFLs */
} __packed;
0120 
/* Complete STHYI response buffer as built by this emulation. */
struct sthyi_sctns {
	struct hdr_sctn hdr;
	struct mac_sctn mac;
	struct par_sctn par;
} __packed;
0126 
/* Accumulated diag204 data for one cpu type (CP or IFL). */
struct cpu_inf {
	u64 lpar_cap;		/* absolute cap of the calling LPAR */
	u64 lpar_grp_cap;	/* group cap of the calling LPAR */
	u64 lpar_weight;	/* weight of the calling LPAR, set only if capped */
	u64 all_weight;		/* sum of the weights of all LPARs */
	int cpu_num_ded;	/* number of dedicated cpus */
	int cpu_num_shd;	/* number of shared cpus */
};
0135 
/* CP and IFL accumulators for the calling LPAR. */
struct lpar_cpu_inf {
	struct cpu_inf cp;
	struct cpu_inf ifl;
};
0140 
/*
 * STHYI requires extensive locking in the higher hypervisors
 * and is very computational/memory expensive. Therefore we
 * cache the retrieved data whose valid period is 1s.
 */
#define CACHE_VALID_JIFFIES	HZ

/* One cached page of STHYI response data plus its expiry time. */
struct sthyi_info {
	void *info;		/* page holding the cached response */
	unsigned long end;	/* jiffies value at which the data expires */
};

/* Serializes cache allocation, refresh and read-out. */
static DEFINE_MUTEX(sthyi_mutex);
static struct sthyi_info sthyi_cache;
0155 
0156 static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
0157 {
0158     return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
0159 }
0160 
0161 /*
0162  * Scales the cpu capping from the lpar range to the one expected in
0163  * sthyi data.
0164  *
0165  * diag204 reports a cap in hundredths of processor units.
0166  * z/VM's range for one core is 0 - 0x10000.
0167  */
0168 static u32 scale_cap(u32 in)
0169 {
0170     return (0x10000 * in) / 100;
0171 }
0172 
0173 static void fill_hdr(struct sthyi_sctns *sctns)
0174 {
0175     sctns->hdr.infhdln = sizeof(sctns->hdr);
0176     sctns->hdr.infmoff = sizeof(sctns->hdr);
0177     sctns->hdr.infmlen = sizeof(sctns->mac);
0178     sctns->hdr.infplen = sizeof(sctns->par);
0179     sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
0180     sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
0181 }
0182 
0183 static void fill_stsi_mac(struct sthyi_sctns *sctns,
0184               struct sysinfo_1_1_1 *sysinfo)
0185 {
0186     sclp_ocf_cpc_name_copy(sctns->mac.infmname);
0187     if (*(u64 *)sctns->mac.infmname != 0)
0188         sctns->mac.infmval1 |= MAC_NAME_VLD;
0189 
0190     if (stsi(sysinfo, 1, 1, 1))
0191         return;
0192 
0193     memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
0194     memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
0195     memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
0196     memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
0197 
0198     sctns->mac.infmval1 |= MAC_ID_VLD;
0199 }
0200 
0201 static void fill_stsi_par(struct sthyi_sctns *sctns,
0202               struct sysinfo_2_2_2 *sysinfo)
0203 {
0204     if (stsi(sysinfo, 2, 2, 2))
0205         return;
0206 
0207     sctns->par.infppnum = sysinfo->lpar_number;
0208     memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));
0209 
0210     sctns->par.infpval1 |= PAR_ID_VLD;
0211 }
0212 
0213 static void fill_stsi(struct sthyi_sctns *sctns)
0214 {
0215     void *sysinfo;
0216 
0217     /* Errors are handled through the validity bits in the response. */
0218     sysinfo = (void *)__get_free_page(GFP_KERNEL);
0219     if (!sysinfo)
0220         return;
0221 
0222     fill_stsi_mac(sctns, sysinfo);
0223     fill_stsi_par(sctns, sysinfo);
0224 
0225     free_pages((unsigned long)sysinfo, 0);
0226 }
0227 
0228 static void fill_diag_mac(struct sthyi_sctns *sctns,
0229               struct diag204_x_phys_block *block,
0230               void *diag224_buf)
0231 {
0232     int i;
0233 
0234     for (i = 0; i < block->hdr.cpus; i++) {
0235         switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
0236         case CP:
0237             if (block->cpus[i].weight == DED_WEIGHT)
0238                 sctns->mac.infmdcps++;
0239             else
0240                 sctns->mac.infmscps++;
0241             break;
0242         case IFL:
0243             if (block->cpus[i].weight == DED_WEIGHT)
0244                 sctns->mac.infmdifl++;
0245             else
0246                 sctns->mac.infmsifl++;
0247             break;
0248         }
0249     }
0250     sctns->mac.infmval1 |= MAC_CNT_VLD;
0251 }
0252 
/*
 * Accumulate the cpu data of one partition block into @part_inf and
 * return a pointer to the next partition block.
 *
 * @part_inf:    CP/IFL accumulators
 * @this_lpar:   true when @block belongs to the calling LPAR; only
 *               then are caps, weights and cpu counts recorded
 * @diag224_buf: diag224 name table used to classify each cpu
 * @block:       partition block to process
 */
static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
						 bool this_lpar,
						 void *diag224_buf,
						 struct diag204_x_part_block *block)
{
	int i, capped = 0, weight_cp = 0, weight_ifl = 0;
	struct cpu_inf *cpu_inf;

	for (i = 0; i < block->hdr.rcpus; i++) {
		if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
			continue;

		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
		case CP:
			cpu_inf = &part_inf->cp;
			/* DED_WEIGHT means dedicated; only shared weights count. */
			if (block->cpus[i].cur_weight < DED_WEIGHT)
				weight_cp |= block->cpus[i].cur_weight;
			break;
		case IFL:
			cpu_inf = &part_inf->ifl;
			if (block->cpus[i].cur_weight < DED_WEIGHT)
				weight_ifl |= block->cpus[i].cur_weight;
			break;
		default:
			/* Neither CP nor IFL: nothing to account. */
			continue;
		}

		if (!this_lpar)
			continue;

		capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
		cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
		cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;

		if (block->cpus[i].weight == DED_WEIGHT)
			cpu_inf->cpu_num_ded += 1;
		else
			cpu_inf->cpu_num_shd += 1;
	}

	/* The caller's own weight is only recorded when it is capped. */
	if (this_lpar && capped) {
		part_inf->cp.lpar_weight = weight_cp;
		part_inf->ifl.lpar_weight = weight_ifl;
	}
	part_inf->cp.all_weight += weight_cp;
	part_inf->ifl.all_weight += weight_ifl;
	/* The cpu array ends the block, so the next block starts here. */
	return (struct diag204_x_part_block *)&block->cpus[i];
}
0302 
/*
 * Fill the diag204/diag224 based parts of the response: partition
 * flags, caps, weights and cpu counts. On any error the affected
 * validity bits simply stay unset.
 */
static void fill_diag(struct sthyi_sctns *sctns)
{
	int i, r, pages;
	bool this_lpar;
	void *diag204_buf;
	void *diag224_buf = NULL;
	struct diag204_x_info_blk_hdr *ti_hdr;
	struct diag204_x_part_block *part_block;
	struct diag204_x_phys_block *phys_block;
	struct lpar_cpu_inf lpar_inf = {};

	/* Errors are handled through the validity bits in the response. */
	pages = diag204((unsigned long)DIAG204_SUBC_RSI |
			(unsigned long)DIAG204_INFO_EXT, 0, NULL);
	if (pages <= 0)
		return;

	diag204_buf = vmalloc(array_size(pages, PAGE_SIZE));
	if (!diag204_buf)
		return;

	r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
			(unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
	if (r < 0)
		goto out;

	/* diag224 delivers the cpu type name table used by cpu_id(). */
	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
	if (!diag224_buf || diag224(diag224_buf))
		goto out;

	ti_hdr = diag204_buf;
	part_block = diag204_buf + sizeof(*ti_hdr);

	for (i = 0; i < ti_hdr->npar; i++) {
		/*
		 * For the calling lpar we also need to get the cpu
		 * caps and weights. The time information block header
		 * specifies the offset to the partition block of the
		 * caller lpar, so we know when we process its data.
		 */
		this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
		part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
					  part_block);
	}

	/* The physical block follows the last partition block. */
	phys_block = (struct diag204_x_phys_block *)part_block;
	part_block = diag204_buf + ti_hdr->this_part;
	if (part_block->hdr.mtid)
		sctns->par.infpflg1 = PAR_MT_EN;

	sctns->par.infpval1 |= PAR_GRP_VLD;
	sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
	sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
	memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
	       sizeof(sctns->par.infplgnm));

	sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
	sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
	sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
	sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
	sctns->par.infpval1 |= PAR_PCNT_VLD;

	sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
	sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
	sctns->par.infpval1 |= PAR_ABS_VLD;

	/*
	 * Everything below needs global performance data to be
	 * meaningful.
	 */
	if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
		sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
		goto out;
	}

	fill_diag_mac(sctns, phys_block, diag224_buf);

	/* Weight-based capacity: this LPAR's share of the shared cpus. */
	if (lpar_inf.cp.lpar_weight) {
		sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
			lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
	}

	if (lpar_inf.ifl.lpar_weight) {
		sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
			lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
	}
	sctns->par.infpval1 |= PAR_WGHT_VLD;

out:
	/* free_page(0) / vfree(NULL) are no-ops, so this is always safe. */
	free_page((unsigned long)diag224_buf);
	vfree(diag204_buf);
}
0395 
/*
 * Execute the real STHYI instruction (opcode 0xb256) with subcode 0 on
 * the buffer at @vaddr. Returns the condition code; the return code
 * delivered in R2 + 1 is stored in *rc.
 */
static int sthyi(u64 vaddr, u64 *rc)
{
	union register_pair r1 = { .even = 0, }; /* subcode */
	union register_pair r2 = { .even = vaddr, };
	int cc;

	asm volatile(
		".insn   rre,0xB2560000,%[r1],%[r2]\n"
		/* ipm/srl extract the condition code from the PSW. */
		"ipm     %[cc]\n"
		"srl     %[cc],28\n"
		: [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
		: [r1] "d" (r1.pair)
		: "memory", "cc");
	*rc = r2.odd;
	return cc;
}
0412 
0413 static int fill_dst(void *dst, u64 *rc)
0414 {
0415     struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
0416 
0417     /*
0418      * If the facility is on, we don't want to emulate the instruction.
0419      * We ask the hypervisor to provide the data.
0420      */
0421     if (test_facility(74))
0422         return sthyi((u64)dst, rc);
0423 
0424     fill_hdr(sctns);
0425     fill_stsi(sctns);
0426     fill_diag(sctns);
0427     *rc = 0;
0428     return 0;
0429 }
0430 
0431 static int sthyi_init_cache(void)
0432 {
0433     if (sthyi_cache.info)
0434         return 0;
0435     sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL);
0436     if (!sthyi_cache.info)
0437         return -ENOMEM;
0438     sthyi_cache.end = jiffies - 1; /* expired */
0439     return 0;
0440 }
0441 
0442 static int sthyi_update_cache(u64 *rc)
0443 {
0444     int r;
0445 
0446     memset(sthyi_cache.info, 0, PAGE_SIZE);
0447     r = fill_dst(sthyi_cache.info, rc);
0448     if (r)
0449         return r;
0450     sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
0451     return r;
0452 }
0453 
0454 /*
0455  * sthyi_fill - Fill page with data returned by the STHYI instruction
0456  *
0457  * @dst: Pointer to zeroed page
0458  * @rc:  Pointer for storing the return code of the instruction
0459  *
0460  * Fills the destination with system information returned by the STHYI
0461  * instruction. The data is generated by emulation or execution of STHYI,
0462  * if available. The return value is the condition code that would be
0463  * returned, the rc parameter is the return code which is passed in
0464  * register R2 + 1.
0465  */
0466 int sthyi_fill(void *dst, u64 *rc)
0467 {
0468     int r;
0469 
0470     mutex_lock(&sthyi_mutex);
0471     r = sthyi_init_cache();
0472     if (r)
0473         goto out;
0474 
0475     if (time_is_before_jiffies(sthyi_cache.end)) {
0476         /* cache expired */
0477         r = sthyi_update_cache(rc);
0478         if (r)
0479             goto out;
0480     }
0481     *rc = 0;
0482     memcpy(dst, sthyi_cache.info, PAGE_SIZE);
0483 out:
0484     mutex_unlock(&sthyi_mutex);
0485     return r;
0486 }
0487 EXPORT_SYMBOL_GPL(sthyi_fill);
0488 
/*
 * s390_sthyi - provide STHYI data to user space
 *
 * @function_code: must be STHYI_FC_CP_IFL_CAP
 * @buffer:        user buffer receiving PAGE_SIZE bytes of data
 * @return_code:   optional user pointer for the instruction return code
 * @flags:         must be zero (reserved for future use)
 *
 * Returns a negative error code on failure, otherwise the value
 * returned by sthyi_fill() (the condition code of the instruction).
 */
SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer,
		u64 __user *, return_code, unsigned long, flags)
{
	u64 sthyi_rc;
	void *info;
	int r;

	if (flags)
		return -EINVAL;
	if (function_code != STHYI_FC_CP_IFL_CAP)
		return -EOPNOTSUPP;
	/* Stage the data in a kernel page, then copy it out in one go. */
	info = (void *)get_zeroed_page(GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	r = sthyi_fill(info, &sthyi_rc);
	if (r < 0)
		goto out;
	if (return_code && put_user(sthyi_rc, return_code)) {
		r = -EFAULT;
		goto out;
	}
	if (copy_to_user(buffer, info, PAGE_SIZE))
		r = -EFAULT;
out:
	free_page((unsigned long)info);
	return r;
}