Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * Copyright 2020-21 IBM Corp.
0004  */
0005 
0006 #define pr_fmt(fmt) "vas: " fmt
0007 
0008 #include <linux/module.h>
0009 #include <linux/kernel.h>
0010 #include <linux/export.h>
0011 #include <linux/types.h>
0012 #include <linux/delay.h>
0013 #include <linux/slab.h>
0014 #include <linux/interrupt.h>
0015 #include <linux/irqdomain.h>
0016 #include <asm/machdep.h>
0017 #include <asm/hvcall.h>
0018 #include <asm/plpar_wrappers.h>
0019 #include <asm/firmware.h>
0020 #include <asm/vas.h>
0021 #include "vas.h"
0022 
0023 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
0024 #define VAS_DEFAULT_DOMAIN_ID   0xFFFFFFFFFFFFFFFFul
0025 /* The hypervisor allows one credit per window right now */
0026 #define DEF_WIN_CREDS       1
0027 
0028 static struct vas_all_caps caps_all;
0029 static bool copypaste_feat;
0030 static struct hv_vas_cop_feat_caps hv_cop_caps;
0031 
0032 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
0033 static DEFINE_MUTEX(vas_pseries_mutex);
0034 static bool migration_in_progress;
0035 
0036 static long hcall_return_busy_check(long rc)
0037 {
0038     /* Check if we are stalled for some time */
0039     if (H_IS_LONG_BUSY(rc)) {
0040         msleep(get_longbusy_msecs(rc));
0041         rc = H_BUSY;
0042     } else if (rc == H_BUSY) {
0043         cond_resched();
0044     }
0045 
0046     return rc;
0047 }
0048 
0049 /*
0050  * Allocate VAS window hcall
0051  */
0052 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
0053                      u8 wintype, u16 credits)
0054 {
0055     long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
0056     long rc;
0057 
0058     do {
0059         rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
0060                   credits, domain[0], domain[1], domain[2],
0061                   domain[3], domain[4], domain[5]);
0062 
0063         rc = hcall_return_busy_check(rc);
0064     } while (rc == H_BUSY);
0065 
0066     if (rc == H_SUCCESS) {
0067         if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
0068             pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
0069             return -ENOTSUPP;
0070         }
0071         win->vas_win.winid = retbuf[0];
0072         win->win_addr = retbuf[1];
0073         win->complete_irq = retbuf[2];
0074         win->fault_irq = retbuf[3];
0075         return 0;
0076     }
0077 
0078     pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
0079         rc, wintype, credits);
0080 
0081     return -EIO;
0082 }
0083 
0084 /*
0085  * Deallocate VAS window hcall.
0086  */
0087 static int h_deallocate_vas_window(u64 winid)
0088 {
0089     long rc;
0090 
0091     do {
0092         rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
0093 
0094         rc = hcall_return_busy_check(rc);
0095     } while (rc == H_BUSY);
0096 
0097     if (rc == H_SUCCESS)
0098         return 0;
0099 
0100     pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
0101         rc, winid);
0102     return -EIO;
0103 }
0104 
0105 /*
0106  * Modify VAS window.
0107  * After the window is opened with allocate window hcall, configure it
0108  * with flags and LPAR PID before using.
0109  */
0110 static int h_modify_vas_window(struct pseries_vas_window *win)
0111 {
0112     long rc;
0113 
0114     /*
0115      * AMR value is not supported in Linux VAS implementation.
0116      * The hypervisor ignores it if 0 is passed.
0117      */
0118     do {
0119         rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
0120                     win->vas_win.winid, win->pid, 0,
0121                     VAS_MOD_WIN_FLAGS, 0);
0122 
0123         rc = hcall_return_busy_check(rc);
0124     } while (rc == H_BUSY);
0125 
0126     if (rc == H_SUCCESS)
0127         return 0;
0128 
0129     pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
0130             rc, win->vas_win.winid, win->pid);
0131     return -EIO;
0132 }
0133 
0134 /*
0135  * This hcall is used to determine the capabilities from the hypervisor.
0136  * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
0137  * @query_type: If 0 is passed, the hypervisor returns the overall
0138  *      capabilities which provides all feature(s) that are
0139  *      available. Then query the hypervisor to get the
0140  *      corresponding capabilities for the specific feature.
0141  *      Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
0142  *          and VAS GZIP Default capabilities.
0143  *          H_QUERY_NX_CAPABILITIES provides NX GZIP
0144  *          capabilities.
0145  * @result: Return buffer to save capabilities.
0146  */
0147 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
0148 {
0149     long rc;
0150 
0151     rc = plpar_hcall_norets(hcall, query_type, result);
0152 
0153     if (rc == H_SUCCESS)
0154         return 0;
0155 
0156     /* H_FUNCTION means HV does not support VAS so don't print an error */
0157     if (rc != H_FUNCTION) {
0158         pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
0159             (hcall == H_QUERY_VAS_CAPABILITIES) ?
0160                 "H_QUERY_VAS_CAPABILITIES" :
0161                 "H_QUERY_NX_CAPABILITIES",
0162             rc, query_type, result);
0163     }
0164 
0165     return -EIO;
0166 }
0167 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
0168 
0169 /*
0170  * hcall to get fault CRB from the hypervisor.
0171  */
0172 static int h_get_nx_fault(u32 winid, u64 buffer)
0173 {
0174     long rc;
0175 
0176     rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
0177 
0178     if (rc == H_SUCCESS)
0179         return 0;
0180 
0181     pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
0182         rc, winid, buffer);
0183     return -EIO;
0184 
0185 }
0186 
0187 /*
0188  * Handle the fault interrupt.
0189  * When the fault interrupt is received for each window, query the
0190  * hypervisor to get the fault CRB on the specific fault. Then
0191  * process the CRB by updating CSB or send signal if the user space
0192  * CSB is invalid.
0193  * Note: The hypervisor forwards an interrupt for each fault request.
0194  *  So one fault CRB to process for each H_GET_NX_FAULT hcall.
0195  */
0196 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
0197 {
0198     struct pseries_vas_window *txwin = data;
0199     struct coprocessor_request_block crb;
0200     struct vas_user_win_ref *tsk_ref;
0201     int rc;
0202 
0203     rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
0204     if (!rc) {
0205         tsk_ref = &txwin->vas_win.task_ref;
0206         vas_dump_crb(&crb);
0207         vas_update_csb(&crb, tsk_ref);
0208     }
0209 
0210     return IRQ_HANDLED;
0211 }
0212 
0213 /*
0214  * Allocate window and setup IRQ mapping.
0215  */
0216 static int allocate_setup_window(struct pseries_vas_window *txwin,
0217                  u64 *domain, u8 wintype)
0218 {
0219     int rc;
0220 
0221     rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
0222     if (rc)
0223         return rc;
0224     /*
0225      * On PowerVM, the hypervisor setup and forwards the fault
0226      * interrupt per window. So the IRQ setup and fault handling
0227      * will be done for each open window separately.
0228      */
0229     txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
0230     if (!txwin->fault_virq) {
0231         pr_err("Failed irq mapping %d\n", txwin->fault_irq);
0232         rc = -EINVAL;
0233         goto out_win;
0234     }
0235 
0236     txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
0237                 txwin->vas_win.winid);
0238     if (!txwin->name) {
0239         rc = -ENOMEM;
0240         goto out_irq;
0241     }
0242 
0243     rc = request_threaded_irq(txwin->fault_virq, NULL,
0244                   pseries_vas_fault_thread_fn, IRQF_ONESHOT,
0245                   txwin->name, txwin);
0246     if (rc) {
0247         pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
0248                txwin->vas_win.winid, txwin->fault_virq, rc);
0249         goto out_free;
0250     }
0251 
0252     txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
0253 
0254     return 0;
0255 out_free:
0256     kfree(txwin->name);
0257 out_irq:
0258     irq_dispose_mapping(txwin->fault_virq);
0259 out_win:
0260     h_deallocate_vas_window(txwin->vas_win.winid);
0261     return rc;
0262 }
0263 
0264 static inline void free_irq_setup(struct pseries_vas_window *txwin)
0265 {
0266     free_irq(txwin->fault_virq, txwin);
0267     kfree(txwin->name);
0268     irq_dispose_mapping(txwin->fault_virq);
0269 }
0270 
0271 static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
0272                           enum vas_cop_type cop_type)
0273 {
0274     long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
0275     struct vas_cop_feat_caps *cop_feat_caps;
0276     struct vas_caps *caps;
0277     struct pseries_vas_window *txwin;
0278     int rc;
0279 
0280     txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
0281     if (!txwin)
0282         return ERR_PTR(-ENOMEM);
0283 
0284     /*
0285      * A VAS window can have many credits which means that many
0286      * requests can be issued simultaneously. But the hypervisor
0287      * restricts one credit per window.
0288      * The hypervisor introduces 2 different types of credits:
0289      * Default credit type (Uses normal priority FIFO):
0290      *  A limited number of credits are assigned to partitions
0291      *  based on processor entitlement. But these credits may be
0292      *  over-committed on a system depends on whether the CPUs
0293      *  are in shared or dedicated modes - that is, more requests
0294      *  may be issued across the system than NX can service at
0295      *  once which can result in paste command failure (RMA_busy).
0296      *  Then the process has to resend requests or fall-back to
0297      *  SW compression.
0298      * Quality of Service (QoS) credit type (Uses high priority FIFO):
0299      *  To avoid NX HW contention, the system admins can assign
0300      *  QoS credits for each LPAR so that this partition is
0301      *  guaranteed access to NX resources. These credits are
0302      *  assigned to partitions via the HMC.
0303      *  Refer PAPR for more information.
0304      *
0305      * Allocate window with QoS credits if user requested. Otherwise
0306      * default credits are used.
0307      */
0308     if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
0309         caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
0310     else
0311         caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
0312 
0313     cop_feat_caps = &caps->caps;
0314 
0315     if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
0316             atomic_read(&cop_feat_caps->nr_total_credits)) {
0317         pr_err("Credits are not available to allocate window\n");
0318         rc = -EINVAL;
0319         goto out;
0320     }
0321 
0322     if (vas_id == -1) {
0323         /*
0324          * The user space is requesting to allocate a window on
0325          * a VAS instance where the process is executing.
0326          * On PowerVM, domain values are passed to the hypervisor
0327          * to select VAS instance. Useful if the process is
0328          * affinity to NUMA node.
0329          * The hypervisor selects VAS instance if
0330          * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
0331          * The h_allocate_vas_window hcall is defined to take a
0332          * domain values as specified by h_home_node_associativity,
0333          * So no unpacking needs to be done.
0334          */
0335         rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
0336                   VPHN_FLAG_VCPU, smp_processor_id());
0337         if (rc != H_SUCCESS) {
0338             pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
0339             goto out;
0340         }
0341     }
0342 
0343     txwin->pid = mfspr(SPRN_PID);
0344 
0345     /*
0346      * Allocate / Deallocate window hcalls and setup / free IRQs
0347      * have to be protected with mutex.
0348      * Open VAS window: Allocate window hcall and setup IRQ
0349      * Close VAS window: Deallocate window hcall and free IRQ
0350      *  The hypervisor waits until all NX requests are
0351      *  completed before closing the window. So expects OS
0352      *  to handle NX faults, means IRQ can be freed only
0353      *  after the deallocate window hcall is returned.
0354      * So once the window is closed with deallocate hcall before
0355      * the IRQ is freed, it can be assigned to new allocate
0356      * hcall with the same fault IRQ by the hypervisor. It can
0357      * result in setup IRQ fail for the new window since the
0358      * same fault IRQ is not freed by the OS before.
0359      */
0360     mutex_lock(&vas_pseries_mutex);
0361     if (migration_in_progress)
0362         rc = -EBUSY;
0363     else
0364         rc = allocate_setup_window(txwin, (u64 *)&domain[0],
0365                    cop_feat_caps->win_type);
0366     mutex_unlock(&vas_pseries_mutex);
0367     if (rc)
0368         goto out;
0369 
0370     /*
0371      * Modify window and it is ready to use.
0372      */
0373     rc = h_modify_vas_window(txwin);
0374     if (!rc)
0375         rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
0376     if (rc)
0377         goto out_free;
0378 
0379     txwin->win_type = cop_feat_caps->win_type;
0380     mutex_lock(&vas_pseries_mutex);
0381     /*
0382      * Possible to lose the acquired credit with DLPAR core
0383      * removal after the window is opened. So if there are any
0384      * closed windows (means with lost credits), do not give new
0385      * window to user space. New windows will be opened only
0386      * after the existing windows are reopened when credits are
0387      * available.
0388      */
0389     if (!caps->nr_close_wins) {
0390         list_add(&txwin->win_list, &caps->list);
0391         caps->nr_open_windows++;
0392         mutex_unlock(&vas_pseries_mutex);
0393         vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
0394         return &txwin->vas_win;
0395     }
0396     mutex_unlock(&vas_pseries_mutex);
0397 
0398     put_vas_user_win_ref(&txwin->vas_win.task_ref);
0399     rc = -EBUSY;
0400     pr_err("No credit is available to allocate window\n");
0401 
0402 out_free:
0403     /*
0404      * Window is not operational. Free IRQ before closing
0405      * window so that do not have to hold mutex.
0406      */
0407     free_irq_setup(txwin);
0408     h_deallocate_vas_window(txwin->vas_win.winid);
0409 out:
0410     atomic_dec(&cop_feat_caps->nr_used_credits);
0411     kfree(txwin);
0412     return ERR_PTR(rc);
0413 }
0414 
0415 static u64 vas_paste_address(struct vas_window *vwin)
0416 {
0417     struct pseries_vas_window *win;
0418 
0419     win = container_of(vwin, struct pseries_vas_window, vas_win);
0420     return win->win_addr;
0421 }
0422 
0423 static int deallocate_free_window(struct pseries_vas_window *win)
0424 {
0425     int rc = 0;
0426 
0427     /*
0428      * The hypervisor waits for all requests including faults
0429      * are processed before closing the window - Means all
0430      * credits have to be returned. In the case of fault
0431      * request, a credit is returned after OS issues
0432      * H_GET_NX_FAULT hcall.
0433      * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
0434      * hcall.
0435      */
0436     rc = h_deallocate_vas_window(win->vas_win.winid);
0437     if (!rc)
0438         free_irq_setup(win);
0439 
0440     return rc;
0441 }
0442 
0443 static int vas_deallocate_window(struct vas_window *vwin)
0444 {
0445     struct pseries_vas_window *win;
0446     struct vas_cop_feat_caps *caps;
0447     int rc = 0;
0448 
0449     if (!vwin)
0450         return -EINVAL;
0451 
0452     win = container_of(vwin, struct pseries_vas_window, vas_win);
0453 
0454     /* Should not happen */
0455     if (win->win_type >= VAS_MAX_FEAT_TYPE) {
0456         pr_err("Window (%u): Invalid window type %u\n",
0457                 vwin->winid, win->win_type);
0458         return -EINVAL;
0459     }
0460 
0461     caps = &vascaps[win->win_type].caps;
0462     mutex_lock(&vas_pseries_mutex);
0463     /*
0464      * VAS window is already closed in the hypervisor when
0465      * lost the credit or with migration. So just remove the entry
0466      * from the list, remove task references and free vas_window
0467      * struct.
0468      */
0469     if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
0470         !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
0471         rc = deallocate_free_window(win);
0472         if (rc) {
0473             mutex_unlock(&vas_pseries_mutex);
0474             return rc;
0475         }
0476     } else
0477         vascaps[win->win_type].nr_close_wins--;
0478 
0479     list_del(&win->win_list);
0480     atomic_dec(&caps->nr_used_credits);
0481     vascaps[win->win_type].nr_open_windows--;
0482     mutex_unlock(&vas_pseries_mutex);
0483 
0484     put_vas_user_win_ref(&vwin->task_ref);
0485     mm_context_remove_vas_window(vwin->task_ref.mm);
0486 
0487     kfree(win);
0488     return 0;
0489 }
0490 
0491 static const struct vas_user_win_ops vops_pseries = {
0492     .open_win   = vas_allocate_window,  /* Open and configure window */
0493     .paste_addr = vas_paste_address,    /* To do copy/paste */
0494     .close_win  = vas_deallocate_window, /* Close window */
0495 };
0496 
0497 /*
0498  * Supporting only nx-gzip coprocessor type now, but this API code
0499  * extended to other coprocessor types later.
0500  */
0501 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
0502                  const char *name)
0503 {
0504     int rc;
0505 
0506     if (!copypaste_feat)
0507         return -ENOTSUPP;
0508 
0509     rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
0510 
0511     return rc;
0512 }
0513 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
0514 
/*
 * Counterpart of vas_register_api_pseries(): unregister from the VAS
 * coprocessor API.
 */
void vas_unregister_api_pseries(void)
{
    vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
0520 
0521 /*
0522  * Get the specific capabilities based on the feature type.
0523  * Right now supports GZIP default and GZIP QoS capabilities.
0524  */
0525 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
0526                 struct hv_vas_cop_feat_caps *hv_caps)
0527 {
0528     struct vas_cop_feat_caps *caps;
0529     struct vas_caps *vcaps;
0530     int rc = 0;
0531 
0532     vcaps = &vascaps[type];
0533     memset(vcaps, 0, sizeof(*vcaps));
0534     INIT_LIST_HEAD(&vcaps->list);
0535 
0536     vcaps->feat = feat;
0537     caps = &vcaps->caps;
0538 
0539     rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
0540                       (u64)virt_to_phys(hv_caps));
0541     if (rc)
0542         return rc;
0543 
0544     caps->user_mode = hv_caps->user_mode;
0545     if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
0546         pr_err("User space COPY/PASTE is not supported\n");
0547         return -ENOTSUPP;
0548     }
0549 
0550     caps->descriptor = be64_to_cpu(hv_caps->descriptor);
0551     caps->win_type = hv_caps->win_type;
0552     if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
0553         pr_err("Unsupported window type %u\n", caps->win_type);
0554         return -EINVAL;
0555     }
0556     caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
0557     caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
0558     atomic_set(&caps->nr_total_credits,
0559            be16_to_cpu(hv_caps->target_lpar_creds));
0560     if (feat == VAS_GZIP_DEF_FEAT) {
0561         caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
0562 
0563         if (caps->max_win_creds < DEF_WIN_CREDS) {
0564             pr_err("Window creds(%u) > max allowed window creds(%u)\n",
0565                    DEF_WIN_CREDS, caps->max_win_creds);
0566             return -EINVAL;
0567         }
0568     }
0569 
0570     rc = sysfs_add_vas_caps(caps);
0571     if (rc)
0572         return rc;
0573 
0574     copypaste_feat = true;
0575 
0576     return 0;
0577 }
0578 
0579 /*
0580  * VAS windows can be closed due to lost credits when the core is
0581  * removed. So reopen them if credits are available due to DLPAR
0582  * core add and set the window active status. When NX sees the page
0583  * fault on the unmapped paste address, the kernel handles the fault
0584  * by setting the remapping to new paste address if the window is
0585  * active.
0586  */
0587 static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
0588                  bool migrate)
0589 {
0590     long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
0591     struct vas_cop_feat_caps *caps = &vcaps->caps;
0592     struct pseries_vas_window *win = NULL, *tmp;
0593     int rc, mv_ents = 0;
0594     int flag;
0595 
0596     /*
0597      * Nothing to do if there are no closed windows.
0598      */
0599     if (!vcaps->nr_close_wins)
0600         return 0;
0601 
0602     /*
0603      * For the core removal, the hypervisor reduces the credits
0604      * assigned to the LPAR and the kernel closes VAS windows
0605      * in the hypervisor depends on reduced credits. The kernel
0606      * uses LIFO (the last windows that are opened will be closed
0607      * first) and expects to open in the same order when credits
0608      * are available.
0609      * For example, 40 windows are closed when the LPAR lost 2 cores
0610      * (dedicated). If 1 core is added, this LPAR can have 20 more
0611      * credits. It means the kernel can reopen 20 windows. So move
0612      * 20 entries in the VAS windows lost and reopen next 20 windows.
0613      * For partition migration, reopen all windows that are closed
0614      * during resume.
0615      */
0616     if ((vcaps->nr_close_wins > creds) && !migrate)
0617         mv_ents = vcaps->nr_close_wins - creds;
0618 
0619     list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
0620         if (!mv_ents)
0621             break;
0622 
0623         mv_ents--;
0624     }
0625 
0626     /*
0627      * Open windows if they are closed only with migration or
0628      * DLPAR (lost credit) before.
0629      */
0630     if (migrate)
0631         flag = VAS_WIN_MIGRATE_CLOSE;
0632     else
0633         flag = VAS_WIN_NO_CRED_CLOSE;
0634 
0635     list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
0636         /*
0637          * This window is closed with DLPAR and migration events.
0638          * So reopen the window with the last event.
0639          * The user space is not suspended with the current
0640          * migration notifier. So the user space can issue DLPAR
0641          * CPU hotplug while migration in progress. In this case
0642          * this window will be opened with the last event.
0643          */
0644         if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
0645             (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
0646             win->vas_win.status &= ~flag;
0647             continue;
0648         }
0649 
0650         /*
0651          * Nothing to do on this window if it is not closed
0652          * with this flag
0653          */
0654         if (!(win->vas_win.status & flag))
0655             continue;
0656 
0657         rc = allocate_setup_window(win, (u64 *)&domain[0],
0658                        caps->win_type);
0659         if (rc)
0660             return rc;
0661 
0662         rc = h_modify_vas_window(win);
0663         if (rc)
0664             goto out;
0665 
0666         mutex_lock(&win->vas_win.task_ref.mmap_mutex);
0667         /*
0668          * Set window status to active
0669          */
0670         win->vas_win.status &= ~flag;
0671         mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
0672         win->win_type = caps->win_type;
0673         if (!--vcaps->nr_close_wins)
0674             break;
0675     }
0676 
0677     return 0;
0678 out:
0679     /*
0680      * Window modify HCALL failed. So close the window to the
0681      * hypervisor and return.
0682      */
0683     free_irq_setup(win);
0684     h_deallocate_vas_window(win->vas_win.winid);
0685     return rc;
0686 }
0687 
0688 /*
0689  * The hypervisor reduces the available credits if the LPAR lost core. It
0690  * means the excessive windows should not be active and the user space
0691  * should not be using these windows to send compression requests to NX.
0692  * So the kernel closes the excessive windows and unmap the paste address
0693  * such that the user space receives paste instruction failure. Then up to
0694  * the user space to fall back to SW compression and manage with the
0695  * existing windows.
0696  */
0697 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
0698                                     bool migrate)
0699 {
0700     struct pseries_vas_window *win, *tmp;
0701     struct vas_user_win_ref *task_ref;
0702     struct vm_area_struct *vma;
0703     int rc = 0, flag;
0704 
0705     if (migrate)
0706         flag = VAS_WIN_MIGRATE_CLOSE;
0707     else
0708         flag = VAS_WIN_NO_CRED_CLOSE;
0709 
0710     list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
0711         /*
0712          * This window is already closed due to lost credit
0713          * or for migration before. Go for next window.
0714          * For migration, nothing to do since this window
0715          * closed for DLPAR and will be reopened even on
0716          * the destination system with other DLPAR operation.
0717          */
0718         if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
0719             (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
0720             win->vas_win.status |= flag;
0721             continue;
0722         }
0723 
0724         task_ref = &win->vas_win.task_ref;
0725         mutex_lock(&task_ref->mmap_mutex);
0726         vma = task_ref->vma;
0727         /*
0728          * Number of available credits are reduced, So select
0729          * and close windows.
0730          */
0731         win->vas_win.status |= flag;
0732 
0733         mmap_write_lock(task_ref->mm);
0734         /*
0735          * vma is set in the original mapping. But this mapping
0736          * is done with mmap() after the window is opened with ioctl.
0737          * so we may not see the original mapping if the core remove
0738          * is done before the original mmap() and after the ioctl.
0739          */
0740         if (vma)
0741             zap_page_range(vma, vma->vm_start,
0742                     vma->vm_end - vma->vm_start);
0743 
0744         mmap_write_unlock(task_ref->mm);
0745         mutex_unlock(&task_ref->mmap_mutex);
0746         /*
0747          * Close VAS window in the hypervisor, but do not
0748          * free vas_window struct since it may be reused
0749          * when the credit is available later (DLPAR with
0750          * adding cores). This struct will be used
0751          * later when the process issued with close(FD).
0752          */
0753         rc = deallocate_free_window(win);
0754         /*
0755          * This failure is from the hypervisor.
0756          * No way to stop migration for these failures.
0757          * So ignore error and continue closing other windows.
0758          */
0759         if (rc && !migrate)
0760             return rc;
0761 
0762         vcap->nr_close_wins++;
0763 
0764         /*
0765          * For migration, do not depend on lpar_creds in case if
0766          * mismatch with the hypervisor value (should not happen).
0767          * So close all active windows in the list and will be
0768          * reopened windows based on the new lpar_creds on the
0769          * destination system during resume.
0770          */
0771         if (!migrate && !--excess_creds)
0772             break;
0773     }
0774 
0775     return 0;
0776 }
0777 
0778 /*
0779  * Get new VAS capabilities when the core add/removal configuration
0780  * changes. Reconfig window configurations based on the credits
0781  * availability from this new capabilities.
0782  */
0783 int vas_reconfig_capabilties(u8 type, int new_nr_creds)
0784 {
0785     struct vas_cop_feat_caps *caps;
0786     int old_nr_creds;
0787     struct vas_caps *vcaps;
0788     int rc = 0, nr_active_wins;
0789 
0790     if (type >= VAS_MAX_FEAT_TYPE) {
0791         pr_err("Invalid credit type %d\n", type);
0792         return -EINVAL;
0793     }
0794 
0795     vcaps = &vascaps[type];
0796     caps = &vcaps->caps;
0797 
0798     mutex_lock(&vas_pseries_mutex);
0799 
0800     old_nr_creds = atomic_read(&caps->nr_total_credits);
0801 
0802     atomic_set(&caps->nr_total_credits, new_nr_creds);
0803     /*
0804      * The total number of available credits may be decreased or
0805      * increased with DLPAR operation. Means some windows have to be
0806      * closed / reopened. Hold the vas_pseries_mutex so that the
0807      * user space can not open new windows.
0808      */
0809     if (old_nr_creds <  new_nr_creds) {
0810         /*
0811          * If the existing target credits is less than the new
0812          * target, reopen windows if they are closed due to
0813          * the previous DLPAR (core removal).
0814          */
0815         rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
0816                        false);
0817     } else {
0818         /*
0819          * # active windows is more than new LPAR available
0820          * credits. So close the excessive windows.
0821          * On pseries, each window will have 1 credit.
0822          */
0823         nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
0824         if (nr_active_wins > new_nr_creds)
0825             rc = reconfig_close_windows(vcaps,
0826                     nr_active_wins - new_nr_creds,
0827                     false);
0828     }
0829 
0830     mutex_unlock(&vas_pseries_mutex);
0831     return rc;
0832 }
0833 /*
0834  * Total number of default credits available (target_credits)
0835  * in LPAR depends on number of cores configured. It varies based on
0836  * whether processors are in shared mode or dedicated mode.
0837  * Get the notifier when CPU configuration is changed with DLPAR
0838  * operation so that get the new target_credits (vas default capabilities)
0839  * and then update the existing windows usage if needed.
0840  */
0841 static int pseries_vas_notifier(struct notifier_block *nb,
0842                 unsigned long action, void *data)
0843 {
0844     struct of_reconfig_data *rd = data;
0845     struct device_node *dn = rd->dn;
0846     const __be32 *intserv = NULL;
0847     int new_nr_creds, len, rc = 0;
0848 
0849     if ((action == OF_RECONFIG_ATTACH_NODE) ||
0850         (action == OF_RECONFIG_DETACH_NODE))
0851         intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
0852                       &len);
0853     /*
0854      * Processor config is not changed
0855      */
0856     if (!intserv)
0857         return NOTIFY_OK;
0858 
0859     rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
0860                     vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
0861                     (u64)virt_to_phys(&hv_cop_caps));
0862     if (!rc) {
0863         new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
0864         rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
0865                         new_nr_creds);
0866     }
0867 
0868     if (rc)
0869         pr_err("Failed reconfig VAS capabilities with DLPAR\n");
0870 
0871     return rc;
0872 }
0873 
0874 static struct notifier_block pseries_vas_nb = {
0875     .notifier_call = pseries_vas_notifier,
0876 };
0877 
0878 /*
0879  * For LPM, all windows have to be closed on the source partition
0880  * before migration and reopen them on the destination partition
0881  * after migration. So closing windows during suspend and
0882  * reopen them during resume.
0883  */
0884 int vas_migration_handler(int action)
0885 {
0886     struct vas_cop_feat_caps *caps;
0887     int old_nr_creds, new_nr_creds = 0;
0888     struct vas_caps *vcaps;
0889     int i, rc = 0;
0890 
0891     /*
0892      * NX-GZIP is not enabled. Nothing to do for migration.
0893      */
0894     if (!copypaste_feat)
0895         return rc;
0896 
0897     mutex_lock(&vas_pseries_mutex);
0898 
0899     if (action == VAS_SUSPEND)
0900         migration_in_progress = true;
0901     else
0902         migration_in_progress = false;
0903 
0904     for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
0905         vcaps = &vascaps[i];
0906         caps = &vcaps->caps;
0907         old_nr_creds = atomic_read(&caps->nr_total_credits);
0908 
0909         rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
0910                           vcaps->feat,
0911                           (u64)virt_to_phys(&hv_cop_caps));
0912         if (!rc) {
0913             new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
0914             /*
0915              * Should not happen. But incase print messages, close
0916              * all windows in the list during suspend and reopen
0917              * windows based on new lpar_creds on the destination
0918              * system.
0919              */
0920             if (old_nr_creds != new_nr_creds) {
0921                 pr_err("Target credits mismatch with the hypervisor\n");
0922                 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
0923                     action, old_nr_creds, new_nr_creds);
0924                 pr_err("Used creds: %d, Active creds: %d\n",
0925                     atomic_read(&caps->nr_used_credits),
0926                     vcaps->nr_open_windows - vcaps->nr_close_wins);
0927             }
0928         } else {
0929             pr_err("state(%d): Get VAS capabilities failed with %d\n",
0930                 action, rc);
0931             /*
0932              * We can not stop migration with the current lpm
0933              * implementation. So continue closing all windows in
0934              * the list (during suspend) and return without
0935              * opening windows (during resume) if VAS capabilities
0936              * HCALL failed.
0937              */
0938             if (action == VAS_RESUME)
0939                 goto out;
0940         }
0941 
0942         switch (action) {
0943         case VAS_SUSPEND:
0944             rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
0945                             true);
0946             break;
0947         case VAS_RESUME:
0948             atomic_set(&caps->nr_total_credits, new_nr_creds);
0949             rc = reconfig_open_windows(vcaps, new_nr_creds, true);
0950             break;
0951         default:
0952             /* should not happen */
0953             pr_err("Invalid migration action %d\n", action);
0954             rc = -EINVAL;
0955             goto out;
0956         }
0957 
0958         /*
0959          * Ignore errors during suspend and return for resume.
0960          */
0961         if (rc && (action == VAS_RESUME))
0962             goto out;
0963     }
0964 
0965 out:
0966     mutex_unlock(&vas_pseries_mutex);
0967     return rc;
0968 }
0969 
0970 static int __init pseries_vas_init(void)
0971 {
0972     struct hv_vas_all_caps *hv_caps;
0973     int rc = 0;
0974 
0975     /*
0976      * Linux supports user space COPY/PASTE only with Radix
0977      */
0978     if (!radix_enabled()) {
0979         pr_err("API is supported only with radix page tables\n");
0980         return -ENOTSUPP;
0981     }
0982 
0983     hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
0984     if (!hv_caps)
0985         return -ENOMEM;
0986     /*
0987      * Get VAS overall capabilities by passing 0 to feature type.
0988      */
0989     rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
0990                       (u64)virt_to_phys(hv_caps));
0991     if (rc)
0992         goto out;
0993 
0994     caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
0995     caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
0996 
0997     sysfs_pseries_vas_init(&caps_all);
0998 
0999     /*
1000      * QOS capabilities available
1001      */
1002     if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
1003         rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
1004                       VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
1005 
1006         if (rc)
1007             goto out;
1008     }
1009     /*
1010      * Default capabilities available
1011      */
1012     if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
1013         rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
1014                       VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
1015 
1016     if (!rc && copypaste_feat) {
1017         if (firmware_has_feature(FW_FEATURE_LPAR))
1018             of_reconfig_notifier_register(&pseries_vas_nb);
1019 
1020         pr_info("GZIP feature is available\n");
1021     } else {
1022         /*
1023          * Should not happen, but only when get default
1024          * capabilities HCALL failed. So disable copy paste
1025          * feature.
1026          */
1027         copypaste_feat = false;
1028     }
1029 
1030 out:
1031     kfree(hv_caps);
1032     return rc;
1033 }
1034 machine_device_initcall(pseries, pseries_vas_init);