0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * SN Platform GRU Driver
0004  *
0005  *              FAULT HANDLER FOR GRU DETECTED TLB MISSES
0006  *
0007  * This file contains code that handles TLB misses within the GRU.
0008  * These misses are reported either via interrupts or user polling of
0009  * the user CB.
0010  *
0011  *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
0012  */
0013 
0014 #include <linux/kernel.h>
0015 #include <linux/errno.h>
0016 #include <linux/spinlock.h>
0017 #include <linux/mm.h>
0018 #include <linux/hugetlb.h>
0019 #include <linux/device.h>
0020 #include <linux/io.h>
0021 #include <linux/uaccess.h>
0022 #include <linux/security.h>
0023 #include <linux/sync_core.h>
0024 #include <linux/prefetch.h>
0025 #include "gru.h"
0026 #include "grutables.h"
0027 #include "grulib.h"
0028 #include "gru_instructions.h"
0029 #include <asm/uv/uv_hub.h>
0030 
0031 /* Return codes for vtop functions */
0032 #define VTOP_SUCCESS               0
0033 #define VTOP_INVALID               -1
0034 #define VTOP_RETRY                 -2
0035 
0036 
0037 /*
0038  * Test if a physical address is a valid GRU GSEG address
0039  */
0040 static inline int is_gru_paddr(unsigned long paddr)
0041 {
0042     return paddr >= gru_start_paddr && paddr < gru_end_paddr;
0043 }
0044 
0045 /*
0046  * Find the vma of a GRU segment. Caller must hold mmap_lock.
0047  */
0048 struct vm_area_struct *gru_find_vma(unsigned long vaddr)
0049 {
0050     struct vm_area_struct *vma;
0051 
0052     vma = vma_lookup(current->mm, vaddr);
0053     if (vma && vma->vm_ops == &gru_vm_ops)
0054         return vma;
0055     return NULL;
0056 }
0057 
0058 /*
0059  * Find and lock the gts that contains the specified user vaddr.
0060  *
0061  * Returns:
0062  *  - *gts with the mmap_lock locked for read and the GTS locked.
0063  *  - NULL if vaddr is invalid OR is not a valid GSEG vaddr.
0064  */
0065 
0066 static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
0067 {
0068     struct mm_struct *mm = current->mm;
0069     struct vm_area_struct *vma;
0070     struct gru_thread_state *gts = NULL;
0071 
0072     mmap_read_lock(mm);
0073     vma = gru_find_vma(vaddr);
0074     if (vma)
0075         gts = gru_find_thread_state(vma, TSID(vaddr, vma));
0076     if (gts)
0077         mutex_lock(&gts->ts_ctxlock);
0078     else
0079         mmap_read_unlock(mm);
0080     return gts;
0081 }
0082 
0083 static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
0084 {
0085     struct mm_struct *mm = current->mm;
0086     struct vm_area_struct *vma;
0087     struct gru_thread_state *gts = ERR_PTR(-EINVAL);
0088 
0089     mmap_write_lock(mm);
0090     vma = gru_find_vma(vaddr);
0091     if (!vma)
0092         goto err;
0093 
0094     gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
0095     if (IS_ERR(gts))
0096         goto err;
0097     mutex_lock(&gts->ts_ctxlock);
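    /*
     * Downgrade to a read lock so the caller can later release it with
     * gru_unlock_gts(), which pairs with mmap_read_unlock().
     */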
0098     mmap_write_downgrade(mm);
0099     return gts;
0100 
0101 err:
0102     mmap_write_unlock(mm);
0103     return gts;
0104 }
0105 
0106 /*
0107  * Unlock a GTS that was previously locked with gru_find_lock_gts().
0108  */
0109 static void gru_unlock_gts(struct gru_thread_state *gts)
0110 {
0111     mutex_unlock(&gts->ts_ctxlock);
0112     mmap_read_unlock(current->mm);
0113 }
0114 
0115 /*
0116  * Set a CB.istatus to active using a user virtual address. This must be done
0117  * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
0118  * If the line is evicted, the status may be lost. The in-cache update
0119  * is necessary to prevent the user from seeing a stale cb.istatus that will
0120  * change as soon as the TFH restart is complete. Races may cause an
0121  * occasional failure to clear the cb.istatus, but that is ok.
0122  */
0123 static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk)
0124 {
0125     if (cbk) {
0126         cbk->istatus = CBS_ACTIVE;
0127     }
0128 }
0129 
0130 /*
0131  * Read & clear a TFM
0132  *
0133  * The GRU has an array of fault maps. A map is private to a cpu.
0134  * Only one cpu will be accessing a cpu's fault map.
0135  *
0136  * This function scans the cpu-private fault map & clears all bits that
0137  * are set. The function returns a bitmap that indicates the bits that
0138  * were cleared. Note that since the maps may be updated asynchronously by
0139  * the GRU, atomic operations must be used to clear bits.
0140  */
0141 static void get_clear_fault_map(struct gru_state *gru,
0142                 struct gru_tlb_fault_map *imap,
0143                 struct gru_tlb_fault_map *dmap)
0144 {
0145     unsigned long i, k;
0146     struct gru_tlb_fault_map *tfm;
0147 
0148     tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
0149     prefetchw(tfm);     /* Helps on hardware, required for emulator */
0150     for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
0151         k = tfm->fault_bits[i];
0152         if (k)
0153             k = xchg(&tfm->fault_bits[i], 0UL);
0154         imap->fault_bits[i] = k;
0155         k = tfm->done_bits[i];
0156         if (k)
0157             k = xchg(&tfm->done_bits[i], 0UL);
0158         dmap->fault_bits[i] = k;
0159     }
0160 
0161     /*
0162      * Not functionally required but helps performance. (Required
0163      * on emulator)
0164      */
0165     gru_flush_cache(tfm);
0166 }
0167 
0168 /*
0169  * Atomic (interrupt context) & non-atomic (user context) functions to
0170  * convert a vaddr into a physical address. The size of the page
0171  * is returned in pageshift.
0172  *  returns:
0173  *        0 - successful
0174  *      < 0 - error code
0175  *        1 - (atomic only) try again in non-atomic context
0176  */
0177 static int non_atomic_pte_lookup(struct vm_area_struct *vma,
0178                  unsigned long vaddr, int write,
0179                  unsigned long *paddr, int *pageshift)
0180 {
0181     struct page *page;
0182 
0183 #ifdef CONFIG_HUGETLB_PAGE
0184     *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
0185 #else
0186     *pageshift = PAGE_SHIFT;
0187 #endif
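    /*
     * Fault the page in if necessary (may sleep). Only the physical
     * address is needed here, so the page reference is dropped right away.
     */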
0188     if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
0189         return -EFAULT;
0190     *paddr = page_to_phys(page);
0191     put_page(page);
0192     return 0;
0193 }
0194 
0195 /*
0196  * atomic_pte_lookup
0197  *
0198  * Convert a user virtual address to a physical address
0199  * Only supports Intel large pages (2MB only) on x86_64.
0200  *  ZZZ - hugepage support is incomplete
0201  *
0202  * NOTE: mmap_lock is already held on entry to this function. This
0203  * guarantees existence of the page tables.
0204  */
0205 static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
0206     int write, unsigned long *paddr, int *pageshift)
0207 {
0208     pgd_t *pgdp;
0209     p4d_t *p4dp;
0210     pud_t *pudp;
0211     pmd_t *pmdp;
0212     pte_t pte;
0213 
0214     pgdp = pgd_offset(vma->vm_mm, vaddr);
0215     if (unlikely(pgd_none(*pgdp)))
0216         goto err;
0217 
0218     p4dp = p4d_offset(pgdp, vaddr);
0219     if (unlikely(p4d_none(*p4dp)))
0220         goto err;
0221 
0222     pudp = pud_offset(p4dp, vaddr);
0223     if (unlikely(pud_none(*pudp)))
0224         goto err;
0225 
0226     pmdp = pmd_offset(pudp, vaddr);
0227     if (unlikely(pmd_none(*pmdp)))
0228         goto err;
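    /*
     * A large (2MB) page is mapped directly by the pmd entry, so the pmd
     * itself is used as the pte in that case; otherwise walk down to the pte.
     */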
0229 #ifdef CONFIG_X86_64
0230     if (unlikely(pmd_large(*pmdp)))
0231         pte = *(pte_t *) pmdp;
0232     else
0233 #endif
0234         pte = *pte_offset_kernel(pmdp, vaddr);
0235 
0236     if (unlikely(!pte_present(pte) ||
0237              (write && (!pte_write(pte) || !pte_dirty(pte)))))
0238         return 1;
0239 
0240     *paddr = pte_pfn(pte) << PAGE_SHIFT;
0241 #ifdef CONFIG_HUGETLB_PAGE
0242     *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
0243 #else
0244     *pageshift = PAGE_SHIFT;
0245 #endif
0246     return 0;
0247 
0248 err:
0249     return 1;
0250 }
0251 
0252 static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
0253             int write, int atomic, unsigned long *gpa, int *pageshift)
0254 {
0255     struct mm_struct *mm = gts->ts_mm;
0256     struct vm_area_struct *vma;
0257     unsigned long paddr;
0258     int ret, ps;
0259 
0260     vma = find_vma(mm, vaddr);
0261     if (!vma)
0262         goto inval;
0263 
0264     /*
0265      * Atomic lookup is faster & usually works even if called in non-atomic
0266      * context.
0267      */
0268     rmb();  /* Must check ms_range_active before loading PTEs */
0269     ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps);
0270     if (ret) {
0271         if (atomic)
0272             goto upm;
0273         if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps))
0274             goto inval;
0275     }
0276     if (is_gru_paddr(paddr))
0277         goto inval;
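    /* Align the physical address down to a page boundary for the chosen pageshift */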
0278     paddr = paddr & ~((1UL << ps) - 1);
0279     *gpa = uv_soc_phys_ram_to_gpa(paddr);
0280     *pageshift = ps;
0281     return VTOP_SUCCESS;
0282 
0283 inval:
0284     return VTOP_INVALID;
0285 upm:
0286     return VTOP_RETRY;
0287 }
0288 
0289 
0290 /*
0291  * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
0292  * CBE cacheline so that the line will be written back to the home agent.
0293  * Otherwise the line may be silently dropped. This has no impact
0294  * except on performance.
0295  */
0296 static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
0297 {
0298     if (unlikely(cbe)) {
0299         cbe->cbrexecstatus = 0;         /* make CL dirty */
0300         gru_flush_cache(cbe);
0301     }
0302 }
0303 
0304 /*
0305  * Preload the TLB with entries that may be required. Currently, preloading
0306  * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to
0307  * the end of the bcopy transfer, whichever is smaller.
0308  */
0309 static void gru_preload_tlb(struct gru_state *gru,
0310             struct gru_thread_state *gts, int atomic,
0311             unsigned long fault_vaddr, int asid, int write,
0312             unsigned char tlb_preload_count,
0313             struct gru_tlb_fault_handle *tfh,
0314             struct gru_control_block_extended *cbe)
0315 {
0316     unsigned long vaddr = 0, gpa;
0317     int ret, pageshift;
0318 
0319     if (cbe->opccpy != OP_BCOPY)
0320         return;
0321 
0322     if (fault_vaddr == cbe->cbe_baddr0)
0323         vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
0324     else if (fault_vaddr == cbe->cbe_baddr1)
0325         vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
0326 
0327     fault_vaddr &= PAGE_MASK;
0328     vaddr &= PAGE_MASK;
0329     vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
0330 
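    /*
     * Walk backwards from the last page of the transfer toward the faulting
     * page, dropping in a TLB entry for each page with tfh_write_only()
     * (no CB restart is done here).
     */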
0331     while (vaddr > fault_vaddr) {
0332         ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
0333         if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
0334                       GRU_PAGESIZE(pageshift)))
0335             return;
0336         gru_dbg(grudev,
0337             "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
0338             atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
0339             vaddr, asid, write, pageshift, gpa);
0340         vaddr -= PAGE_SIZE;
0341         STAT(tlb_preload_page);
0342     }
0343 }
0344 
0345 /*
0346  * Drop a TLB entry into the GRU. The fault is described by info in a TFH.
0347  *  Input:
0348  *      cb    Address of user CBR. Null if not running in user context
0349  *  Return:
0350  *        0 = dropin, exception, or switch to UPM successful
0351  *        1 = range invalidate active
0352  *      < 0 = error code
0353  *
0354  */
0355 static int gru_try_dropin(struct gru_state *gru,
0356               struct gru_thread_state *gts,
0357               struct gru_tlb_fault_handle *tfh,
0358               struct gru_instruction_bits *cbk)
0359 {
0360     struct gru_control_block_extended *cbe = NULL;
0361     unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
0362     int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
0363     unsigned long gpa = 0, vaddr = 0;
0364 
0365     /*
0366      * NOTE: The GRU contains magic hardware that eliminates races between
0367      * TLB invalidates and TLB dropins. If an invalidate occurs
0368      * in the window between reading the TFH and the subsequent TLB dropin,
0369      * the dropin is ignored. This eliminates the need for additional locks.
0370      */
0371 
0372     /*
0373      * Prefetch the CBE if doing TLB preloading
0374      */
0375     if (unlikely(tlb_preload_count)) {
0376         cbe = gru_tfh_to_cbe(tfh);
0377         prefetchw(cbe);
0378     }
0379 
0380     /*
0381  * Error if TFH state is IDLE or FMM mode & the user is issuing a UPM call.
0382      * Might be a hardware race OR a stupid user. Ignore FMM because FMM
0383      * is a transient state.
0384      */
0385     if (tfh->status != TFHSTATUS_EXCEPTION) {
0386         gru_flush_cache(tfh);
0387         sync_core();
0388         if (tfh->status != TFHSTATUS_EXCEPTION)
0389             goto failnoexception;
0390         STAT(tfh_stale_on_fault);
0391     }
0392     if (tfh->state == TFHSTATE_IDLE)
0393         goto failidle;
0394     if (tfh->state == TFHSTATE_MISS_FMM && cbk)
0395         goto failfmm;
0396 
0397     write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
0398     vaddr = tfh->missvaddr;
0399     asid = tfh->missasid;
0400     indexway = tfh->indexway;
0401     if (asid == 0)
0402         goto failnoasid;
0403 
0404     rmb();  /* TFH must be cache resident before reading ms_range_active */
0405 
0406     /*
0407      * TFH is cache resident - at least briefly. Fail the dropin
0408      * if a range invalidate is active.
0409      */
0410     if (atomic_read(&gts->ts_gms->ms_range_active))
0411         goto failactive;
0412 
0413     ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
0414     if (ret == VTOP_INVALID)
0415         goto failinval;
0416     if (ret == VTOP_RETRY)
0417         goto failupm;
0418 
0419     if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
0420         gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
0421         if (atomic || !gru_update_cch(gts)) {
0422             gts->ts_force_cch_reload = 1;
0423             goto failupm;
0424         }
0425     }
0426 
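    /* TLB preloading is attempted only for base (PAGE_SIZE) pages */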
0427     if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
0428         gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
0429         gru_flush_cache_cbe(cbe);
0430     }
0431 
0432     gru_cb_set_istatus_active(cbk);
0433     gts->ustats.tlbdropin++;
0434     tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
0435               GRU_PAGESIZE(pageshift));
0436     gru_dbg(grudev,
0437         "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x,"
0438         " rw %d, ps %d, gpa 0x%lx\n",
0439         atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid,
0440         indexway, write, pageshift, gpa);
0441     STAT(tlb_dropin);
0442     return 0;
0443 
0444 failnoasid:
0445     /* No asid (delayed unload). */
0446     STAT(tlb_dropin_fail_no_asid);
0447     gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
0448     if (!cbk)
0449         tfh_user_polling_mode(tfh);
0450     else
0451         gru_flush_cache(tfh);
0452     gru_flush_cache_cbe(cbe);
0453     return -EAGAIN;
0454 
0455 failupm:
0456     /* Atomic failure switch CBR to UPM */
0457     tfh_user_polling_mode(tfh);
0458     gru_flush_cache_cbe(cbe);
0459     STAT(tlb_dropin_fail_upm);
0460     gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
0461     return 1;
0462 
0463 failfmm:
0464     /* FMM state on UPM call */
0465     gru_flush_cache(tfh);
0466     gru_flush_cache_cbe(cbe);
0467     STAT(tlb_dropin_fail_fmm);
0468     gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
0469     return 0;
0470 
0471 failnoexception:
0472     /* TFH status did not show exception pending */
0473     gru_flush_cache(tfh);
0474     gru_flush_cache_cbe(cbe);
0475     if (cbk)
0476         gru_flush_cache(cbk);
0477     STAT(tlb_dropin_fail_no_exception);
0478     gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n",
0479         tfh, tfh->status, tfh->state);
0480     return 0;
0481 
0482 failidle:
0483     /* TFH state was idle - no miss pending */
0484     gru_flush_cache(tfh);
0485     gru_flush_cache_cbe(cbe);
0486     if (cbk)
0487         gru_flush_cache(cbk);
0488     STAT(tlb_dropin_fail_idle);
0489     gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
0490     return 0;
0491 
0492 failinval:
0493     /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
0494     tfh_exception(tfh);
0495     gru_flush_cache_cbe(cbe);
0496     STAT(tlb_dropin_fail_invalid);
0497     gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
0498     return -EFAULT;
0499 
0500 failactive:
0501     /* Range invalidate active. Switch to UPM iff atomic */
0502     if (!cbk)
0503         tfh_user_polling_mode(tfh);
0504     else
0505         gru_flush_cache(tfh);
0506     gru_flush_cache_cbe(cbe);
0507     STAT(tlb_dropin_fail_range_active);
0508     gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
0509         tfh, vaddr);
0510     return 1;
0511 }
0512 
0513 /*
0514  * Process an external interrupt from the GRU. This interrupt is
0515  * caused by a TLB miss.
0516  * Note that this is the interrupt handler that is registered with the
0517  * Linux interrupt subsystem.
0518  */
0519 static irqreturn_t gru_intr(int chiplet, int blade)
0520 {
0521     struct gru_state *gru;
0522     struct gru_tlb_fault_map imap, dmap;
0523     struct gru_thread_state *gts;
0524     struct gru_tlb_fault_handle *tfh = NULL;
0525     struct completion *cmp;
0526     int cbrnum, ctxnum;
0527 
0528     STAT(intr);
0529 
0530     gru = &gru_base[blade]->bs_grus[chiplet];
0531     if (!gru) {
0532         dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n",
0533             raw_smp_processor_id(), chiplet);
0534         return IRQ_NONE;
0535     }
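    /*
     * Read & clear this cpu's fault map. imap identifies CBs with pending
     * TLB misses; dmap identifies CBs whose asynchronous operations have
     * completed.
     */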
0536     get_clear_fault_map(gru, &imap, &dmap);
0537     gru_dbg(grudev,
0538         "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n",
0539         smp_processor_id(), chiplet, gru->gs_gid,
0540         imap.fault_bits[0], imap.fault_bits[1],
0541         dmap.fault_bits[0], dmap.fault_bits[1]);
0542 
0543     for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
0544         STAT(intr_cbr);
0545         cmp = gru->gs_blade->bs_async_wq;
0546         if (cmp)
0547             complete(cmp);
0548         gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
0549             gru->gs_gid, cbrnum, cmp ? cmp->done : -1);
0550     }
0551 
0552     for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
0553         STAT(intr_tfh);
0554         tfh = get_tfh_by_index(gru, cbrnum);
0555         prefetchw(tfh); /* Helps on hdw, required for emulator */
0556 
0557         /*
0558          * When hardware sets a bit in the faultmap, it implicitly
0559          * locks the GRU context so that it cannot be unloaded.
0560          * The gts cannot change until a TFH start/writestart command
0561          * is issued.
0562          */
0563         ctxnum = tfh->ctxnum;
0564         gts = gru->gs_gts[ctxnum];
0565 
0566         /* Spurious interrupts can cause this. Ignore. */
0567         if (!gts) {
0568             STAT(intr_spurious);
0569             continue;
0570         }
0571 
0572         /*
0573          * This is running in interrupt context. Trylock the mmap_lock.
0574          * If it fails, retry the fault in user context.
0575          */
0576         gts->ustats.fmm_tlbmiss++;
0577         if (!gts->ts_force_cch_reload &&
0578                     mmap_read_trylock(gts->ts_mm)) {
0579             gru_try_dropin(gru, gts, tfh, NULL);
0580             mmap_read_unlock(gts->ts_mm);
0581         } else {
0582             tfh_user_polling_mode(tfh);
0583             STAT(intr_mm_lock_failed);
0584         }
0585     }
0586     return IRQ_HANDLED;
0587 }
0588 
0589 irqreturn_t gru0_intr(int irq, void *dev_id)
0590 {
0591     return gru_intr(0, uv_numa_blade_id());
0592 }
0593 
0594 irqreturn_t gru1_intr(int irq, void *dev_id)
0595 {
0596     return gru_intr(1, uv_numa_blade_id());
0597 }
0598 
0599 irqreturn_t gru_intr_mblade(int irq, void *dev_id)
0600 {
0601     int blade;
0602 
0603     for_each_possible_blade(blade) {
0604         if (uv_blade_nr_possible_cpus(blade))
0605             continue;
0606         gru_intr(0, blade);
0607         gru_intr(1, blade);
0608     }
0609     return IRQ_HANDLED;
0610 }
0611 
0612 
0613 static int gru_user_dropin(struct gru_thread_state *gts,
0614                struct gru_tlb_fault_handle *tfh,
0615                void *cb)
0616 {
0617     struct gru_mm_struct *gms = gts->ts_gms;
0618     int ret;
0619 
0620     gts->ustats.upm_tlbmiss++;
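    /*
     * Retry the dropin until it succeeds or fails permanently. A return
     * value of 1 means a range invalidate was active; wait for it to
     * finish, then try again.
     */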
0621     while (1) {
0622         wait_event(gms->ms_wait_queue,
0623                atomic_read(&gms->ms_range_active) == 0);
0624         prefetchw(tfh); /* Helps on hdw, required for emulator */
0625         ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb);
0626         if (ret <= 0)
0627             return ret;
0628         STAT(call_os_wait_queue);
0629     }
0630 }
0631 
0632 /*
0633  * This interface is called as a result of a user detecting a "call OS" bit
0634  * in a user CB. This normally means that a TLB fault has occurred.
0635  *  cb - user virtual address of the CB
0636  */
0637 int gru_handle_user_call_os(unsigned long cb)
0638 {
0639     struct gru_tlb_fault_handle *tfh;
0640     struct gru_thread_state *gts;
0641     void *cbk;
0642     int ucbnum, cbrnum, ret = -EINVAL;
0643 
0644     STAT(call_os);
0645 
0646     /* sanity check the cb pointer */
0647     ucbnum = get_cb_number((void *)cb);
0648     if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
0649         return -EINVAL;
0650 
0651     gts = gru_find_lock_gts(cb);
0652     if (!gts)
0653         return -EINVAL;
0654     gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
0655 
0656     if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
0657         goto exit;
0658 
0659     gru_check_context_placement(gts);
0660 
0661     /*
0662      * CCH may contain stale data if ts_force_cch_reload is set.
0663      */
0664     if (gts->ts_gru && gts->ts_force_cch_reload) {
0665         gts->ts_force_cch_reload = 0;
0666         gru_update_cch(gts);
0667     }
0668 
0669     ret = -EAGAIN;
0670     cbrnum = thread_cbr_number(gts, ucbnum);
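    /* A dropin is possible only if the context is currently loaded on a GRU */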
0671     if (gts->ts_gru) {
0672         tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
0673         cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr,
0674                 gts->ts_ctxnum, ucbnum);
0675         ret = gru_user_dropin(gts, tfh, cbk);
0676     }
0677 exit:
0678     gru_unlock_gts(gts);
0679     return ret;
0680 }
0681 
0682 /*
0683  * Fetch the exception detail information for a CB that terminated with
0684  * an exception.
0685  */
0686 int gru_get_exception_detail(unsigned long arg)
0687 {
0688     struct control_block_extended_exc_detail excdet;
0689     struct gru_control_block_extended *cbe;
0690     struct gru_thread_state *gts;
0691     int ucbnum, cbrnum, ret;
0692 
0693     STAT(user_exception);
0694     if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
0695         return -EFAULT;
0696 
0697     gts = gru_find_lock_gts(excdet.cb);
0698     if (!gts)
0699         return -EINVAL;
0700 
0701     gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
0702     ucbnum = get_cb_number((void *)excdet.cb);
0703     if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
0704         ret = -EINVAL;
0705     } else if (gts->ts_gru) {
0706         cbrnum = thread_cbr_number(gts, ucbnum);
0707         cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
0708         gru_flush_cache(cbe);   /* CBE not coherent */
0709         sync_core();        /* make sure we have current data */
0710         excdet.opc = cbe->opccpy;
0711         excdet.exopc = cbe->exopccpy;
0712         excdet.ecause = cbe->ecause;
0713         excdet.exceptdet0 = cbe->idef1upd;
0714         excdet.exceptdet1 = cbe->idef3upd;
0715         excdet.cbrstate = cbe->cbrstate;
0716         excdet.cbrexecstatus = cbe->cbrexecstatus;
0717         gru_flush_cache_cbe(cbe);
0718         ret = 0;
0719     } else {
0720         ret = -EAGAIN;
0721     }
0722     gru_unlock_gts(gts);
0723 
0724     gru_dbg(grudev,
0725         "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
0726         "exdet0 0x%lx, exdet1 0x%x\n",
0727         excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
0728         excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
0729     if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
0730         ret = -EFAULT;
0731     return ret;
0732 }
0733 
0734 /*
0735  * User request to unload a context. Content is saved for possible reload.
0736  */
0737 static int gru_unload_all_contexts(void)
0738 {
0739     struct gru_thread_state *gts;
0740     struct gru_state *gru;
0741     int gid, ctxnum;
0742 
0743     if (!capable(CAP_SYS_ADMIN))
0744         return -EPERM;
0745     foreach_gid(gid) {
0746         gru = GID_TO_GRU(gid);
0747         spin_lock(&gru->gs_lock);
0748         for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
0749             gts = gru->gs_gts[ctxnum];
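            /*
             * gs_lock is dropped while the context is unloaded; the trylock
             * on ts_ctxlock keeps other users of this context away.
             */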
0750             if (gts && mutex_trylock(&gts->ts_ctxlock)) {
0751                 spin_unlock(&gru->gs_lock);
0752                 gru_unload_context(gts, 1);
0753                 mutex_unlock(&gts->ts_ctxlock);
0754                 spin_lock(&gru->gs_lock);
0755             }
0756         }
0757         spin_unlock(&gru->gs_lock);
0758     }
0759     return 0;
0760 }
0761 
0762 int gru_user_unload_context(unsigned long arg)
0763 {
0764     struct gru_thread_state *gts;
0765     struct gru_unload_context_req req;
0766 
0767     STAT(user_unload_context);
0768     if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
0769         return -EFAULT;
0770 
0771     gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
0772 
0773     if (!req.gseg)
0774         return gru_unload_all_contexts();
0775 
0776     gts = gru_find_lock_gts(req.gseg);
0777     if (!gts)
0778         return -EINVAL;
0779 
0780     if (gts->ts_gru)
0781         gru_unload_context(gts, 1);
0782     gru_unlock_gts(gts);
0783 
0784     return 0;
0785 }
0786 
0787 /*
0788  * User request to flush a range of virtual addresses from the GRU TLB
0789  * (Mainly for testing).
0790  */
0791 int gru_user_flush_tlb(unsigned long arg)
0792 {
0793     struct gru_thread_state *gts;
0794     struct gru_flush_tlb_req req;
0795     struct gru_mm_struct *gms;
0796 
0797     STAT(user_flush_tlb);
0798     if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
0799         return -EFAULT;
0800 
0801     gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
0802         req.vaddr, req.len);
0803 
0804     gts = gru_find_lock_gts(req.gseg);
0805     if (!gts)
0806         return -EINVAL;
0807 
0808     gms = gts->ts_gms;
0809     gru_unlock_gts(gts);
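    /* Flush after dropping the gts locks; only the gru_mm_struct is needed */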
0810     gru_flush_tlb_range(gms, req.vaddr, req.len);
0811 
0812     return 0;
0813 }
0814 
0815 /*
0816  * Fetch GSEG statistics
0817  */
0818 long gru_get_gseg_statistics(unsigned long arg)
0819 {
0820     struct gru_thread_state *gts;
0821     struct gru_get_gseg_statistics_req req;
0822 
0823     if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
0824         return -EFAULT;
0825 
0826     /*
0827      * The library creates arrays of contexts for threaded programs.
0828      * If no gts exists in the array, the context has never been used & all
0829      * statistics are implicitly 0.
0830      */
0831     gts = gru_find_lock_gts(req.gseg);
0832     if (gts) {
0833         memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
0834         gru_unlock_gts(gts);
0835     } else {
0836         memset(&req.stats, 0, sizeof(gts->ustats));
0837     }
0838 
0839     if (copy_to_user((void __user *)arg, &req, sizeof(req)))
0840         return -EFAULT;
0841 
0842     return 0;
0843 }
0844 
0845 /*
0846  * Register the current task as the user of the GSEG slice.
0847  * Needed for TLB fault interrupt targeting.
0848  */
0849 int gru_set_context_option(unsigned long arg)
0850 {
0851     struct gru_thread_state *gts;
0852     struct gru_set_context_option_req req;
0853     int ret = 0;
0854 
0855     STAT(set_context_option);
0856     if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
0857         return -EFAULT;
0858     gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
0859 
0860     gts = gru_find_lock_gts(req.gseg);
0861     if (!gts) {
0862         gts = gru_alloc_locked_gts(req.gseg);
0863         if (IS_ERR(gts))
0864             return PTR_ERR(gts);
0865     }
0866 
0867     switch (req.op) {
0868     case sco_blade_chiplet:
0869         /* Select blade/chiplet for GRU context */
0870         if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB ||
0871             req.val1 < -1 || req.val1 >= GRU_MAX_BLADES ||
0872             (req.val1 >= 0 && !gru_base[req.val1])) {
0873             ret = -EINVAL;
0874         } else {
0875             gts->ts_user_blade_id = req.val1;
0876             gts->ts_user_chiplet_id = req.val0;
0877             gru_check_context_placement(gts);
0878         }
0879         break;
0880     case sco_gseg_owner:
0881         /* Register the current task as the GSEG owner */
0882         gts->ts_tgid_owner = current->tgid;
0883         break;
0884     case sco_cch_req_slice:
0885         /* Set the CCH slice option */
0886         gts->ts_cch_req_slice = req.val1 & 3;
0887         break;
0888     default:
0889         ret = -EINVAL;
0890     }
0891     gru_unlock_gts(gts);
0892 
0893     return ret;
0894 }