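/*
 * SN Platform GRU Driver
 *
 * MMU notifier callbacks + TLB flushing. All GRU TLB invalidates for
 * user address spaces are issued from this file.
 */
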
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/hugetlb.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/srcu.h>
#include <asm/processor.h>
#include "gru.h"
#include "grutables.h"
#include <asm/uv/uv_hub.h>

/* use the low bits of the cycle counter as a cheap pseudo-random number */
#define gru_random()	get_cycles()
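
/*
 * get_lock_tgh_handle
 *
 * Select and lock a TLB Global Handle (TGH) for issuing a TLB purge:
 *   - for a GRU on the caller's own blade, each cpu uses a fixed TGH
 *     derived from its blade-local cpu number (get_on_blade_tgh), so
 *     there is normally no contention for the handle
 *   - for an off-blade GRU, a TGH is chosen pseudo-randomly from the
 *     handles reserved for remote purges (get_off_blade_tgh), spreading
 *     contention across cpus
 *
 * Illustrative numbers: with gs_tgh_local_shift == 1, blade cpu 5 maps
 * to local TGH 2; with gs_tgh_first_remote == 16 and GRU_NUM_TGH == 24,
 * an off-blade purge picks a TGH in the range 16..23.
 */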
static inline int get_off_blade_tgh(struct gru_state *gru)
{
	int n;

	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
	n = gru_random() % n;
	n += gru->gs_tgh_first_remote;
	return n;
}

static inline int get_on_blade_tgh(struct gru_state *gru)
{
	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
}

static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state *gru)
{
	struct gru_tlb_global_handle *tgh;
	int n;

	/* pin the cpu: TGH selection depends on the current blade & cpu# */
	preempt_disable();
	if (uv_numa_blade_id() == gru->gs_blade_id)
		n = get_on_blade_tgh(gru);
	else
		n = get_off_blade_tgh(gru);
	tgh = get_tgh_by_index(gru, n);
	lock_tgh_handle(tgh);

	return tgh;
}

static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
	unlock_tgh_handle(tgh);
	preempt_enable();
}

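/*
 * gru_flush_tlb_range
 *
 * General purpose TLB invalidation function. Scans every GRU on which
 * the mm has an ASID allocated (gms->ms_asidmap). For each such GRU,
 * if contexts are actually loaded (mt_ctxbitmap set), the affected
 * range is purged from the GRU TLB via a TGH. Otherwise the ASID is
 * simply retired: a new ASID is assigned on the next fault, which
 * implicitly discards the mm's entire TLB footprint on that GRU.
 *
 * Example: a 0x5000-byte range with 4K pages covers 5 pages, so pages
 * 0..num-1 (num == 5) are invalidated starting at 'start'; num is
 * capped at GRUMAXINVAL.
 */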
void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
			 unsigned long len)
{
	struct gru_state *gru;
	struct gru_mm_tracker *asids;
	struct gru_tlb_global_handle *tgh;
	unsigned long num;
	int grupagesize, pagesize, pageshift, gid, asid;

	/* only the base page size is handled here */
	pageshift = PAGE_SHIFT;
	pagesize = (1UL << pageshift);
	grupagesize = GRU_PAGESIZE(pageshift);
	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);

	STAT(flush_tlb);
	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
		start, len, gms->ms_asidmap[0]);

	spin_lock(&gms->ms_asid_lock);
	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
		STAT(flush_tlb_gru);
		gru = GID_TO_GRU(gid);
		asids = gms->ms_asids + gid;
		asid = asids->mt_asid;
		if (asids->mt_ctxbitmap && asid) {
			/* contexts loaded - purge the range from the GRU TLB */
			STAT(flush_tlb_gru_tgh);
			asid = GRUASID(asid, start);
			gru_dbg(grudev,
	"  FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
				gid, asid, start, grupagesize, num,
				asids->mt_ctxbitmap);
			tgh = get_lock_tgh_handle(gru);
			tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
				       num - 1, asids->mt_ctxbitmap);
			get_unlock_tgh_handle(tgh);
		} else {
			/* nothing loaded - retire the asid instead */
			STAT(flush_tlb_gru_zero_asid);
			asids->mt_asid = 0;
			__clear_bit(gru->gs_gid, gms->ms_asidmap);
			gru_dbg(grudev,
	"  CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
				gid, asid, asids->mt_ctxbitmap,
				gms->ms_asidmap[0]);
		}
	}
	spin_unlock(&gms->ms_asid_lock);
}
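
/*
 * Flush the entire TLB on a chiplet.
 */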
void gru_flush_all_tlb(struct gru_state *gru)
{
	struct gru_tlb_global_handle *tgh;

	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	tgh = get_lock_tgh_handle(gru);
	/* ctxbitmap 0xffff selects every context on the chiplet */
	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
	get_unlock_tgh_handle(tgh);
}
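
/*
 * MMUOPS notifier callout functions
 *
 * gru_invalidate_range_start() marks an invalidation as in progress
 * (ms_range_active) and purges the range from all GRU TLBs;
 * gru_invalidate_range_end() clears the flag and wakes any waiters.
 * The fault/dropin path elsewhere in the driver is expected to check
 * ms_range_active (sleeping on ms_wait_queue) so that stale
 * translations are not dropped into the TLB while an unmap is racing.
 */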
static int gru_invalidate_range_start(struct mmu_notifier *mn,
				      const struct mmu_notifier_range *range)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	STAT(mmu_invalidate_range);
	atomic_inc(&gms->ms_range_active);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
		range->start, range->end, atomic_read(&gms->ms_range_active));
	gru_flush_tlb_range(gms, range->start, range->end - range->start);

	return 0;
}

static void gru_invalidate_range_end(struct mmu_notifier *mn,
				     const struct mmu_notifier_range *range)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	/* ..range_start() already flushed the TLB; nothing to purge here */
	(void)atomic_dec_and_test(&gms->ms_range_active);

	wake_up_all(&gms->ms_wait_queue);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n",
		gms, range->start, range->end);
}

static struct mmu_notifier *gru_alloc_notifier(struct mm_struct *mm)
{
	struct gru_mm_struct *gms;

	gms = kzalloc(sizeof(*gms), GFP_KERNEL);
	if (!gms)
		return ERR_PTR(-ENOMEM);
	STAT(gms_alloc);
	spin_lock_init(&gms->ms_asid_lock);
	init_waitqueue_head(&gms->ms_wait_queue);

	return &gms->ms_notifier;
}

static void gru_free_notifier(struct mmu_notifier *mn)
{
	kfree(container_of(mn, struct gru_mm_struct, ms_notifier));
	STAT(gms_free);
}

static const struct mmu_notifier_ops gru_mmuops = {
	.invalidate_range_start	= gru_invalidate_range_start,
	.invalidate_range_end	= gru_invalidate_range_end,
	.alloc_notifier		= gru_alloc_notifier,
	.free_notifier		= gru_free_notifier,
};

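/*
 * gru_register_mmu_notifier() attaches (or finds) the per-mm notifier
 * for the current task. mmu_notifier_get_locked() returns an existing
 * notifier registered with the same ops on this mm, taking a reference,
 * or calls .alloc_notifier to create one; it requires the caller to
 * hold mmap_lock for write.
 */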
struct gru_mm_struct *gru_register_mmu_notifier(void)
{
	struct mmu_notifier *mn;

	mn = mmu_notifier_get_locked(&gru_mmuops, current->mm);
	if (IS_ERR(mn))
		return ERR_CAST(mn);

	return container_of(mn, struct gru_mm_struct, ms_notifier);
}

void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
{
	mmu_notifier_put(&gms->ms_notifier);
}
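
/*
 * Typical pairing (callers live elsewhere in the driver; sketch only):
 *
 *	gms = gru_register_mmu_notifier();	// at GRU context creation
 *	...gms tracks ASIDs/TLB state while the context is in use...
 *	gru_drop_mmu_notifier(gms);		// at context teardown
 *
 * The gms is freed through gru_free_notifier() only after the last
 * reference is dropped and an SRCU grace period has passed (hence the
 * srcu.h include above).
 */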
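
/*
 * Setup TGH parameters. There are:
 *	- a fixed number of TGH handles per GRU chiplet; a portion
 *	  (MAX_LOCAL_TGH) is reserved for blade-local cpus
 *	- the remaining handles are used, via random selection, for
 *	  purges issued on behalf of off-blade cpus
 * If the blade has more cpus than reserved local handles, multiple
 * cpus share each local TGH (see gs_tgh_local_shift below).
 */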
#define MAX_LOCAL_TGH	16

void gru_tgh_flush_init(struct gru_state *gru)
{
	int cpus, shift = 0, n;

	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);

	/* n = cpus rounded up to the next power of 2 */
	if (cpus) {
		n = 1 << fls(cpus - 1);

		/*
		 * shift count for converting a local cpu# to a TGH index:
		 *	0 if cpus <= MAX_LOCAL_TGH,
		 *	1 if cpus <= 2*MAX_LOCAL_TGH,
		 *	etc
		 */
		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
	}
	gru->gs_tgh_local_shift = shift;

	/* first TGH index to use for remote purges */
	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
}
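
/*
 * Worked example (illustrative): on a blade with 32 possible cpus and
 * MAX_LOCAL_TGH == 16:
 *	n     = 1 << fls(31)               = 32
 *	shift = max(0, fls(31) - fls(15))  = 5 - 4 = 1
 * so two cpus share each local TGH (cpu# >> 1), and
 *	gs_tgh_first_remote = (32 + 2 - 1) >> 1 = 16
 * leaving the TGHs from index 16 upward for off-blade purges.
 */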