// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) NVIDIA CORPORATION. All rights reserved
 */

#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include <asm/smp_plat.h>

#include <soc/tegra/bpmp.h>
#include <soc/tegra/bpmp-abi.h>

#define KHZ			1000
#define REF_CLK_MHZ		408
#define US_DELAY		500
#define CPUFREQ_TBL_STEP_HZ	(50 * KHZ * KHZ)
#define MAX_CNT			~0U

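/* ndiv is a 9-bit field in the per-core scratch frequency register */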
#define NDIV_MASK		0x1FF

#define CORE_OFFSET(cpu)			(cpu * 8)
#define CMU_CLKS_BASE				0x2000
#define SCRATCH_FREQ_CORE_REG(data, cpu)	(data->regs + CMU_CLKS_BASE + CORE_OFFSET(cpu))

#define MMCRAB_CLUSTER_BASE(cl)			(0x30000 + (cl * 0x10000))
#define CLUSTER_ACTMON_BASE(data, cl) \
			(data->regs + (MMCRAB_CLUSTER_BASE(cl) + data->soc->actmon_cntr_base))
#define CORE_ACTMON_CNTR_REG(data, cl, cpu)	(CLUSTER_ACTMON_BASE(data, cl) + CORE_OFFSET(cpu))

/* cpufreq transition latency, in nanoseconds */
#define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000)

enum cluster {
	CLUSTER0,
	CLUSTER1,
	CLUSTER2,
	CLUSTER3,
	MAX_CLUSTERS,
};

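/* Two snapshots of the core and reference clock counters, taken US_DELAY apart */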
struct tegra_cpu_ctr {
	u32 cpu;
	u32 coreclk_cnt, last_coreclk_cnt;
	u32 refclk_cnt, last_refclk_cnt;
};

struct read_counters_work {
	struct work_struct work;
	struct tegra_cpu_ctr c;
};

struct tegra_cpufreq_ops {
	void (*read_counters)(struct tegra_cpu_ctr *c);
	void (*set_cpu_ndiv)(struct cpufreq_policy *policy, u64 ndiv);
	void (*get_cpu_cluster_id)(u32 cpu, u32 *cpuid, u32 *clusterid);
	int (*get_cpu_ndiv)(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv);
};

struct tegra_cpufreq_soc {
	struct tegra_cpufreq_ops *ops;
	int maxcpus_per_cluster;
	phys_addr_t actmon_cntr_base;
};

struct tegra194_cpufreq_data {
	void __iomem *regs;
	size_t num_clusters;
	struct cpufreq_frequency_table **tables;
	const struct tegra_cpufreq_soc *soc;
};

static struct workqueue_struct *read_counters_wq;

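/*
 * Runs on the target CPU via smp_call_function_single() to read that
 * CPU's MPIDR, masked to the hardware affinity fields.
 */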
static void tegra_get_cpu_mpidr(void *mpidr)
{
	*((u64 *)mpidr) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
}

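/*
 * On Tegra234, MPIDR affinity level 1 holds the core number within the
 * cluster and affinity level 2 holds the cluster number.
 */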
static void tegra234_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid)
{
	u64 mpidr;

	smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true);

	if (cpuid)
		*cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	if (clusterid)
		*clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 2);
}

static int tegra234_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv)
{
	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
	void __iomem *freq_core_reg;
	u64 mpidr_id;

	/* use physical id to get address of per core frequency register */
	mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid;
	freq_core_reg = SCRATCH_FREQ_CORE_REG(data, mpidr_id);

	*ndiv = readl(freq_core_reg) & NDIV_MASK;

	return 0;
}
0113
0114 static void tegra234_set_cpu_ndiv(struct cpufreq_policy *policy, u64 ndiv)
0115 {
0116 struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
0117 void __iomem *freq_core_reg;
0118 u32 cpu, cpuid, clusterid;
0119 u64 mpidr_id;
0120
0121 for_each_cpu_and(cpu, policy->cpus, cpu_online_mask) {
0122 data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid);
0123
0124
0125 mpidr_id = (clusterid * data->soc->maxcpus_per_cluster) + cpuid;
0126 freq_core_reg = SCRATCH_FREQ_CORE_REG(data, mpidr_id);
0127
0128 writel(ndiv, freq_core_reg);
0129 }
0130 }
0131
/*
 * The ACTMON counter register provides two counter values with a single
 * 64-bit read: the upper 32 bits count cycles of the constant reference
 * clock and the lower 32 bits count core clock cycles. Sampling the
 * register twice, US_DELAY microseconds apart, provides the data needed
 * to reconstruct the average frequency the core actually ran at.
 */
static void tegra234_read_counters(struct tegra_cpu_ctr *c)
{
	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
	void __iomem *actmon_reg;
	u32 cpuid, clusterid;
	u64 val;

	data->soc->ops->get_cpu_cluster_id(c->cpu, &cpuid, &clusterid);
	actmon_reg = CORE_ACTMON_CNTR_REG(data, clusterid, cpuid);

	val = readq(actmon_reg);
	c->last_refclk_cnt = upper_32_bits(val);
	c->last_coreclk_cnt = lower_32_bits(val);
	udelay(US_DELAY);
	val = readq(actmon_reg);
	c->refclk_cnt = upper_32_bits(val);
	c->coreclk_cnt = lower_32_bits(val);
}

static struct tegra_cpufreq_ops tegra234_cpufreq_ops = {
	.read_counters = tegra234_read_counters,
	.get_cpu_cluster_id = tegra234_get_cpu_cluster_id,
	.get_cpu_ndiv = tegra234_get_cpu_ndiv,
	.set_cpu_ndiv = tegra234_set_cpu_ndiv,
};

static const struct tegra_cpufreq_soc tegra234_cpufreq_soc = {
	.ops = &tegra234_cpufreq_ops,
	.actmon_cntr_base = 0x9000,
	.maxcpus_per_cluster = 4,
};

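/*
 * On Tegra194, MPIDR affinity level 0 holds the core number within the
 * cluster and affinity level 1 holds the cluster number.
 */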
static void tegra194_get_cpu_cluster_id(u32 cpu, u32 *cpuid, u32 *clusterid)
{
	u64 mpidr;

	smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true);

	if (cpuid)
		*cpuid = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	if (clusterid)
		*clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
}

/*
 * The per-core frequency feedback system register provides two counter
 * values with a single 64-bit read: the lower 32 bits count cycles of
 * the constant reference clock and the upper 32 bits count core clock
 * cycles. The counter values are used to determine the average actual
 * frequency a core has run at over a reference time period.
 */
static u64 read_freq_feedback(void)
{
	u64 val = 0;

	asm volatile("mrs %0, s3_0_c15_c0_5" : "=r" (val) : );

	return val;
}

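/* Convert an ndiv value to a frequency in kHz: ref_clk * ndiv / (pdiv * mdiv) */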
static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response
				   *nltbl, u16 ndiv)
{
	return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv);
}

static void tegra194_read_counters(struct tegra_cpu_ctr *c)
{
	u64 val;

	val = read_freq_feedback();
	c->last_refclk_cnt = lower_32_bits(val);
	c->last_coreclk_cnt = upper_32_bits(val);
	udelay(US_DELAY);
	val = read_freq_feedback();
	c->refclk_cnt = lower_32_bits(val);
	c->coreclk_cnt = upper_32_bits(val);
}

static void tegra_read_counters(struct work_struct *work)
{
	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
	struct read_counters_work *read_counters_work;
	struct tegra_cpu_ctr *c;

	/*
	 * The 32-bit reference clock counter runs at a constant 408 MHz,
	 * so it takes 2^32 / 408 MHz, about 10.5 seconds, to wrap around.
	 * Likewise, the 32-bit core clock counter runs at the core clock
	 * rate and wraps even sooner at higher frequencies. Both
	 * wrap-arounds are handled in tegra194_calculate_speed().
	 *
	 * This work item is queued on the CPU being sampled (see
	 * tegra194_calculate_speed()), so the per-core counters are read
	 * on the CPU they belong to.
	 */
	read_counters_work = container_of(work, struct read_counters_work,
					  work);
	c = &read_counters_work->c;

	data->soc->ops->read_counters(c);
}

/*
 * Return the instantaneous cpu speed.
 * The instantaneous frequency is reconstructed on every query:
 *	- read the core and reference clock counters;
 *	- delay for US_DELAY microseconds;
 *	- read both cycle counters again;
 *	- compute the frequency from the counter deltas, using the
 *	  reference clock counter delta as the time base.
 *
 *	delta time period = x sec
 *			  = delta ref_clk_counter / (408 * 10^6) sec
 *	freq in Hz = delta cycles / x sec
 *		   = (delta cycles * 408 * 10^6) / delta ref_clk_counter
 *	in KHz	   = (delta cycles * 408 * 10^3) / delta ref_clk_counter
 *
 * @cpu - logical cpu whose freq is to be updated
 * Returns freq in KHz on success, 0 if the counters did not advance
 * (cpu idle or offline).
 */
static unsigned int tegra194_calculate_speed(u32 cpu)
{
	struct read_counters_work read_counters_work;
	struct tegra_cpu_ctr c;
	u32 delta_refcnt;
	u32 delta_ccnt;
	u32 rate_mhz;

	/*
	 * udelay() is required to reconstruct cpu frequency over an
	 * observation window. Using workqueue to call udelay() with
	 * interrupts enabled.
	 */
	read_counters_work.c.cpu = cpu;
	INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
	queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
	flush_work(&read_counters_work.work);
	c = read_counters_work.c;

	if (c.coreclk_cnt < c.last_coreclk_cnt)
		delta_ccnt = c.coreclk_cnt + (MAX_CNT - c.last_coreclk_cnt);
	else
		delta_ccnt = c.coreclk_cnt - c.last_coreclk_cnt;
	if (!delta_ccnt)
		return 0;

	/* ref clock is 32 bits */
	if (c.refclk_cnt < c.last_refclk_cnt)
		delta_refcnt = c.refclk_cnt + (MAX_CNT - c.last_refclk_cnt);
	else
		delta_refcnt = c.refclk_cnt - c.last_refclk_cnt;
	if (!delta_refcnt) {
		pr_debug("cpufreq: %d is idle, delta_refcnt: 0\n", cpu);
		return 0;
	}
	rate_mhz = ((unsigned long)(delta_ccnt * REF_CLK_MHZ)) / delta_refcnt;

	return (rate_mhz * KHZ); /* in KHz */
}

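/* Read the per-core ndiv request system register; runs on the target CPU */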
static void tegra194_get_cpu_ndiv_sysreg(void *ndiv)
{
	u64 ndiv_val;

	asm volatile("mrs %0, s3_0_c15_c0_4" : "=r" (ndiv_val) : );

	*(u64 *)ndiv = ndiv_val;
}

static int tegra194_get_cpu_ndiv(u32 cpu, u32 cpuid, u32 clusterid, u64 *ndiv)
{
	int ret;

	/* ndiv already points at the caller's u64, so pass it through directly */
	ret = smp_call_function_single(cpu, tegra194_get_cpu_ndiv_sysreg, ndiv, true);

	return ret;
}

static void tegra194_set_cpu_ndiv_sysreg(void *data)
{
	u64 ndiv_val = *(u64 *)data;

	asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv_val));
}

static void tegra194_set_cpu_ndiv(struct cpufreq_policy *policy, u64 ndiv)
{
	on_each_cpu_mask(policy->cpus, tegra194_set_cpu_ndiv_sysreg, &ndiv, true);
}

static unsigned int tegra194_get_speed(u32 cpu)
{
	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
	struct cpufreq_frequency_table *pos;
	u32 cpuid, clusterid;
	unsigned int rate;
	u64 ndiv;
	int ret;

	data->soc->ops->get_cpu_cluster_id(cpu, &cpuid, &clusterid);

	/* reconstruct actual cpu freq using counters */
	rate = tegra194_calculate_speed(cpu);

	/* get last written ndiv value */
	ret = data->soc->ops->get_cpu_ndiv(cpu, cpuid, clusterid, &ndiv);
	if (WARN_ON_ONCE(ret))
		return rate;

	/*
	 * If the reconstructed frequency has an acceptable delta from
	 * the last written value, then return the frequency corresponding
	 * to the last written ndiv value from freq_table. This is done
	 * to return a consistent value.
	 */
	cpufreq_for_each_valid_entry(pos, data->tables[clusterid]) {
		if (pos->driver_data != ndiv)
			continue;

		if (abs(pos->frequency - rate) > 115200) {
			pr_warn("cpufreq: cpu%d,cur:%u,set:%u,set ndiv:%llu\n",
				cpu, rate, pos->frequency, ndiv);
		} else {
			rate = pos->frequency;
		}
		break;
	}

	return rate;
}

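/*
 * All CPUs of a cluster share one frequency domain, so a single cpufreq
 * policy spans every possible CPU (up to maxcpus_per_cluster) of the
 * cluster that owns policy->cpu.
 */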
static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
{
	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
	int maxcpus_per_cluster = data->soc->maxcpus_per_cluster;
	u32 start_cpu, cpu;
	u32 clusterid;

	data->soc->ops->get_cpu_cluster_id(policy->cpu, NULL, &clusterid);

	if (clusterid >= data->num_clusters || !data->tables[clusterid])
		return -EINVAL;

	start_cpu = rounddown(policy->cpu, maxcpus_per_cluster);

	for (cpu = start_cpu; cpu < (start_cpu + maxcpus_per_cluster); cpu++) {
		if (cpu_possible(cpu))
			cpumask_set_cpu(cpu, policy->cpus);
	}

	policy->freq_table = data->tables[clusterid];
	policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY;

	return 0;
}

static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
				       unsigned int index)
{
	struct cpufreq_frequency_table *tbl = policy->freq_table + index;
	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();

	/*
	 * Each core writes the requested frequency into its per-core
	 * register. All cores in a cluster then run at the same frequency,
	 * which is the maximum of the values requested by the cores in
	 * that cluster.
	 */
	data->soc->ops->set_cpu_ndiv(policy, (u64)tbl->driver_data);

	return 0;
}

static struct cpufreq_driver tegra194_cpufreq_driver = {
	.name = "tegra194",
	.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
	.verify = cpufreq_generic_frequency_table_verify,
	.target_index = tegra194_cpufreq_set_target,
	.get = tegra194_get_speed,
	.init = tegra194_cpufreq_init,
	.attr = cpufreq_generic_attr,
};

static struct tegra_cpufreq_ops tegra194_cpufreq_ops = {
	.read_counters = tegra194_read_counters,
	.get_cpu_cluster_id = tegra194_get_cpu_cluster_id,
	.get_cpu_ndiv = tegra194_get_cpu_ndiv,
	.set_cpu_ndiv = tegra194_set_cpu_ndiv,
};

static const struct tegra_cpufreq_soc tegra194_cpufreq_soc = {
	.ops = &tegra194_cpufreq_ops,
	.maxcpus_per_cluster = 2,
};

static void tegra194_cpufreq_free_resources(void)
{
	destroy_workqueue(read_counters_wq);
}

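/*
 * Build a cluster's frequency table from the ndiv limits reported by the
 * BPMP firmware (MRQ_CPU_NDIV_LIMITS): one entry per freq_table_step_size
 * ndiv step from ndiv_min to ndiv_max, terminated by CPUFREQ_TABLE_END.
 * Returns NULL when the BPMP reports the cluster as not available.
 */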
static struct cpufreq_frequency_table *
init_freq_table(struct platform_device *pdev, struct tegra_bpmp *bpmp,
		unsigned int cluster_id)
{
	struct cpufreq_frequency_table *freq_table;
	struct mrq_cpu_ndiv_limits_response resp;
	unsigned int num_freqs, ndiv, delta_ndiv;
	struct mrq_cpu_ndiv_limits_request req;
	struct tegra_bpmp_message msg;
	u16 freq_table_step_size;
	int err, index;

	memset(&req, 0, sizeof(req));
	req.cluster_id = cluster_id;

	memset(&msg, 0, sizeof(msg));
	msg.mrq = MRQ_CPU_NDIV_LIMITS;
	msg.tx.data = &req;
	msg.tx.size = sizeof(req);
	msg.rx.data = &resp;
	msg.rx.size = sizeof(resp);

	err = tegra_bpmp_transfer(bpmp, &msg);
	if (err)
		return ERR_PTR(err);
	if (msg.rx.ret == -BPMP_EINVAL) {
		/* Cluster not available */
		return NULL;
	}
	if (msg.rx.ret)
		return ERR_PTR(-EINVAL);

	/*
	 * Make sure the frequency table step is a multiple of mdiv to match
	 * vhint table granularity.
	 */
	freq_table_step_size = resp.mdiv *
			DIV_ROUND_UP(CPUFREQ_TBL_STEP_HZ, resp.ref_clk_hz);

	dev_dbg(&pdev->dev, "cluster %d: frequency table step size: %d\n",
		cluster_id, freq_table_step_size);

	delta_ndiv = resp.ndiv_max - resp.ndiv_min;

	if (unlikely(delta_ndiv == 0)) {
		num_freqs = 1;
	} else {
		/* We store both ndiv_min and ndiv_max hence the +1 */
		num_freqs = delta_ndiv / freq_table_step_size + 1;
	}

	num_freqs += (delta_ndiv % freq_table_step_size) ? 1 : 0;

	freq_table = devm_kcalloc(&pdev->dev, num_freqs + 1,
				  sizeof(*freq_table), GFP_KERNEL);
	if (!freq_table)
		return ERR_PTR(-ENOMEM);

	for (index = 0, ndiv = resp.ndiv_min;
	     ndiv < resp.ndiv_max;
	     index++, ndiv += freq_table_step_size) {
		freq_table[index].driver_data = ndiv;
		freq_table[index].frequency = map_ndiv_to_freq(&resp, ndiv);
	}

	freq_table[index].driver_data = resp.ndiv_max;
	freq_table[index++].frequency = map_ndiv_to_freq(&resp, resp.ndiv_max);
	freq_table[index].frequency = CPUFREQ_TABLE_END;

	return freq_table;
}

static int tegra194_cpufreq_probe(struct platform_device *pdev)
{
	const struct tegra_cpufreq_soc *soc;
	struct tegra194_cpufreq_data *data;
	struct tegra_bpmp *bpmp;
	int err, i;

	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	soc = of_device_get_match_data(&pdev->dev);

	if (soc->ops && soc->maxcpus_per_cluster) {
		data->soc = soc;
	} else {
		dev_err(&pdev->dev, "soc data missing\n");
		return -EINVAL;
	}

	data->num_clusters = MAX_CLUSTERS;
	data->tables = devm_kcalloc(&pdev->dev, data->num_clusters,
				    sizeof(*data->tables), GFP_KERNEL);
	if (!data->tables)
		return -ENOMEM;

	if (soc->actmon_cntr_base) {
		/* mmio registers are used for frequency request and re-construction */
		data->regs = devm_platform_ioremap_resource(pdev, 0);
		if (IS_ERR(data->regs))
			return PTR_ERR(data->regs);
	}

	platform_set_drvdata(pdev, data);

	bpmp = tegra_bpmp_get(&pdev->dev);
	if (IS_ERR(bpmp))
		return PTR_ERR(bpmp);

	read_counters_wq = alloc_workqueue("read_counters_wq", __WQ_LEGACY, 1);
	if (!read_counters_wq) {
		dev_err(&pdev->dev, "failed to create read_counters workqueue\n");
		err = -EINVAL;
		goto put_bpmp;
	}

	for (i = 0; i < data->num_clusters; i++) {
		data->tables[i] = init_freq_table(pdev, bpmp, i);
		if (IS_ERR(data->tables[i])) {
			err = PTR_ERR(data->tables[i]);
			goto err_free_res;
		}
	}

	tegra194_cpufreq_driver.driver_data = data;

	err = cpufreq_register_driver(&tegra194_cpufreq_driver);
	if (!err)
		goto put_bpmp;

err_free_res:
	tegra194_cpufreq_free_resources();
put_bpmp:
	tegra_bpmp_put(bpmp);
	return err;
}

static int tegra194_cpufreq_remove(struct platform_device *pdev)
{
	cpufreq_unregister_driver(&tegra194_cpufreq_driver);
	tegra194_cpufreq_free_resources();

	return 0;
}

static const struct of_device_id tegra194_cpufreq_of_match[] = {
	{ .compatible = "nvidia,tegra194-ccplex", .data = &tegra194_cpufreq_soc },
	{ .compatible = "nvidia,tegra234-ccplex-cluster", .data = &tegra234_cpufreq_soc },
	{ /* sentinel */ }
};

static struct platform_driver tegra194_ccplex_driver = {
	.driver = {
		.name = "tegra194-cpufreq",
		.of_match_table = tegra194_cpufreq_of_match,
	},
	.probe = tegra194_cpufreq_probe,
	.remove = tegra194_cpufreq_remove,
};
module_platform_driver(tegra194_ccplex_driver);

MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
MODULE_AUTHOR("Sumit Gupta <sumitg@nvidia.com>");
MODULE_DESCRIPTION("NVIDIA Tegra194 cpufreq driver");
MODULE_LICENSE("GPL v2");