Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 /*
0004  * Copyright 2016-2021 HabanaLabs, Ltd.
0005  * All Rights Reserved.
0006  *
0007  */
0008 
0009 #define pr_fmt(fmt)     "habanalabs: " fmt
0010 
0011 #include "habanalabs.h"
0012 
0013 #include <linux/pci.h>
0014 #include <linux/aer.h>
0015 #include <linux/module.h>
0016 
0017 #define HL_DRIVER_AUTHOR    "HabanaLabs Kernel Driver Team"
0018 
0019 #define HL_DRIVER_DESC      "Driver for HabanaLabs's AI Accelerators"
0020 
0021 MODULE_AUTHOR(HL_DRIVER_AUTHOR);
0022 MODULE_DESCRIPTION(HL_DRIVER_DESC);
0023 MODULE_LICENSE("GPL v2");
0024 
/* Device registry: maps minor numbers to hl_device, guarded by its mutex */
static DEFINE_MUTEX(hl_devs_idr_lock);
static DEFINE_IDR(hl_devs_idr);

/* Char-device major (set in hl_init) and the class handed to hl_device_init */
static struct class *hl_class;
static int hl_major;
0029 
0030 static int timeout_locked = 30;
0031 static int reset_on_lockup = 1;
0032 static int memory_scrub;
0033 static ulong boot_error_status_mask = ULONG_MAX;
0034 
0035 module_param(timeout_locked, int, 0444);
0036 MODULE_PARM_DESC(timeout_locked,
0037     "Device lockup timeout in seconds (0 = disabled, default 30s)");
0038 
0039 module_param(reset_on_lockup, int, 0444);
0040 MODULE_PARM_DESC(reset_on_lockup,
0041     "Do device reset on lockup (0 = no, 1 = yes, default yes)");
0042 
0043 module_param(memory_scrub, int, 0444);
0044 MODULE_PARM_DESC(memory_scrub,
0045     "Scrub device memory in various states (0 = no, 1 = yes, default no)");
0046 
0047 module_param(boot_error_status_mask, ulong, 0444);
0048 MODULE_PARM_DESC(boot_error_status_mask,
0049     "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)");
0050 
0051 #define PCI_VENDOR_ID_HABANALABS    0x1da3
0052 
0053 #define PCI_IDS_GOYA            0x0001
0054 #define PCI_IDS_GAUDI           0x1000
0055 #define PCI_IDS_GAUDI_SEC       0x1010
0056 
0057 #define PCI_IDS_GAUDI2          0x1020
0058 #define PCI_IDS_GAUDI2_SEC      0x1030
0059 
0060 static const struct pci_device_id ids[] = {
0061     { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
0062     { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
0063     { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
0064     { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
0065     { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), },
0066     { 0, }
0067 };
0068 MODULE_DEVICE_TABLE(pci, ids);
0069 
0070 /*
0071  * get_asic_type - translate device id to asic type
0072  *
0073  * @device: id of the PCI device
0074  *
0075  * Translate device id to asic type.
0076  * In case of unidentified device, return -1
0077  */
0078 static enum hl_asic_type get_asic_type(u16 device)
0079 {
0080     enum hl_asic_type asic_type;
0081 
0082     switch (device) {
0083     case PCI_IDS_GOYA:
0084         asic_type = ASIC_GOYA;
0085         break;
0086     case PCI_IDS_GAUDI:
0087         asic_type = ASIC_GAUDI;
0088         break;
0089     case PCI_IDS_GAUDI_SEC:
0090         asic_type = ASIC_GAUDI_SEC;
0091         break;
0092     case PCI_IDS_GAUDI2:
0093         asic_type = ASIC_GAUDI2;
0094         break;
0095     case PCI_IDS_GAUDI2_SEC:
0096         asic_type = ASIC_GAUDI2_SEC;
0097         break;
0098     default:
0099         asic_type = ASIC_INVALID;
0100         break;
0101     }
0102 
0103     return asic_type;
0104 }
0105 
0106 static bool is_asic_secured(enum hl_asic_type asic_type)
0107 {
0108     switch (asic_type) {
0109     case ASIC_GAUDI_SEC:
0110     case ASIC_GAUDI2_SEC:
0111         return true;
0112     default:
0113         return false;
0114     }
0115 }
0116 
0117 /*
0118  * hl_device_open - open function for habanalabs device
0119  *
0120  * @inode: pointer to inode structure
0121  * @filp: pointer to file structure
0122  *
0123  * Called when process opens an habanalabs device.
0124  */
0125 int hl_device_open(struct inode *inode, struct file *filp)
0126 {
0127     enum hl_device_status status;
0128     struct hl_device *hdev;
0129     struct hl_fpriv *hpriv;
0130     int rc;
0131 
0132     mutex_lock(&hl_devs_idr_lock);
0133     hdev = idr_find(&hl_devs_idr, iminor(inode));
0134     mutex_unlock(&hl_devs_idr_lock);
0135 
0136     if (!hdev) {
0137         pr_err("Couldn't find device %d:%d\n",
0138             imajor(inode), iminor(inode));
0139         return -ENXIO;
0140     }
0141 
0142     hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
0143     if (!hpriv)
0144         return -ENOMEM;
0145 
0146     hpriv->hdev = hdev;
0147     filp->private_data = hpriv;
0148     hpriv->filp = filp;
0149 
0150     mutex_init(&hpriv->notifier_event.lock);
0151     mutex_init(&hpriv->restore_phase_mutex);
0152     mutex_init(&hpriv->ctx_lock);
0153     kref_init(&hpriv->refcount);
0154     nonseekable_open(inode, filp);
0155 
0156     hl_ctx_mgr_init(&hpriv->ctx_mgr);
0157     hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
0158 
0159     hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
0160 
0161     mutex_lock(&hdev->fpriv_list_lock);
0162 
0163     if (!hl_device_operational(hdev, &status)) {
0164         dev_err_ratelimited(hdev->dev,
0165             "Can't open %s because it is %s\n",
0166             dev_name(hdev->dev), hdev->status[status]);
0167 
0168         if (status == HL_DEVICE_STATUS_IN_RESET ||
0169                     status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE)
0170             rc = -EAGAIN;
0171         else
0172             rc = -EPERM;
0173 
0174         goto out_err;
0175     }
0176 
0177     if (hdev->is_in_dram_scrub) {
0178         dev_dbg_ratelimited(hdev->dev,
0179             "Can't open %s during dram scrub\n",
0180             dev_name(hdev->dev));
0181         rc = -EAGAIN;
0182         goto out_err;
0183     }
0184 
0185     if (hdev->compute_ctx_in_release) {
0186         dev_dbg_ratelimited(hdev->dev,
0187             "Can't open %s because another user is still releasing it\n",
0188             dev_name(hdev->dev));
0189         rc = -EAGAIN;
0190         goto out_err;
0191     }
0192 
0193     if (hdev->is_compute_ctx_active) {
0194         dev_dbg_ratelimited(hdev->dev,
0195             "Can't open %s because another user is working on it\n",
0196             dev_name(hdev->dev));
0197         rc = -EBUSY;
0198         goto out_err;
0199     }
0200 
0201     rc = hl_ctx_create(hdev, hpriv);
0202     if (rc) {
0203         dev_err(hdev->dev, "Failed to create context %d\n", rc);
0204         goto out_err;
0205     }
0206 
0207     list_add(&hpriv->dev_node, &hdev->fpriv_list);
0208     mutex_unlock(&hdev->fpriv_list_lock);
0209 
0210     hl_debugfs_add_file(hpriv);
0211 
0212     atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
0213     atomic_set(&hdev->last_error.razwi.write_enable, 1);
0214     hdev->last_error.undef_opcode.write_enable = true;
0215 
0216     hdev->open_counter++;
0217     hdev->last_successful_open_jif = jiffies;
0218     hdev->last_successful_open_ktime = ktime_get();
0219 
0220     return 0;
0221 
0222 out_err:
0223     mutex_unlock(&hdev->fpriv_list_lock);
0224     hl_mem_mgr_fini(&hpriv->mem_mgr);
0225     hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
0226     filp->private_data = NULL;
0227     mutex_destroy(&hpriv->ctx_lock);
0228     mutex_destroy(&hpriv->restore_phase_mutex);
0229     mutex_destroy(&hpriv->notifier_event.lock);
0230     put_pid(hpriv->taskpid);
0231 
0232     kfree(hpriv);
0233 
0234     return rc;
0235 }
0236 
0237 int hl_device_open_ctrl(struct inode *inode, struct file *filp)
0238 {
0239     struct hl_device *hdev;
0240     struct hl_fpriv *hpriv;
0241     int rc;
0242 
0243     mutex_lock(&hl_devs_idr_lock);
0244     hdev = idr_find(&hl_devs_idr, iminor(inode));
0245     mutex_unlock(&hl_devs_idr_lock);
0246 
0247     if (!hdev) {
0248         pr_err("Couldn't find device %d:%d\n",
0249             imajor(inode), iminor(inode));
0250         return -ENXIO;
0251     }
0252 
0253     hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
0254     if (!hpriv)
0255         return -ENOMEM;
0256 
0257     /* Prevent other routines from reading partial hpriv data by
0258      * initializing hpriv fields before inserting it to the list
0259      */
0260     hpriv->hdev = hdev;
0261     filp->private_data = hpriv;
0262     hpriv->filp = filp;
0263 
0264     mutex_init(&hpriv->notifier_event.lock);
0265     nonseekable_open(inode, filp);
0266 
0267     hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
0268 
0269     mutex_lock(&hdev->fpriv_ctrl_list_lock);
0270 
0271     if (!hl_device_operational(hdev, NULL)) {
0272         dev_err_ratelimited(hdev->dev_ctrl,
0273             "Can't open %s because it is disabled or in reset\n",
0274             dev_name(hdev->dev_ctrl));
0275         rc = -EPERM;
0276         goto out_err;
0277     }
0278 
0279     list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list);
0280     mutex_unlock(&hdev->fpriv_ctrl_list_lock);
0281 
0282     return 0;
0283 
0284 out_err:
0285     mutex_unlock(&hdev->fpriv_ctrl_list_lock);
0286     filp->private_data = NULL;
0287     put_pid(hpriv->taskpid);
0288 
0289     kfree(hpriv);
0290 
0291     return rc;
0292 }
0293 
0294 static void set_driver_behavior_per_device(struct hl_device *hdev)
0295 {
0296     hdev->nic_ports_mask = 0;
0297     hdev->fw_components = FW_TYPE_ALL_TYPES;
0298     hdev->mmu_enable = MMU_EN_ALL;
0299     hdev->cpu_queues_enable = 1;
0300     hdev->pldm = 0;
0301     hdev->hard_reset_on_fw_events = 1;
0302     hdev->bmc_enable = 1;
0303     hdev->reset_on_preboot_fail = 1;
0304     hdev->heartbeat = 1;
0305 }
0306 
0307 static void copy_kernel_module_params_to_device(struct hl_device *hdev)
0308 {
0309     hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
0310 
0311     hdev->major = hl_major;
0312     hdev->memory_scrub = memory_scrub;
0313     hdev->reset_on_lockup = reset_on_lockup;
0314     hdev->boot_error_status_mask = boot_error_status_mask;
0315 }
0316 
0317 static void fixup_device_params_per_asic(struct hl_device *hdev)
0318 {
0319     switch (hdev->asic_type) {
0320     case ASIC_GOYA:
0321     case ASIC_GAUDI:
0322     case ASIC_GAUDI_SEC:
0323         hdev->reset_upon_device_release = 0;
0324         break;
0325 
0326     default:
0327         hdev->reset_upon_device_release = 1;
0328         break;
0329     }
0330 }
0331 
0332 static int fixup_device_params(struct hl_device *hdev)
0333 {
0334     int tmp_timeout;
0335 
0336     tmp_timeout = timeout_locked;
0337 
0338     hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
0339     hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
0340 
0341     if (tmp_timeout)
0342         hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000);
0343     else
0344         hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
0345 
0346     hdev->stop_on_err = true;
0347     hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
0348     hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
0349 
0350     /* Enable only after the initialization of the device */
0351     hdev->disabled = true;
0352 
0353     if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) &&
0354             (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) {
0355         pr_err("Preboot must be set along with other components");
0356         return -EINVAL;
0357     }
0358 
0359     /* If CPU queues not enabled, no way to do heartbeat */
0360     if (!hdev->cpu_queues_enable)
0361         hdev->heartbeat = 0;
0362 
0363     fixup_device_params_per_asic(hdev);
0364 
0365     return 0;
0366 }
0367 
0368 /**
0369  * create_hdev - create habanalabs device instance
0370  *
0371  * @dev: will hold the pointer to the new habanalabs device structure
0372  * @pdev: pointer to the pci device
0373  *
0374  * Allocate memory for habanalabs device and initialize basic fields
0375  * Identify the ASIC type
0376  * Allocate ID (minor) for the device (only for real devices)
0377  */
0378 static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
0379 {
0380     int main_id, ctrl_id = 0, rc = 0;
0381     struct hl_device *hdev;
0382 
0383     *dev = NULL;
0384 
0385     hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
0386     if (!hdev)
0387         return -ENOMEM;
0388 
0389     /* Will be NULL in case of simulator device */
0390     hdev->pdev = pdev;
0391 
0392     /* Assign status description string */
0393     strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
0394     strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
0395     strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
0396     strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
0397     strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
0398                     "in device creation", HL_STR_MAX);
0399     strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
0400                     "in reset after device release", HL_STR_MAX);
0401 
0402 
0403     /* First, we must find out which ASIC are we handling. This is needed
0404      * to configure the behavior of the driver (kernel parameters)
0405      */
0406     hdev->asic_type = get_asic_type(pdev->device);
0407     if (hdev->asic_type == ASIC_INVALID) {
0408         dev_err(&pdev->dev, "Unsupported ASIC\n");
0409         rc = -ENODEV;
0410         goto free_hdev;
0411     }
0412 
0413     copy_kernel_module_params_to_device(hdev);
0414 
0415     set_driver_behavior_per_device(hdev);
0416 
0417     fixup_device_params(hdev);
0418 
0419     mutex_lock(&hl_devs_idr_lock);
0420 
0421     /* Always save 2 numbers, 1 for main device and 1 for control.
0422      * They must be consecutive
0423      */
0424     main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
0425 
0426     if (main_id >= 0)
0427         ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
0428                     main_id + 2, GFP_KERNEL);
0429 
0430     mutex_unlock(&hl_devs_idr_lock);
0431 
0432     if ((main_id < 0) || (ctrl_id < 0)) {
0433         if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
0434             pr_err("too many devices in the system\n");
0435 
0436         if (main_id >= 0) {
0437             mutex_lock(&hl_devs_idr_lock);
0438             idr_remove(&hl_devs_idr, main_id);
0439             mutex_unlock(&hl_devs_idr_lock);
0440         }
0441 
0442         rc = -EBUSY;
0443         goto free_hdev;
0444     }
0445 
0446     hdev->id = main_id;
0447     hdev->id_control = ctrl_id;
0448 
0449     *dev = hdev;
0450 
0451     return 0;
0452 
0453 free_hdev:
0454     kfree(hdev);
0455     return rc;
0456 }
0457 
0458 /*
0459  * destroy_hdev - destroy habanalabs device instance
0460  *
0461  * @dev: pointer to the habanalabs device structure
0462  *
0463  */
0464 static void destroy_hdev(struct hl_device *hdev)
0465 {
0466     /* Remove device from the device list */
0467     mutex_lock(&hl_devs_idr_lock);
0468     idr_remove(&hl_devs_idr, hdev->id);
0469     idr_remove(&hl_devs_idr, hdev->id_control);
0470     mutex_unlock(&hl_devs_idr_lock);
0471 
0472     kfree(hdev);
0473 }
0474 
/*
 * hl_pmops_suspend - PM suspend callback
 *
 * @dev: generic device whose driver data is the habanalabs device
 *
 * Return the result of hl_device_suspend(), or 0 (after logging an error)
 * when no driver data is attached to @dev.
 */
static int hl_pmops_suspend(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to suspend PCI device\n");

	if (hdev)
		return hl_device_suspend(hdev);

	pr_err("device pointer is NULL in suspend\n");
	return 0;
}
0488 
/*
 * hl_pmops_resume - PM resume callback
 *
 * @dev: generic device whose driver data is the habanalabs device
 *
 * Return the result of hl_device_resume(), or 0 (after logging an error)
 * when no driver data is attached to @dev.
 */
static int hl_pmops_resume(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to resume PCI device\n");

	if (hdev)
		return hl_device_resume(hdev);

	pr_err("device pointer is NULL in resume\n");
	return 0;
}
0502 
0503 /**
0504  * hl_pci_probe - probe PCI habanalabs devices
0505  *
0506  * @pdev: pointer to pci device
0507  * @id: pointer to pci device id structure
0508  *
0509  * Standard PCI probe function for habanalabs device.
0510  * Create a new habanalabs device and initialize it according to the
0511  * device's type
0512  */
0513 static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
0514 {
0515     struct hl_device *hdev;
0516     int rc;
0517 
0518     dev_info(&pdev->dev, HL_NAME
0519          " device found [%04x:%04x] (rev %x)\n",
0520          (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
0521 
0522     rc = create_hdev(&hdev, pdev);
0523     if (rc)
0524         return rc;
0525 
0526     pci_set_drvdata(pdev, hdev);
0527 
0528     pci_enable_pcie_error_reporting(pdev);
0529 
0530     rc = hl_device_init(hdev, hl_class);
0531     if (rc) {
0532         dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
0533         rc = -ENODEV;
0534         goto disable_device;
0535     }
0536 
0537     return 0;
0538 
0539 disable_device:
0540     pci_disable_pcie_error_reporting(pdev);
0541     pci_set_drvdata(pdev, NULL);
0542     destroy_hdev(hdev);
0543 
0544     return rc;
0545 }
0546 
0547 /*
0548  * hl_pci_remove - remove PCI habanalabs devices
0549  *
0550  * @pdev: pointer to pci device
0551  *
0552  * Standard PCI remove function for habanalabs device
0553  */
0554 static void hl_pci_remove(struct pci_dev *pdev)
0555 {
0556     struct hl_device *hdev;
0557 
0558     hdev = pci_get_drvdata(pdev);
0559     if (!hdev)
0560         return;
0561 
0562     hl_device_fini(hdev);
0563     pci_disable_pcie_error_reporting(pdev);
0564     pci_set_drvdata(pdev, NULL);
0565     destroy_hdev(hdev);
0566 }
0567 
0568 /**
0569  * hl_pci_err_detected - a PCI bus error detected on this device
0570  *
0571  * @pdev: pointer to pci device
0572  * @state: PCI error type
0573  *
0574  * Called by the PCI subsystem whenever a non-correctable
0575  * PCI bus error is detected
0576  */
0577 static pci_ers_result_t
0578 hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
0579 {
0580     struct hl_device *hdev = pci_get_drvdata(pdev);
0581     enum pci_ers_result result;
0582 
0583     switch (state) {
0584     case pci_channel_io_normal:
0585         return PCI_ERS_RESULT_CAN_RECOVER;
0586 
0587     case pci_channel_io_frozen:
0588         dev_warn(hdev->dev, "frozen state error detected\n");
0589         result = PCI_ERS_RESULT_NEED_RESET;
0590         break;
0591 
0592     case pci_channel_io_perm_failure:
0593         dev_warn(hdev->dev, "failure state error detected\n");
0594         result = PCI_ERS_RESULT_DISCONNECT;
0595         break;
0596 
0597     default:
0598         result = PCI_ERS_RESULT_NONE;
0599     }
0600 
0601     hdev->asic_funcs->halt_engines(hdev, true, false);
0602 
0603     return result;
0604 }
0605 
0606 /**
0607  * hl_pci_err_resume - resume after a PCI slot reset
0608  *
0609  * @pdev: pointer to pci device
0610  *
0611  */
0612 static void hl_pci_err_resume(struct pci_dev *pdev)
0613 {
0614     struct hl_device *hdev = pci_get_drvdata(pdev);
0615 
0616     dev_warn(hdev->dev, "Resuming device after PCI slot reset\n");
0617     hl_device_resume(hdev);
0618 }
0619 
0620 /**
0621  * hl_pci_err_slot_reset - a PCI slot reset has just happened
0622  *
0623  * @pdev: pointer to pci device
0624  *
0625  * Determine if the driver can recover from the PCI slot reset
0626  */
0627 static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
0628 {
0629     return PCI_ERS_RESULT_RECOVERED;
0630 }
0631 
0632 static const struct dev_pm_ops hl_pm_ops = {
0633     .suspend = hl_pmops_suspend,
0634     .resume = hl_pmops_resume,
0635 };
0636 
0637 static const struct pci_error_handlers hl_pci_err_handler = {
0638     .error_detected = hl_pci_err_detected,
0639     .slot_reset = hl_pci_err_slot_reset,
0640     .resume = hl_pci_err_resume,
0641 };
0642 
0643 static struct pci_driver hl_pci_driver = {
0644     .name = HL_NAME,
0645     .id_table = ids,
0646     .probe = hl_pci_probe,
0647     .remove = hl_pci_remove,
0648     .shutdown = hl_pci_remove,
0649     .driver = {
0650         .name = HL_NAME,
0651         .pm = &hl_pm_ops,
0652         .probe_type = PROBE_PREFER_ASYNCHRONOUS,
0653     },
0654     .err_handler = &hl_pci_err_handler,
0655 };
0656 
0657 /*
0658  * hl_init - Initialize the habanalabs kernel driver
0659  */
0660 static int __init hl_init(void)
0661 {
0662     int rc;
0663     dev_t dev;
0664 
0665     pr_info("loading driver\n");
0666 
0667     rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
0668     if (rc < 0) {
0669         pr_err("unable to get major\n");
0670         return rc;
0671     }
0672 
0673     hl_major = MAJOR(dev);
0674 
0675     hl_class = class_create(THIS_MODULE, HL_NAME);
0676     if (IS_ERR(hl_class)) {
0677         pr_err("failed to allocate class\n");
0678         rc = PTR_ERR(hl_class);
0679         goto remove_major;
0680     }
0681 
0682     hl_debugfs_init();
0683 
0684     rc = pci_register_driver(&hl_pci_driver);
0685     if (rc) {
0686         pr_err("failed to register pci device\n");
0687         goto remove_debugfs;
0688     }
0689 
0690     pr_debug("driver loaded\n");
0691 
0692     return 0;
0693 
0694 remove_debugfs:
0695     hl_debugfs_fini();
0696     class_destroy(hl_class);
0697 remove_major:
0698     unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
0699     return rc;
0700 }
0701 
0702 /*
0703  * hl_exit - Release all resources of the habanalabs kernel driver
0704  */
0705 static void __exit hl_exit(void)
0706 {
0707     pci_unregister_driver(&hl_pci_driver);
0708 
0709     /*
0710      * Removing debugfs must be after all devices or simulator devices
0711      * have been removed because otherwise we get a bug in the
0712      * debugfs module for referencing NULL objects
0713      */
0714     hl_debugfs_fini();
0715 
0716     class_destroy(hl_class);
0717     unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
0718 
0719     idr_destroy(&hl_devs_idr);
0720 
0721     pr_debug("driver removed\n");
0722 }
0723 
/* Module entry and exit points */
module_init(hl_init);
module_exit(hl_exit);