Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 
0003 /* P9 gzip sample code for demonstrating the P9 NX hardware interface.
0004  * Not intended for production use or for performance or compression
0005  * ratio measurements.  For simplicity of demonstration, this sample
0006  * code compresses into fixed Huffman blocks only (Deflate btype=1)
0007  * and has very simple memory management.  Dynamic Huffman blocks
0008  * (Deflate btype=2) are more involved as detailed in the user guide.
0009  * Note also that /dev/crypto/gzip, VAS and skiboot support are
0010  * required.
0011  *
0012  * Copyright 2020 IBM Corp.
0013  *
0014  * https://github.com/libnxz/power-gzip for zlib api and other utils
0015  *
0016  * Author: Bulent Abali <abali@us.ibm.com>
0017  *
0018  * Definitions of acronyms used here. See
0019  * P9 NX Gzip Accelerator User's Manual for details:
0020  * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
0021  *
0022  * adler/crc: 32 bit checksums appended to stream tail
0023  * ce:       completion extension
0024  * cpb:      coprocessor parameter block (metadata)
0025  * crb:      coprocessor request block (command)
0026  * csb:      coprocessor status block (status)
0027  * dht:      dynamic huffman table
0028  * dde:      data descriptor element (address, length)
0029  * ddl:      list of ddes
0030  * dh/fh:    dynamic and fixed huffman types
0031  * fc:       coprocessor function code
0032  * histlen:  history/dictionary length
0033  * history:  sliding window of up to 32KB of data
0034  * lzcount:  Deflate LZ symbol counts
0035  * rembytecnt: remaining byte count
0036  * sfbt:     source final block type; last block's type during decomp
0037  * spbc:     source processed byte count
0038  * subc:     source unprocessed bit count
0039  * tebc:     target ending bit count; valid bits in the last byte
0040  * tpbc:     target processed byte count
0041  * vas:      virtual accelerator switch; the user mode interface
0042  */
0043 
0044 #define _ISOC11_SOURCE  // For aligned_alloc()
0045 #define _DEFAULT_SOURCE // For endian.h
0046 
0047 #include <stdio.h>
0048 #include <stdlib.h>
0049 #include <string.h>
0050 #include <unistd.h>
0051 #include <stdint.h>
0052 #include <sys/types.h>
0053 #include <sys/stat.h>
0054 #include <sys/time.h>
0055 #include <sys/fcntl.h>
0056 #include <sys/mman.h>
0057 #include <endian.h>
0058 #include <bits/endian.h>
0059 #include <sys/ioctl.h>
0060 #include <assert.h>
0061 #include <errno.h>
0062 #include <signal.h>
0063 #include "utils.h"
0064 #include "nxu.h"
0065 #include "nx.h"
0066 
0067 int nx_dbg;
0068 FILE *nx_gzip_log;
0069 
0070 #define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
0071 #define FNAME_MAX 1024
0072 #define FEXT ".nx.gz"
0073 
0074 #define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"
0075 
0076 /*
0077  * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
0078  */
0079 static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
0080                 uint32_t dstlen, int with_count,
0081                 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
0082 {
0083     uint32_t fc;
0084 
0085     assert(!!cmdp);
0086 
0087     put32(cmdp->crb, gzip_fc, 0);  /* clear */
0088     fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
0089                 GZIP_FC_COMPRESS_RESUME_FHT;
0090     putnn(cmdp->crb, gzip_fc, fc);
0091     putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
0092     memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
0093 
0094     /* Section 6.6 programming notes; spbc may be in two different
0095      * places depending on FC.
0096      */
0097     if (!with_count)
0098         put32(cmdp->cpb, out_spbc_comp, 0);
0099     else
0100         put32(cmdp->cpb, out_spbc_comp_with_count, 0);
0101 
0102     /* Figure 6-3 6-4; CSB location */
0103     put64(cmdp->crb, csb_address, 0);
0104     put64(cmdp->crb, csb_address,
0105           (uint64_t) &cmdp->crb.csb & csb_address_mask);
0106 
0107     /* Source direct dde (scatter-gather list) */
0108     clear_dde(cmdp->crb.source_dde);
0109     putnn(cmdp->crb.source_dde, dde_count, 0);
0110     put32(cmdp->crb.source_dde, ddebc, srclen);
0111     put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
0112 
0113     /* Target direct dde (scatter-gather list) */
0114     clear_dde(cmdp->crb.target_dde);
0115     putnn(cmdp->crb.target_dde, dde_count, 0);
0116     put32(cmdp->crb.target_dde, ddebc, dstlen);
0117     put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
0118 
0119     /* Submit the crb, the job descriptor, to the accelerator */
0120     return nxu_submit_job(cmdp, handle);
0121 }
0122 
/*
 * Writes a minimal 10-byte gzip member header (no file name, zero
 * timestamp) at buf and returns the number of bytes written.
 * Header layout per the gzip specification,
 * https://tools.ietf.org/html/rfc1952
 */
int gzip_header_blank(char *buf)
{
    static const unsigned char blank_hdr[] = {
        0x1f,                   /* ID1 */
        0x8b,                   /* ID2 */
        0x08,                   /* CM  */
        0x00,                   /* FLG */
        0x00, 0x00, 0x00, 0x00, /* MTIME */
        0x04,                   /* XFL 4=fastest */
        0x03,                   /* OS UNIX */
    };

    memcpy(buf, blank_hdr, sizeof(blank_hdr));

    return (int) sizeof(blank_hdr);
}
0145 
/*
 * Reads the whole file fname into a freshly malloc'ed buffer.
 *
 * On success returns 0, stores the buffer in *buf and its length in
 * *bufsize; the caller must free(*buf).  An empty file yields a valid
 * (freeable) buffer with *bufsize == 0.
 * On any error a message is printed to stderr, nothing is stored in
 * *buf or *bufsize, no resources are left open and nonzero is returned.
 */
int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
{
    struct stat statbuf;
    FILE *fp;
    char *p;
    size_t want;
    size_t num_bytes;

    if (stat(fname, &statbuf)) {
        perror(fname);
        return -1;
    }
    fp = fopen(fname, "rb");
    if (fp == NULL) {
        perror(fname);
        return -1;
    }

    want = (size_t) statbuf.st_size;
    /* malloc(0) may return NULL; always ask for at least one byte so
     * that NULL unambiguously means out of memory.
     */
    p = malloc(want ? want : 1);
    if (p == NULL) {
        perror(fname);
        fclose(fp);
        return -1;
    }

    num_bytes = fread(p, 1, want, fp);
    if (ferror(fp) || num_bytes != want) {
        /* errno is only meaningful for a real stream error */
        if (ferror(fp))
            perror(fname);
        else
            fprintf(stderr, "%s: short read\n", fname);
        free(p);
        fclose(fp);
        return -1;
    }
    fclose(fp);

    *buf = p;
    *bufsize = num_bytes;
    return 0;
}
0173 
/*
 * Writes bufsize bytes from buf to the file fname, creating or
 * truncating it.
 *
 * Returns 0 on success.  Returns nonzero, after printing a message to
 * stderr, if the file cannot be opened, the write is short, or the
 * final flush done by fclose() fails.  The stream is closed on every
 * path.
 */
int write_output_file(char *fname, char *buf, size_t bufsize)
{
    FILE *fp;
    size_t num_bytes;
    int rc = 0;

    fp = fopen(fname, "wb");
    if (fp == NULL) {
        perror(fname);
        return -1;
    }

    num_bytes = fwrite(buf, 1, bufsize, fp);
    if (ferror(fp) || (num_bytes != bufsize)) {
        perror(fname);
        rc = -1;
    }

    /* fclose() flushes buffered data; a failure here means the file
     * on disk is incomplete even though fwrite() succeeded.
     */
    if (fclose(fp) != 0 && rc == 0) {
        perror(fname);
        rc = -1;
    }

    return rc;
}
0193 
/*
 * Appends an empty stored block (Z_SYNC_FLUSH as described in zlib.h)
 * so that the stream ends on a byte boundary.
 *
 * buf:   first byte after the compressed data written so far
 * tebc:  number of valid bits in the last written byte; when nonzero
 *        that byte (buf[-1]) is partially filled and is rewritten
 * final: low bit becomes the BFINAL bit of the appended block
 *
 * Returns the number of bytes appended at and after the original buf.
 */
int append_sync_flush(char *buf, int tebc, int final)
{
    const int used_bits = (tebc & 0x7);
    uint64_t bits;
    int hdr_bits;
    int nbytes;
    int k;

    if (tebc > 0) {
        /* Last byte is partially full; back up and keep only its
         * valid low bits.
         */
        buf--;
        *buf = *buf & (unsigned char) ((1<<tebc)-1);
    } else {
        *buf = 0;
    }

    /* BFINAL goes right after the valid bits; BTYPE=00 is already 0 */
    bits = ((0x1ULL & final) << used_bits) | *buf;

    /* The 3 header bits either fit in the current byte or spill
     * into a second one; the block body starts byte aligned.
     */
    hdr_bits = (used_bits + 3 <= 8) ? 8 : 16;

    /* Zero length block: LEN=0x0000 followed by NLEN=0xFFFF */
    bits |= (0xFFFF0000ULL) << hdr_bits;

    /* Emit header byte(s) plus the 4 LEN/NLEN bytes, low byte first */
    nbytes = (hdr_bits + 32) / 8;
    for (k = 0; k < nbytes; k++) {
        buf[k] = (unsigned char) (bits & 0xffULL);
        bits >>= 8;
    }

    if ((tebc > 5) || (tebc == 0))
        return 5;
    return 4;
}
0221 
/*
 * Sets (bfinal nonzero) or clears (bfinal zero) the least significant
 * bit of the first byte at buf, i.e. the Deflate BFINAL bit.  This
 * call assumes the block beginning is byte aligned.
 */
static void set_bfinal(void *buf, int bfinal)
{
    unsigned char *first = buf;

    *first = bfinal ? (unsigned char) (*first | 0x01)
                    : (unsigned char) (*first & 0xfe);
}
0235 
0236 int compress_file(int argc, char **argv, void *handle)
0237 {
0238     char *inbuf, *outbuf, *srcbuf, *dstbuf;
0239     char outname[FNAME_MAX];
0240     uint32_t srclen, dstlen;
0241     uint32_t flushlen, chunk;
0242     size_t inlen, outlen, dsttotlen, srctotlen;
0243     uint32_t crc, spbc, tpbc, tebc;
0244     int lzcounts = 0;
0245     int cc;
0246     int num_hdr_bytes;
0247     struct nx_gzip_crb_cpb_t *cmdp;
0248     uint32_t pagelen = 65536;
0249     int fault_tries = NX_MAX_FAULTS;
0250     char buf[32];
0251 
0252     cmdp = (void *)(uintptr_t)
0253         aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
0254                   sizeof(struct nx_gzip_crb_cpb_t));
0255 
0256     if (argc != 2) {
0257         fprintf(stderr, "usage: %s <fname>\n", argv[0]);
0258         exit(-1);
0259     }
0260     if (read_alloc_input_file(argv[1], &inbuf, &inlen))
0261         exit(-1);
0262     fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
0263 
0264     /* Generous output buffer for header/trailer */
0265     outlen = 2 * inlen + 1024;
0266 
0267     assert(NULL != (outbuf = (char *)malloc(outlen)));
0268     nxu_touch_pages(outbuf, outlen, pagelen, 1);
0269 
0270     /*
0271      * On PowerVM, the hypervisor defines the maximum request buffer
0272      * size is defined and this value is available via sysfs.
0273      */
0274     if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) {
0275         chunk = atoi(buf);
0276     } else {
0277         /* sysfs entry is not available on PowerNV */
0278         /* Compress piecemeal in smallish chunks */
0279         chunk = 1<<22;
0280     }
0281 
0282     /* Write the gzip header to the stream */
0283     num_hdr_bytes = gzip_header_blank(outbuf);
0284     dstbuf    = outbuf + num_hdr_bytes;
0285     outlen    = outlen - num_hdr_bytes;
0286     dsttotlen = num_hdr_bytes;
0287 
0288     srcbuf    = inbuf;
0289     srctotlen = 0;
0290 
0291     /* Init the CRB, the coprocessor request block */
0292     memset(&cmdp->crb, 0, sizeof(cmdp->crb));
0293 
0294     /* Initial gzip crc32 */
0295     put32(cmdp->cpb, in_crc, 0);
0296 
0297     while (inlen > 0) {
0298 
0299         /* Submit chunk size source data per job */
0300         srclen = NX_MIN(chunk, inlen);
0301         /* Supply large target in case data expands */
0302         dstlen = NX_MIN(2*srclen, outlen);
0303 
0304         /* Page faults are handled by the user code */
0305 
0306         /* Fault-in pages; an improved code wouldn't touch so
0307          * many pages but would try to estimate the
0308          * compression ratio and adjust both the src and dst
0309          * touch amounts.
0310          */
0311         nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
0312                 1);
0313         nxu_touch_pages(srcbuf, srclen, pagelen, 0);
0314         nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
0315 
0316         cc = compress_fht_sample(
0317             srcbuf, srclen,
0318             dstbuf, dstlen,
0319             lzcounts, cmdp, handle);
0320 
0321         if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
0322             cc != ERR_NX_AT_FAULT) {
0323             fprintf(stderr, "nx error: cc= %d\n", cc);
0324             exit(-1);
0325         }
0326 
0327         /* Page faults are handled by the user code */
0328         if (cc == ERR_NX_AT_FAULT) {
0329             NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
0330             NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
0331                   fault_tries,
0332                   (unsigned long long) cmdp->crb.csb.fsaddr));
0333             fault_tries--;
0334             if (fault_tries > 0) {
0335                 continue;
0336             } else {
0337                 fprintf(stderr, "error: cannot progress; ");
0338                 fprintf(stderr, "too many faults\n");
0339                 exit(-1);
0340             }
0341         }
0342 
0343         fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
0344 
0345         inlen     = inlen - srclen;
0346         srcbuf    = srcbuf + srclen;
0347         srctotlen = srctotlen + srclen;
0348 
0349         /* Two possible locations for spbc depending on the function
0350          * code.
0351          */
0352         spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
0353             get32(cmdp->cpb, out_spbc_comp_with_count);
0354         assert(spbc == srclen);
0355 
0356         /* Target byte count */
0357         tpbc = get32(cmdp->crb.csb, tpbc);
0358         /* Target ending bit count */
0359         tebc = getnn(cmdp->cpb, out_tebc);
0360         NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
0361         NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
0362 
0363         if (inlen > 0) { /* More chunks to go */
0364             set_bfinal(dstbuf, 0);
0365             dstbuf    = dstbuf + tpbc;
0366             dsttotlen = dsttotlen + tpbc;
0367             outlen    = outlen - tpbc;
0368             /* Round up to the next byte with a flush
0369              * block; do not set the BFINAqL bit.
0370              */
0371             flushlen  = append_sync_flush(dstbuf, tebc, 0);
0372             dsttotlen = dsttotlen + flushlen;
0373             outlen    = outlen - flushlen;
0374             dstbuf    = dstbuf + flushlen;
0375             NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
0376                     flushlen));
0377         } else {  /* Done */
0378             /* Set the BFINAL bit of the last block per Deflate
0379              * specification.
0380              */
0381             set_bfinal(dstbuf, 1);
0382             dstbuf    = dstbuf + tpbc;
0383             dsttotlen = dsttotlen + tpbc;
0384             outlen    = outlen - tpbc;
0385         }
0386 
0387         /* Resuming crc32 for the next chunk */
0388         crc = get32(cmdp->cpb, out_crc);
0389         put32(cmdp->cpb, in_crc, crc);
0390         crc = be32toh(crc);
0391     }
0392 
0393     /* Append crc32 and ISIZE to the end */
0394     memcpy(dstbuf, &crc, 4);
0395     memcpy(dstbuf+4, &srctotlen, 4);
0396     dsttotlen = dsttotlen + 8;
0397     outlen    = outlen - 8;
0398 
0399     assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
0400     strcpy(outname, argv[1]);
0401     strcat(outname, FEXT);
0402     if (write_output_file(outname, outbuf, dsttotlen)) {
0403         fprintf(stderr, "write error: %s\n", outname);
0404         exit(-1);
0405     }
0406 
0407     fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
0408         dsttotlen);
0409     fprintf(stderr, "crc32 checksum = %08x\n", crc);
0410 
0411     if (inbuf != NULL)
0412         free(inbuf);
0413 
0414     if (outbuf != NULL)
0415         free(outbuf);
0416 
0417     return 0;
0418 }
0419 
0420 int main(int argc, char **argv)
0421 {
0422     int rc;
0423     struct sigaction act;
0424     void *handle;
0425 
0426     nx_dbg = 0;
0427     nx_gzip_log = NULL;
0428     act.sa_handler = 0;
0429     act.sa_sigaction = nxu_sigsegv_handler;
0430     act.sa_flags = SA_SIGINFO;
0431     act.sa_restorer = 0;
0432     sigemptyset(&act.sa_mask);
0433     sigaction(SIGSEGV, &act, NULL);
0434 
0435     handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
0436     if (!handle) {
0437         fprintf(stderr, "Unable to init NX, errno %d\n", errno);
0438         exit(-1);
0439     }
0440 
0441     rc = compress_file(argc, argv, handle);
0442 
0443     nx_function_end(handle);
0444 
0445     return rc;
0446 }