Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 #ifndef CEPH_RADOS_H
0003 #define CEPH_RADOS_H
0004 
0005 /*
0006  * Data types for the Ceph distributed object storage layer RADOS
0007  * (Reliable Autonomic Distributed Object Store).
0008  */
0009 
0010 #include <linux/ceph/msgr.h>
0011 
0012 /*
0013  * fs id -- 16 raw bytes, compared bytewise by ceph_fsid_compare()
0014  */
0015 struct ceph_fsid {
0016     unsigned char fsid[16];
0017 };
0018 
0019 static inline int ceph_fsid_compare(const struct ceph_fsid *a,
0020                     const struct ceph_fsid *b)
0021 {
0022     return memcmp(a, b, sizeof(*a));
0023 }
0024 
0025 /*
0026  * ino, object, etc.
      *
      * The three reserved snap ids below are -1, -2 and -3 cast to __u64,
      * i.e. the top three values of the unsigned 64-bit range.
      * NOTE(review): the constants are plain __u64 while ceph_snapid_t is
      * __le64 -- presumably callers convert byte order before storing them
      * in a ceph_snapid_t; confirm.
0027  */
0028 typedef __le64 ceph_snapid_t;
0029 #define CEPH_SNAPDIR ((__u64)(-1))  /* reserved for hidden .snap dir */
0030 #define CEPH_NOSNAP  ((__u64)(-2))  /* "head", "live" revision */
0031 #define CEPH_MAXSNAP ((__u64)(-3))  /* largest valid snapid */
0032 
0033 struct ceph_timespec {
0034     __le32 tv_sec;   /* 32-bit little-endian; presumably seconds -- confirm */
0035     __le32 tv_nsec;  /* 32-bit little-endian; presumably nanoseconds -- confirm */
0036 } __attribute__ ((packed));  /* two 4-byte fields, 8 bytes total */
0037 
0038 
0039 /*
0040  * object layout - how objects are mapped into PGs (placement groups)
0041  */
0042 #define CEPH_OBJECT_LAYOUT_HASH     1
0043 #define CEPH_OBJECT_LAYOUT_LINEAR   2
0044 #define CEPH_OBJECT_LAYOUT_HASHINO  3
0045 
0046 /*
0047  * pg layout -- how PGs are mapped onto (sets of) OSDs
0048  */
0049 #define CEPH_PG_LAYOUT_CRUSH  0
0050 #define CEPH_PG_LAYOUT_HASH   1
0051 #define CEPH_PG_LAYOUT_LINEAR 2
0052 #define CEPH_PG_LAYOUT_HYBRID 3
0053 
0054 #define CEPH_PG_MAX_SIZE      32  /* max # osds in a single pg */
0055 
0056 /*
0057  * placement group.
0058  * we encode this into one __le64 (packed fields: 2 + 2 + 4 = 8 bytes).
0059  */
0060 struct ceph_pg_v1 {
0061     __le16 preferred; /* preferred primary osd */
0062     __le16 ps;        /* placement seed */
0063     __le32 pool;      /* object pool */
0064 } __attribute__ ((packed));
0065 
0066 /*
0067  * pg_pool is a set of pgs storing a pool of objects
0068  *
0069  *  pg_num -- base number of pseudorandomly placed pgs
0070  *
0071  *  pgp_num -- effective number when calculating pg placement.  this
0072  * is used for pg_num increases.  new pgs result in data being "split"
0073  * into new pgs.  for this to proceed smoothly, new pgs are initially
0074  * colocated with their parents; that is, pgp_num doesn't increase
0075  * until the new pgs have successfully split.  only _then_ are the new
0076  * pgs placed independently.
0077  *
0078  *  lpg_num -- localized pg count (per device).  replicas are randomly
0079  * selected.
0080  *
0081  *  lpgp_num -- as above.
0082  */
0083 #define CEPH_NOPOOL  ((__u64) (-1))  /* pool id not defined */
0084 
0085 #define CEPH_POOL_TYPE_REP     1 /* presumably replicated -- confirm */
0086 #define CEPH_POOL_TYPE_RAID4   2 /* never implemented */
0087 #define CEPH_POOL_TYPE_EC      3 /* presumably erasure-coded -- confirm */
0088 
/*
 * Stable modulo, used to control the number of placement groups.
 *
 * Behaves much like a plain "x mod b", except that the mapping stays
 * stable while b grows over time.
 *
 *  x     -- value to map
 *  b     -- number of bins
 *  bmask -- containing power of 2 minus 1, so b <= bmask and
 *           bmask = (2**n) - 1
 *           (e.g., b=12 -> bmask=15, b=123 -> bmask=127)
 *
 * If the low bits of x selected by bmask name an existing bin (< b) they
 * are returned as is; otherwise one bit fewer is used (bmask >> 1).
 */
static inline int ceph_stable_mod(int x, int b, int bmask)
{
    const int masked = x & bmask;

    return (masked < b) ? masked : (x & (bmask >> 1));
}
0105 
0106 /*
0107  * object layout - how a given object should be stored.
      * packed: 8-byte pg id followed by a 4-byte stripe unit (12 bytes).
0108  */
0109 struct ceph_object_layout {
0110     struct ceph_pg_v1 ol_pgid;   /* raw pg, with _full_ ps precision. */
0111     __le32 ol_stripe_unit;    /* for per-object parity, if any */
0112 } __attribute__ ((packed));
0113 
0114 /*
0115  * compound epoch+version, used by storage layer to serialize mutations
      * packed: the 8-byte version comes first, then the 4-byte epoch
      * (12 bytes total).
0116  */
0117 struct ceph_eversion {
0118     __le64 version;
0119     __le32 epoch;
0120 } __attribute__ ((packed));
0121 
0122 /*
0123  * osd map bits
0124  */
0125 
/* osd status bits (one bit each, may be OR-ed together) */
#define CEPH_OSD_EXISTS   (1 << 0)
#define CEPH_OSD_UP       (1 << 1)
#define CEPH_OSD_AUTOOUT  (1 << 2)  /* osd was automatically marked out */
#define CEPH_OSD_NEW      (1 << 3)  /* osd is new, never marked in */
0131 
0132 extern const char *ceph_osd_state_name(int s);  /* defined outside this header */
0133 
0134 /* osd weights.  fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
0135 #define CEPH_OSD_IN  0x10000
0136 #define CEPH_OSD_OUT 0
0137 
0138 /* osd primary-affinity.  fixed point value: 0x10000 == baseline */
0139 #define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000      /* max equals the default below */
0140 #define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000
0141 
0142 
/*
 * osd map flag bits
 *
 * One bit per flag; bits 0..19 are assigned.
 */
/* sync writes (near ENOSPC), not set since ~luminous */
#define CEPH_OSDMAP_NEARFULL         (1 << 0)
/* no data writes (ENOSPC), not set since ~luminous */
#define CEPH_OSDMAP_FULL             (1 << 1)
#define CEPH_OSDMAP_PAUSERD          (1 << 2)   /* pause all reads */
#define CEPH_OSDMAP_PAUSEWR          (1 << 3)   /* pause all writes */
#define CEPH_OSDMAP_PAUSEREC         (1 << 4)   /* pause recovery */
#define CEPH_OSDMAP_NOUP             (1 << 5)   /* block osd boot */
#define CEPH_OSDMAP_NODOWN           (1 << 6)   /* block osd mark-down/failure */
#define CEPH_OSDMAP_NOOUT            (1 << 7)   /* block osd auto mark-out */
#define CEPH_OSDMAP_NOIN             (1 << 8)   /* block osd auto mark-in */
#define CEPH_OSDMAP_NOBACKFILL       (1 << 9)   /* block osd backfill */
#define CEPH_OSDMAP_NORECOVER        (1 << 10)  /* block osd recovery and backfill */
#define CEPH_OSDMAP_NOSCRUB          (1 << 11)  /* block periodic scrub */
#define CEPH_OSDMAP_NODEEP_SCRUB     (1 << 12)  /* block periodic deep-scrub */
#define CEPH_OSDMAP_NOTIERAGENT      (1 << 13)  /* disable tiering agent */
#define CEPH_OSDMAP_NOREBALANCE      (1 << 14)  /* block osd backfill unless pg is degraded */
#define CEPH_OSDMAP_SORTBITWISE      (1 << 15)  /* use bitwise hobject_t sort */
#define CEPH_OSDMAP_REQUIRE_JEWEL    (1 << 16)  /* require jewel for booting osds */
#define CEPH_OSDMAP_REQUIRE_KRAKEN   (1 << 17)  /* require kraken for booting osds */
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1 << 18)  /* require l for booting osds */
#define CEPH_OSDMAP_RECOVERY_DELETES (1 << 19)  /* deletes performed during recovery instead of peering */
0168 
0169 /*
0170  * The error code to return when an OSD can't handle a write
0171  * because it is too large.
      * This is an alias for EMSGSIZE, which this header does not define
      * itself; it must already be in scope wherever the macro is used.
0172  */
0173 #define OSD_WRITETOOBIG EMSGSIZE
0174 
0175 /*
0176  * osd ops
0177  *
0178  * WARNING: do not use these op codes directly.  Use the helpers
0179  * defined below instead.  In certain cases, op code behavior was
0180  * redefined, resulting in special-cases in the helpers.
0181  */
/* mode field of an op code: bits 12..15 */
#define CEPH_OSD_OP_MODE        0xf000  /* mask */
#define CEPH_OSD_OP_MODE_RD     0x1000
#define CEPH_OSD_OP_MODE_WR     0x2000
#define CEPH_OSD_OP_MODE_RMW    0x3000  /* RD and WR bits together */
#define CEPH_OSD_OP_MODE_SUB    0x4000
#define CEPH_OSD_OP_MODE_CACHE  0x8000

/* type field of an op code: bits 8..11 */
#define CEPH_OSD_OP_TYPE        0x0f00  /* mask */
#define CEPH_OSD_OP_TYPE_LOCK   0x0100
#define CEPH_OSD_OP_TYPE_DATA   0x0200
#define CEPH_OSD_OP_TYPE_ATTR   0x0300
#define CEPH_OSD_OP_TYPE_EXEC   0x0400
#define CEPH_OSD_OP_TYPE_PG     0x0500
#define CEPH_OSD_OP_TYPE_MULTI  0x0600  /* multiobject */

/* op code made of a mode suffix and a number only (no type field) */
#define __CEPH_OSD_OP1(mode, nr)                                        \
    (CEPH_OSD_OP_MODE_##mode | (nr))

/* op code made of a mode suffix, a type suffix and a number */
#define __CEPH_OSD_OP(mode, type, nr)                                   \
    (CEPH_OSD_OP_MODE_##mode | CEPH_OSD_OP_TYPE_##type | (nr))
0202 
/*
 * Master list of osd ops.  Expands to one f(op, opcode, str) invocation
 * per op, where op is the name suffix, opcode is the numeric code built
 * with __CEPH_OSD_OP()/__CEPH_OSD_OP1(), and str is a string name.
 * The invocations are not separated by commas or semicolons, so f must
 * supply any separator itself.
 */
0203 #define __CEPH_FORALL_OSD_OPS(f)                        \
0204     /** data **/                                \
0205     /* read */                              \
0206     f(READ,     __CEPH_OSD_OP(RD, DATA, 1), "read")         \
0207     f(STAT,     __CEPH_OSD_OP(RD, DATA, 2), "stat")         \
0208     f(MAPEXT,   __CEPH_OSD_OP(RD, DATA, 3), "mapext")       \
0209                                         \
0210     /* fancy read */                            \
0211     f(MASKTRUNC,    __CEPH_OSD_OP(RD, DATA, 4), "masktrunc")        \
0212     f(SPARSE_READ,  __CEPH_OSD_OP(RD, DATA, 5), "sparse-read")      \
0213                                         \
0214     f(NOTIFY,   __CEPH_OSD_OP(RD, DATA, 6), "notify")       \
0215     f(NOTIFY_ACK,   __CEPH_OSD_OP(RD, DATA, 7), "notify-ack")       \
0216                                         \
0217     /* versioning */                            \
0218     f(ASSERT_VER,   __CEPH_OSD_OP(RD, DATA, 8), "assert-version")   \
0219                                         \
0220     f(LIST_WATCHERS, __CEPH_OSD_OP(RD, DATA, 9),    "list-watchers")    \
0221                                         \
0222     f(LIST_SNAPS,   __CEPH_OSD_OP(RD, DATA, 10),    "list-snaps")       \
0223                                         \
0224     /* sync */                              \
0225     f(SYNC_READ,    __CEPH_OSD_OP(RD, DATA, 11),    "sync_read")        \
0226                                         \
0227     /* write */                             \
0228     f(WRITE,    __CEPH_OSD_OP(WR, DATA, 1), "write")        \
0229     f(WRITEFULL,    __CEPH_OSD_OP(WR, DATA, 2), "writefull")        \
0230     f(TRUNCATE, __CEPH_OSD_OP(WR, DATA, 3), "truncate")     \
0231     f(ZERO,     __CEPH_OSD_OP(WR, DATA, 4), "zero")         \
0232     f(DELETE,   __CEPH_OSD_OP(WR, DATA, 5), "delete")       \
0233                                         \
0234     /* fancy write */                           \
0235     f(APPEND,   __CEPH_OSD_OP(WR, DATA, 6), "append")       \
0236     f(SETTRUNC, __CEPH_OSD_OP(WR, DATA, 8), "settrunc")     \
0237     f(TRIMTRUNC,    __CEPH_OSD_OP(WR, DATA, 9), "trimtrunc")        \
0238                                         \
0239     f(TMAPUP,   __CEPH_OSD_OP(RMW, DATA, 10),   "tmapup")       \
0240     f(TMAPPUT,  __CEPH_OSD_OP(WR, DATA, 11),    "tmapput")      \
0241     f(TMAPGET,  __CEPH_OSD_OP(RD, DATA, 12),    "tmapget")      \
0242                                         \
0243     f(CREATE,   __CEPH_OSD_OP(WR, DATA, 13),    "create")       \
0244     f(ROLLBACK, __CEPH_OSD_OP(WR, DATA, 14),    "rollback")     \
0245                                         \
0246     f(WATCH,    __CEPH_OSD_OP(WR, DATA, 15),    "watch")        \
0247                                         \
0248     /* omap */                              \
0249     f(OMAPGETKEYS,  __CEPH_OSD_OP(RD, DATA, 17),    "omap-get-keys")    \
0250     f(OMAPGETVALS,  __CEPH_OSD_OP(RD, DATA, 18),    "omap-get-vals")    \
0251     f(OMAPGETHEADER, __CEPH_OSD_OP(RD, DATA, 19),   "omap-get-header")  \
0252     f(OMAPGETVALSBYKEYS, __CEPH_OSD_OP(RD, DATA, 20), "omap-get-vals-by-keys") \
0253     f(OMAPSETVALS,  __CEPH_OSD_OP(WR, DATA, 21),    "omap-set-vals")    \
0254     f(OMAPSETHEADER, __CEPH_OSD_OP(WR, DATA, 22),   "omap-set-header")  \
0255     f(OMAPCLEAR,    __CEPH_OSD_OP(WR, DATA, 23),    "omap-clear")       \
0256     f(OMAPRMKEYS,   __CEPH_OSD_OP(WR, DATA, 24),    "omap-rm-keys")     \
0257     f(OMAP_CMP, __CEPH_OSD_OP(RD, DATA, 25),    "omap-cmp")     \
0258                                         \
0259     /* tiering */                               \
0260     f(COPY_FROM,    __CEPH_OSD_OP(WR, DATA, 26),    "copy-from")        \
0261     f(COPY_FROM2,   __CEPH_OSD_OP(WR, DATA, 45),    "copy-from2")       \
0262     f(COPY_GET_CLASSIC, __CEPH_OSD_OP(RD, DATA, 27), "copy-get-classic") \
0263     f(UNDIRTY,  __CEPH_OSD_OP(WR, DATA, 28),    "undirty")      \
0264     f(ISDIRTY,  __CEPH_OSD_OP(RD, DATA, 29),    "isdirty")      \
0265     f(COPY_GET, __CEPH_OSD_OP(RD, DATA, 30),    "copy-get")     \
0266     f(CACHE_FLUSH,  __CEPH_OSD_OP(CACHE, DATA, 31), "cache-flush")      \
0267     f(CACHE_EVICT,  __CEPH_OSD_OP(CACHE, DATA, 32), "cache-evict")      \
0268     f(CACHE_TRY_FLUSH, __CEPH_OSD_OP(CACHE, DATA, 33), "cache-try-flush") \
0269                                         \
0270     /* convert tmap to omap */                      \
0271     f(TMAP2OMAP,    __CEPH_OSD_OP(RMW, DATA, 34),   "tmap2omap")        \
0272                                         \
0273     /* hints */                             \
0274     f(SETALLOCHINT, __CEPH_OSD_OP(WR, DATA, 35),    "set-alloc-hint")   \
0275                                         \
0276     /** multi **/                               \
0277     f(CLONERANGE,   __CEPH_OSD_OP(WR, MULTI, 1),    "clonerange")       \
0278     f(ASSERT_SRC_VERSION, __CEPH_OSD_OP(RD, MULTI, 2), "assert-src-version") \
0279     f(SRC_CMPXATTR, __CEPH_OSD_OP(RD, MULTI, 3),    "src-cmpxattr")     \
0280                                         \
0281     /** attrs **/                               \
0282     /* read */                              \
0283     f(GETXATTR, __CEPH_OSD_OP(RD, ATTR, 1), "getxattr")     \
0284     f(GETXATTRS,    __CEPH_OSD_OP(RD, ATTR, 2), "getxattrs")        \
0285     f(CMPXATTR, __CEPH_OSD_OP(RD, ATTR, 3), "cmpxattr")     \
0286                                         \
0287     /* write */                             \
0288     f(SETXATTR, __CEPH_OSD_OP(WR, ATTR, 1), "setxattr")     \
0289     f(SETXATTRS,    __CEPH_OSD_OP(WR, ATTR, 2), "setxattrs")        \
0290     f(RESETXATTRS,  __CEPH_OSD_OP(WR, ATTR, 3), "resetxattrs")      \
0291     f(RMXATTR,  __CEPH_OSD_OP(WR, ATTR, 4), "rmxattr")      \
0292                                         \
0293     /** subop **/                               \
0294     f(PULL,     __CEPH_OSD_OP1(SUB, 1),     "pull")         \
0295     f(PUSH,     __CEPH_OSD_OP1(SUB, 2),     "push")         \
0296     f(BALANCEREADS, __CEPH_OSD_OP1(SUB, 3),     "balance-reads")    \
0297     f(UNBALANCEREADS, __CEPH_OSD_OP1(SUB, 4),   "unbalance-reads")  \
0298     f(SCRUB,    __CEPH_OSD_OP1(SUB, 5),     "scrub")        \
0299     f(SCRUB_RESERVE, __CEPH_OSD_OP1(SUB, 6),    "scrub-reserve")    \
0300     f(SCRUB_UNRESERVE, __CEPH_OSD_OP1(SUB, 7),  "scrub-unreserve")  \
0301     f(SCRUB_STOP,   __CEPH_OSD_OP1(SUB, 8),     "scrub-stop")       \
0302     f(SCRUB_MAP,    __CEPH_OSD_OP1(SUB, 9),     "scrub-map")        \
0303                                         \
0304     /** lock **/                                \
0305     f(WRLOCK,   __CEPH_OSD_OP(WR, LOCK, 1), "wrlock")       \
0306     f(WRUNLOCK, __CEPH_OSD_OP(WR, LOCK, 2), "wrunlock")     \
0307     f(RDLOCK,   __CEPH_OSD_OP(WR, LOCK, 3), "rdlock")       \
0308     f(RDUNLOCK, __CEPH_OSD_OP(WR, LOCK, 4), "rdunlock")     \
0309     f(UPLOCK,   __CEPH_OSD_OP(WR, LOCK, 5), "uplock")       \
0310     f(DNLOCK,   __CEPH_OSD_OP(WR, LOCK, 6), "dnlock")       \
0311                                         \
0312     /** exec **/                                \
0313     /* note: the RD bit here is wrong; see special-case below in helper */ \
0314     f(CALL,     __CEPH_OSD_OP(RD, EXEC, 1), "call")         \
0315                                         \
0316     /** pg **/                              \
0317     f(PGLS,     __CEPH_OSD_OP(RD, PG, 1),   "pgls")         \
0318     f(PGLS_FILTER,  __CEPH_OSD_OP(RD, PG, 2),   "pgls-filter")      \
0319     f(PG_HITSET_LS, __CEPH_OSD_OP(RD, PG, 3),   "pg-hitset-ls")     \
0320     f(PG_HITSET_GET, __CEPH_OSD_OP(RD, PG, 4),  "pg-hitset-get")
0321 
0322 enum {
0323 #define GENERATE_ENUM_ENTRY(op, opcode, str)    CEPH_OSD_OP_##op = (opcode),
0324 __CEPH_FORALL_OSD_OPS(GENERATE_ENUM_ENTRY)
0325 #undef GENERATE_ENUM_ENTRY
0326 };
0327 
0328 static inline int ceph_osd_op_type_lock(int op)
0329 {
0330     return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK;
0331 }
0332 static inline int ceph_osd_op_type_data(int op)
0333 {
0334     return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA;
0335 }
0336 static inline int ceph_osd_op_type_attr(int op)
0337 {
0338     return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR;
0339 }
0340 static inline int ceph_osd_op_type_exec(int op)
0341 {
0342     return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC;
0343 }
0344 static inline int ceph_osd_op_type_pg(int op)
0345 {
0346     return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
0347 }
0348 static inline int ceph_osd_op_type_multi(int op)
0349 {
0350     return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI;
0351 }
0352 
0353 static inline int ceph_osd_op_mode_subop(int op)
0354 {
0355     return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB;
0356 }
0357 static inline int ceph_osd_op_mode_read(int op)
0358 {
0359     return (op & CEPH_OSD_OP_MODE_RD) &&
0360         op != CEPH_OSD_OP_CALL;
0361 }
0362 static inline int ceph_osd_op_mode_modify(int op)
0363 {
0364     return op & CEPH_OSD_OP_MODE_WR;
0365 }
0366 
0367 /*
0368  * Note that the following tmap definitions are also defined in Ceph's
0369  * librados.h; any modification here needs to be mirrored there.
0370  */
0371 #define CEPH_OSD_TMAP_HDR 'h'
0372 #define CEPH_OSD_TMAP_SET 's'
0373 #define CEPH_OSD_TMAP_CREATE 'c' /* create key */
0374 #define CEPH_OSD_TMAP_RM  'r'
0375 #define CEPH_OSD_TMAP_RMSLOPPY 'R'
0376 
0377 extern const char *ceph_osd_op_name(int op);
0378 
/*
 * osd op flags
 *
 * One bit per flag (bits 0..24).
 * An op may be READ, WRITE, or READ|WRITE.
 */
enum {
    CEPH_OSD_FLAG_ACK            = 1 << 0,   /* want (or is) "ack" ack */
    CEPH_OSD_FLAG_ONNVRAM        = 1 << 1,   /* want (or is) "onnvram" ack */
    CEPH_OSD_FLAG_ONDISK         = 1 << 2,   /* want (or is) "ondisk" ack */
    CEPH_OSD_FLAG_RETRY          = 1 << 3,   /* resend attempt */
    CEPH_OSD_FLAG_READ           = 1 << 4,   /* op may read */
    CEPH_OSD_FLAG_WRITE          = 1 << 5,   /* op may write */
    CEPH_OSD_FLAG_ORDERSNAP      = 1 << 6,   /* EOLDSNAP if snapc is out of order */
    CEPH_OSD_FLAG_PEERSTAT_OLD   = 1 << 7,   /* DEPRECATED msg includes osd_peer_stat */
    CEPH_OSD_FLAG_BALANCE_READS  = 1 << 8,
    CEPH_OSD_FLAG_PARALLELEXEC   = 1 << 9,   /* execute op in parallel */
    CEPH_OSD_FLAG_PGOP           = 1 << 10,  /* pg op, no object */
    CEPH_OSD_FLAG_EXEC           = 1 << 11,  /* op may exec */
    CEPH_OSD_FLAG_EXEC_PUBLIC    = 1 << 12,  /* DEPRECATED op may exec (public) */
    CEPH_OSD_FLAG_LOCALIZE_READS = 1 << 13,  /* read from nearby replica, if any */
    CEPH_OSD_FLAG_RWORDERED      = 1 << 14,  /* order wrt concurrent reads */
    CEPH_OSD_FLAG_IGNORE_CACHE   = 1 << 15,  /* ignore cache logic */
    CEPH_OSD_FLAG_SKIPRWLOCKS    = 1 << 16,  /* skip rw locks */
    CEPH_OSD_FLAG_IGNORE_OVERLAY = 1 << 17,  /* ignore pool overlay */
    CEPH_OSD_FLAG_FLUSH          = 1 << 18,  /* this is part of flush */
    CEPH_OSD_FLAG_MAP_SNAP_CLONE = 1 << 19,  /* map snap direct to clone id */
    /* use snapc provided even if pool uses pool snaps */
    CEPH_OSD_FLAG_ENFORCE_SNAPC  = 1 << 20,
    CEPH_OSD_FLAG_REDIRECTED     = 1 << 21,  /* op has been redirected */
    CEPH_OSD_FLAG_KNOWN_REDIR    = 1 << 22,  /* redirect bit is authoritative */
    CEPH_OSD_FLAG_FULL_TRY       = 1 << 23,  /* try op despite full flag */
    CEPH_OSD_FLAG_FULL_FORCE     = 1 << 24,  /* force op despite full flag */
};
0412 
/* per-op flags (one bit each); carried in ceph_osd_op.flags */
enum {
    CEPH_OSD_OP_FLAG_EXCL               = 1 << 0,  /* EXCL object create */
    CEPH_OSD_OP_FLAG_FAILOK             = 1 << 1,  /* continue despite failure */
    CEPH_OSD_OP_FLAG_FADVISE_RANDOM     = 1 << 2,  /* the op is random */
    CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL = 1 << 3,  /* the op is sequential */
    /* data will be accessed in the near future */
    CEPH_OSD_OP_FLAG_FADVISE_WILLNEED   = 1 << 4,
    /* data will not be accessed in the near future */
    CEPH_OSD_OP_FLAG_FADVISE_DONTNEED   = 1 << 5,
    /* data will be accessed only once by this client */
    CEPH_OSD_OP_FLAG_FADVISE_NOCACHE    = 1 << 6,
};
0425 
0426 #define EOLDSNAPC    ERESTART  /* ORDERSNAP flag set; writer has old snapc */
0427 #define EBLOCKLISTED ESHUTDOWN /* blocklisted */
0428 
0429 /* xattr comparison operators; carried in ceph_osd_op.xattr.cmp_op */
0430 enum {
0431     CEPH_OSD_CMPXATTR_OP_NOP = 0,
0432     CEPH_OSD_CMPXATTR_OP_EQ  = 1,
0433     CEPH_OSD_CMPXATTR_OP_NE  = 2,
0434     CEPH_OSD_CMPXATTR_OP_GT  = 3,
0435     CEPH_OSD_CMPXATTR_OP_GTE = 4,
0436     CEPH_OSD_CMPXATTR_OP_LT  = 5,
0437     CEPH_OSD_CMPXATTR_OP_LTE = 6
0438 };
0439 
0440 enum {  /* xattr comparison modes; carried in ceph_osd_op.xattr.cmp_mode */
0441     CEPH_OSD_CMPXATTR_MODE_STRING = 1,
0442     CEPH_OSD_CMPXATTR_MODE_U64    = 2
0443 };
0444 
/* copy-from flags (one bit each); carried in ceph_osd_op.copy_from.flags */
enum {
    CEPH_OSD_COPY_FROM_FLAG_FLUSH          = 1 << 0,  /* part of a flush operation */
    CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 1 << 1,  /* ignore pool overlay */
    CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE   = 1 << 2,  /* ignore osd cache logic */
    CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 1 << 3,  /* map snap direct to cloneid */
    CEPH_OSD_COPY_FROM_FLAG_RWORDERED      = 1 << 4,  /* order with write */
    CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ   = 1 << 5,  /* send truncate_{seq,size} */
};
0454 
0455 enum {  /* watch sub-operations; carried in ceph_osd_op.watch.op */
0456     CEPH_OSD_WATCH_OP_UNWATCH = 0,
0457     CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1,
0458     /* note: use only ODD ids to prevent pre-giant code from
0459        interpreting the op as UNWATCH */
0460     CEPH_OSD_WATCH_OP_WATCH = 3,
0461     CEPH_OSD_WATCH_OP_RECONNECT = 5,
0462     CEPH_OSD_WATCH_OP_PING = 7,
0463 };
0464 
0465 const char *ceph_osd_watch_op_name(int o);  /* defined outside this header */
0466 
/* allocation hint flags (one bit each); carried in ceph_osd_op.alloc_hint.flags */
enum {
    CEPH_OSD_ALLOC_HINT_FLAG_SEQUENTIAL_WRITE = 1 << 0,
    CEPH_OSD_ALLOC_HINT_FLAG_RANDOM_WRITE     = 1 << 1,
    CEPH_OSD_ALLOC_HINT_FLAG_SEQUENTIAL_READ  = 1 << 2,
    CEPH_OSD_ALLOC_HINT_FLAG_RANDOM_READ      = 1 << 3,
    CEPH_OSD_ALLOC_HINT_FLAG_APPEND_ONLY      = 1 << 4,
    CEPH_OSD_ALLOC_HINT_FLAG_IMMUTABLE        = 1 << 5,
    CEPH_OSD_ALLOC_HINT_FLAG_SHORTLIVED       = 1 << 6,
    CEPH_OSD_ALLOC_HINT_FLAG_LONGLIVED        = 1 << 7,
    CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE     = 1 << 8,
    CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE   = 1 << 9,
};
0479 
0480 enum {  /* backoff op codes; consecutive values starting at 1 */
0481     CEPH_OSD_BACKOFF_OP_BLOCK = 1,
0482     CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2,
0483     CEPH_OSD_BACKOFF_OP_UNBLOCK = 3,
0484 };
0485 
0486 /*
0487  * an individual object operation.  each may be accompanied by some data
0488  * payload
0489  */
0490 struct ceph_osd_op {
0491     __le16 op;           /* CEPH_OSD_OP_* */
0492     __le32 flags;        /* CEPH_OSD_OP_FLAG_* */
0493     union {
0494         struct {
0495             __le64 offset, length;
0496             __le64 truncate_size;
0497             __le32 truncate_seq;
0498         } __attribute__ ((packed)) extent;
0499         struct {
0500             __le32 name_len;
0501             __le32 value_len;
0502             __u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
0503             __u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
0504         } __attribute__ ((packed)) xattr;
0505         struct {
0506             __u8 class_len;
0507             __u8 method_len;
0508             __u8 argc;
0509             __le32 indata_len;
0510         } __attribute__ ((packed)) cls;
0511         struct {
0512             __le64 cookie, count;
0513         } __attribute__ ((packed)) pgls;
0514             struct {
0515                 __le64 snapid;
0516             } __attribute__ ((packed)) snap;
0517         struct {
0518             __le64 cookie;
0519             __le64 ver;     /* no longer used */
0520             __u8 op;    /* CEPH_OSD_WATCH_OP_* */
0521             __le32 gen;     /* registration generation */
0522         } __attribute__ ((packed)) watch;
0523         struct {
0524             __le64 cookie;
0525         } __attribute__ ((packed)) notify;
0526         struct {
0527             __le64 offset, length;
0528             __le64 src_offset;
0529         } __attribute__ ((packed)) clonerange;
0530         struct {
0531             __le64 expected_object_size;
0532             __le64 expected_write_size;
0533             __le32 flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
0534         } __attribute__ ((packed)) alloc_hint;
0535         struct {
0536             __le64 snapid;
0537             __le64 src_version;
0538             __u8 flags; /* CEPH_OSD_COPY_FROM_FLAG_* */
0539             /*
0540              * CEPH_OSD_OP_FLAG_FADVISE_*: fadvise flags
0541              * for src object, flags for dest object are in
0542              * ceph_osd_op::flags.
0543              */
0544             __le32 src_fadvise_flags;
0545         } __attribute__ ((packed)) copy_from;
0546     };
0547     __le32 payload_len;
0548 } __attribute__ ((packed));
0549 
0550 
0551 #endif