[PATCH 02/22] mpool: add in-memory struct definitions

nmeeramohide at micron.com nmeeramohide at micron.com
Mon Sep 28 12:45:14 EDT 2020


From: Nabeel M Mohamed <nmeeramohide at micron.com>

Add headers containing the basic in-memory structures used by mpool.

- mclass.h: media classes
- mlog.h: mlog objects
- mp.h, mpcore.h: mpool objects
- params.h: mpool parameters
- pd.h: pool drive interface
- pmd.h, pmd_obj.h: Metadata manager
- sb.h: superblock interface
- smap.h: space map interface

Co-developed-by: Greg Becker <gbecker at micron.com>
Signed-off-by: Greg Becker <gbecker at micron.com>
Co-developed-by: Pierre Labat <plabat at micron.com>
Signed-off-by: Pierre Labat <plabat at micron.com>
Co-developed-by: John Groves <jgroves at micron.com>
Signed-off-by: John Groves <jgroves at micron.com>
Signed-off-by: Nabeel M Mohamed <nmeeramohide at micron.com>
---
 drivers/mpool/mclass.h  | 137 +++++++++++
 drivers/mpool/mlog.h    | 212 +++++++++++++++++
 drivers/mpool/mp.h      | 231 +++++++++++++++++++
 drivers/mpool/mpcore.h  | 354 ++++++++++++++++++++++++++++
 drivers/mpool/params.h  | 116 ++++++++++
 drivers/mpool/pd.h      | 202 ++++++++++++++++
 drivers/mpool/pmd.h     | 379 ++++++++++++++++++++++++++++++
 drivers/mpool/pmd_obj.h | 499 ++++++++++++++++++++++++++++++++++++++++
 drivers/mpool/sb.h      | 162 +++++++++++++
 drivers/mpool/smap.h    | 334 +++++++++++++++++++++++++++
 10 files changed, 2626 insertions(+)
 create mode 100644 drivers/mpool/mclass.h
 create mode 100644 drivers/mpool/mlog.h
 create mode 100644 drivers/mpool/mp.h
 create mode 100644 drivers/mpool/mpcore.h
 create mode 100644 drivers/mpool/params.h
 create mode 100644 drivers/mpool/pd.h
 create mode 100644 drivers/mpool/pmd.h
 create mode 100644 drivers/mpool/pmd_obj.h
 create mode 100644 drivers/mpool/sb.h
 create mode 100644 drivers/mpool/smap.h

diff --git a/drivers/mpool/mclass.h b/drivers/mpool/mclass.h
new file mode 100644
index 000000000000..2ecdcd08de9f
--- /dev/null
+++ b/drivers/mpool/mclass.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_MCLASS_H
+#define MPOOL_MCLASS_H
+
+#include "mpool_ioctl.h"
+
+struct omf_devparm_descriptor;
+struct mpool_descriptor;
+struct mpcore_params;
+
+/*
+ * This file contains the media class structures definitions and prototypes
+ * private to mpool core.
+ */
+
+/**
+ * struct mc_parms - media class parameters
+ * @mcp_classp:    class performance characteristics, enum mp_media_classp
+ * @mcp_zonepg:    zone size in number of zone pages
+ * @mcp_sectorsz:  2^sectorsz is the logical sector size
+ * @mcp_devtype:   device type. Enum pd_devtype.
+ * @mcp_features:  ored bits from mp_mc_features
+ *
+ * Two PDs can't be placed in the same media class if they have different
+ * mc_parms.
+ */
+struct mc_parms {
+	u8  mcp_classp;
+	u32 mcp_zonepg;
+	u8  mcp_sectorsz;
+	u8  mcp_devtype;
+	u64 mcp_features;
+};
+
+/**
+ * struct mc_smap_parms - media class space map parameters
+ * @mcsp_spzone: percent spare zones for drives.
+ * @mcsp_rgnc: no. of space map zones for drives in each media class
+ * @mcsp_align: space map zone alignment for drives in each media class
+ */
+struct mc_smap_parms {
+	u8		mcsp_spzone;
+	u8		mcsp_rgnc;
+	u8		mcsp_align;
+};
+
+/**
+ * struct media_class - define a media class
+ * @mc_parms:  define a media class, content differ for each media class
+ * @mc_sparms: space map params for this media class
+ * @mc_pdmc:   active pdv entries grouped by media class array
+ * @mc_uacnt:  UNAVAIL status drive count in each media class
+ *
+ * Locking:
+ *    Protected by mp.pds_pdvlock.
+ */
+struct media_class {
+	struct mc_parms        mc_parms;
+	struct mc_smap_parms   mc_sparms;
+	s8                     mc_pdmc;
+	u8                     mc_uacnt;
+};
+
+/**
+ * mc_pd_prop2mc_parms() -  Convert PD properties into media class parameters.
+ * @pd_prop: input, pd properties.
+ * @mc_parms: output, media class parameters.
+ *
+ * Typically used before a lookup (mc_lookup_from_mc_parms()) to know in
+ * which media class a PD belongs to.
+ */
+void mc_pd_prop2mc_parms(struct pd_prop *pd_prop, struct mc_parms *mc_parms);
+
+/**
+ * mc_omf_devparm2mc_parms() - convert a omf_devparm_descriptor into an mc_parms.
+ * @omf_devparm: input
+ * @mc_parms: output
+ */
+void mc_omf_devparm2mc_parms(struct omf_devparm_descriptor *omf_devparm, struct mc_parms *mc_parms);
+
+/**
+ * mc_parms2omf_devparm() - convert a mc_parms in a omf_devparm_descriptor
+ * @mc_parms: input
+ * @omf_devparm: output
+ */
+void mc_parms2omf_devparm(struct mc_parms *mc_parms, struct omf_devparm_descriptor *omf_devparm);
+
+/**
+ * mc_cmp_omf_devparm() - check if two omf_devparm_descriptors correspond
+ *	to the same media class.
+ * @omfd1:
+ * @omfd2:
+ *
+ * Returns 0 if in same media class.
+ */
+int mc_cmp_omf_devparm(struct omf_devparm_descriptor *omfd1, struct omf_devparm_descriptor *omfd2);
+
+/**
+ * mc_init_class() - initialize a media class
+ * @mc:
+ * @mc_parms: parameters of the media class
+ * @mcsp:     smap parameters for mc
+ */
+void mc_init_class(struct media_class *mc, struct mc_parms *mc_parms, struct mc_smap_parms *mcsp);
+
+/**
+ * mc_set_spzone() - set the percent spare on the media class mclass.
+ * @mc:
+ * @spzone:
+ *
+ * Return: 0, or -ENOENT if the specified mclass doesn't exist.
+ */
+int mc_set_spzone(struct media_class *mc, u8 spzone);
+
+/**
+ * mclass_isvalid() - Return true iff 0 <= @mclass < MP_MED_NUMBER.
+ * @mclass: media class value to range-check
+ */
+static inline bool mclass_isvalid(enum mp_media_classp mclass)
+{
+	return !(mclass < 0 || mclass >= MP_MED_NUMBER);
+}
+
+/**
+ * mc_smap_parms_get() - get space map params for the specified mclass.
+ * @mc:
+ * @params:
+ * @mcsp: (output)
+ */
+int mc_smap_parms_get(struct media_class *mc, struct mpcore_params *params,
+		      struct mc_smap_parms *mcsp);
+
+#endif /* MPOOL_MCLASS_H */
diff --git a/drivers/mpool/mlog.h b/drivers/mpool/mlog.h
new file mode 100644
index 000000000000..0de816335d55
--- /dev/null
+++ b/drivers/mpool/mlog.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+/*
+ * Defines functions for writing, reading, and managing the lifecycle of mlogs.
+ */
+
+#ifndef MPOOL_MLOG_H
+#define MPOOL_MLOG_H
+
+#include <linux/uio.h>
+
+#include "mpool_ioctl.h"
+
+#define MB       (1024 * 1024)
+struct pmd_layout;
+struct mpool_descriptor;
+struct mlog_descriptor;
+
+
+/**
+ * struct mlog_read_iter - mlog read iterator
+ * @lri_layout: Layout of log being read
+ * @lri_soff:   Sector offset of next log block to read from
+ * @lri_gen:    Log generation number at iterator initialization
+ * @lri_roff:   Next offset in log block soff to read from
+ * @lri_rbidx:  Read buffer page index currently reading from
+ * @lri_sidx:   Log block index in lri_rbidx
+ * @lri_valid:  1 if iterator is valid; 0 otherwise
+ */
+struct mlog_read_iter {
+	struct pmd_layout  *lri_layout;
+	off_t               lri_soff;
+	u64                 lri_gen;
+	u16                 lri_roff;
+	u16                 lri_rbidx;
+	u16                 lri_sidx;
+	u8                  lri_valid;
+};
+
+/**
+ * struct mlog_fsetparms - mlog flush set parameters
+ *
+ * @mfp_totsec:  Total number of log blocks in mlog
+ * @mfp_secpga:  Is sector size page-aligned?
+ * @mfp_lpgsz:   Size of each page in read/append buffer
+ * @mfp_nlpgmb:  No. of pages in 1 MiB buffer
+ * @mfp_sectsz:  Sector size obtained from PD prop
+ * @mfp_nsecmb:  No. of sectors/log blocks in 1 MiB buffer
+ * @mfp_nseclpg: No. of sectors/log blocks per page
+ */
+struct mlog_fsetparms {
+	u32    mfp_totsec;
+	bool   mfp_secpga;
+	u32    mfp_lpgsz;
+	u16    mfp_nlpgmb;
+	u16    mfp_sectsz;
+	u16    mfp_nsecmb;
+	u16    mfp_nseclpg;
+};
+
+/**
+ * struct mlog_stat - mlog open status (referenced by associated struct pmd_layout)
+ * @lst_citr:    Current mlog read iterator
+ * @lst_mfp:     Mlog flush set parameters
+ * @lst_abuf:    Append buffer, max 1 MiB size
+ * @lst_rbuf:    Read buffer, max 1 MiB size - immutable
+ * @lst_rsoff:   LB offset of the 1st log block in lst_rbuf
+ * @lst_rseoff:  LB offset of the last log block in lst_rbuf
+ * @lst_asoff:   LB offset of the 1st log block in CFS
+ * @lst_wsoff:   Offset of the accumulating log block
+ * @lst_abdirty: true, if append buffer is dirty
+ * @lst_pfsetid: Prev. fSetID of the first log block in CFS
+ * @lst_cfsetid: Current fSetID of the CFS
+ * @lst_cfssoff: Offset within the 1st log block from where CFS starts
+ * @lst_aoff:    Next byte offset[0, sectsz) to fill in the current log block
+ * @lst_abidx:   Index of current filling page in lst_abuf
+ * @lst_csem:    enforce compaction semantics if true
+ * @lst_cstart:  valid compaction start marker in log?
+ * @lst_cend:    valid compaction end marker in log?
+ */
+struct mlog_stat {
+	struct mlog_read_iter  lst_citr;
+	struct mlog_fsetparms  lst_mfp;
+	char  **lst_abuf;
+	char  **lst_rbuf;
+	off_t   lst_rsoff;
+	off_t   lst_rseoff;
+	off_t   lst_asoff;
+	off_t   lst_wsoff;
+	bool    lst_abdirty;
+	u32     lst_pfsetid;
+	u32     lst_cfsetid;
+	u16     lst_cfssoff;
+	u16     lst_aoff;
+	u16     lst_abidx;
+	u8      lst_csem;
+	u8      lst_cstart;
+	u8      lst_cend;
+};
+
+#define MLOG_TOTSEC(lstat)  ((lstat)->lst_mfp.mfp_totsec)
+#define MLOG_LPGSZ(lstat)   ((lstat)->lst_mfp.mfp_lpgsz)
+#define MLOG_NLPGMB(lstat)  ((lstat)->lst_mfp.mfp_nlpgmb)
+#define MLOG_SECSZ(lstat)   ((lstat)->lst_mfp.mfp_sectsz)
+#define MLOG_NSECMB(lstat)  ((lstat)->lst_mfp.mfp_nsecmb)
+#define MLOG_NSECLPG(lstat) ((lstat)->lst_mfp.mfp_nseclpg)
+
+#define IS_SECPGA(lstat)    ((lstat)->lst_mfp.mfp_secpga)
+
+/*
+ * mlog API functions
+ */
+
+/*
+ * Error codes: all mlog fns can return one or more of:
+ * -EINVAL = invalid fn args
+ * -ENOENT = log not open or logid not found
+ * -EFBIG = log full
+ * -EMSGSIZE = cstart w/o cend indicating a crash during compaction
+ * -ENODATA = malformed or corrupted log
+ * -EIO = unable to read/write log on media
+ * -ENOMEM = insufficient room in copy-out buffer
+ * -EBUSY = log is in erasing state; wait or retry erase
+ */
+
+int mlog_alloc(struct mpool_descriptor *mp, struct mlog_capacity *capreq,
+	       enum mp_media_classp mclassp, struct mlog_props *prop,
+	       struct mlog_descriptor **mlh);
+
+int mlog_realloc(struct mpool_descriptor *mp, u64 objid, struct mlog_capacity *capreq,
+		 enum mp_media_classp mclassp, struct mlog_props *prop,
+		 struct mlog_descriptor **mlh);
+
+int mlog_find_get(struct mpool_descriptor *mp, u64 objid, int which,
+		  struct mlog_props *prop, struct mlog_descriptor **mlh);
+
+void mlog_put(struct mlog_descriptor *layout);
+
+void mlog_lookup_rootids(u64 *id1, u64 *id2);
+
+int mlog_commit(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_abort(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_delete(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+/**
+ * mlog_open() - Open committed log, validate contents, and return its generation number
+ * @mp:
+ * @mlh:
+ * @flags:
+ * @gen: output
+ *
+ * If log is already open just returns gen; if csem is true enforces compaction
+ * semantics so that open fails if valid cstart/cend markers are not present.
+ *
+ * Returns: 0 if successful, -errno otherwise
+ */
+int mlog_open(struct mpool_descriptor *mp, struct mlog_descriptor *mlh, u8 flags, u64 *gen);
+
+int mlog_close(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_gen(struct mlog_descriptor *mlh, u64 *gen);
+
+int mlog_empty(struct mpool_descriptor *mp, struct mlog_descriptor *mlh, bool *empty);
+
+int mlog_erase(struct mpool_descriptor *mp, struct mlog_descriptor *mlh, u64 mingen);
+
+int mlog_append_cstart(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_append_cend(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_append_data(struct mpool_descriptor *mp, struct mlog_descriptor *mlh,
+		     char *buf, u64 buflen, int sync);
+
+int mlog_read_data_init(struct mlog_descriptor *mlh);
+
+/**
+ * mlog_read_data_next() -
+ * @mp:
+ * @mlh:
+ * @buf:
+ * @buflen:
+ * @rdlen:
+ *
+ * Returns:
+ *   If -EOVERFLOW is returned, then "buf" is too small to
+ *   hold the read data. Can be retried with a bigger receive buffer whose
+ *   size is returned in rdlen.
+ */
+int mlog_read_data_next(struct mpool_descriptor *mp, struct mlog_descriptor *mlh,
+			char *buf, u64 buflen, u64 *rdlen);
+
+int mlog_get_props_ex(struct mpool_descriptor *mp, struct mlog_descriptor *mlh,
+		      struct mlog_props_ex *prop);
+
+void mlog_precompact_alsz(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_rw_raw(struct mpool_descriptor *mp, struct mlog_descriptor *mlh,
+		const struct kvec *iov, int iovcnt, u64 boff, u8 rw);
+
+void mlogutil_closeall(struct mpool_descriptor *mp);
+
+bool mlog_objid(u64 objid);
+
+struct pmd_layout *mlog2layout(struct mlog_descriptor *mlh);
+
+struct mlog_descriptor *layout2mlog(struct pmd_layout *layout);
+
+#endif /* MPOOL_MLOG_H */
diff --git a/drivers/mpool/mp.h b/drivers/mpool/mp.h
new file mode 100644
index 000000000000..e1570f8c8d0c
--- /dev/null
+++ b/drivers/mpool/mp.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_MP_H
+#define MPOOL_MP_H
+
+#include "mpool_ioctl.h"
+#include "uuid.h"
+#include "params.h"
+
+struct mpool_descriptor;
+
+#define MPOOL_OP_READ  0
+#define MPOOL_OP_WRITE 1
+#define PD_DEV_ID_PDUNAVAILABLE "DID_PDUNAVAILABLE"
+
+#define MPOOL_DRIVES_MAX       MP_MED_NUMBER
+#define MP_MED_ALL             MP_MED_NUMBER
+
+/* Object types */
+enum mp_obj_type {
+	MP_OBJ_UNDEF  = 0,
+	MP_OBJ_MBLOCK = 1,
+	MP_OBJ_MLOG   = 2,
+};
+
+/**
+ * struct mpool_config - mpool config record (see mpool_config_store())
+ * @mc_oid1:
+ * @mc_oid2:
+ * @mc_uid:
+ * @mc_gid:
+ * @mc_mode:
+ * @mc_rsvd0:           presumably reserved, like mc_rsvd1..mc_rsvd4 - TODO confirm
+ * @mc_captgt:
+ * @mc_ra_pages_max:
+ * @mc_vma_size_max:
+ * @mc_utype:           user-defined type
+ * @mc_label:           user-defined label
+ *
+ */
+struct mpool_config {
+	u64                     mc_oid1;
+	u64                     mc_oid2;
+	uid_t                   mc_uid;
+	gid_t                   mc_gid;
+	mode_t                  mc_mode;
+	u32                     mc_rsvd0;
+	u64                     mc_captgt;
+	u32                     mc_ra_pages_max;
+	u32                     mc_vma_size_max;
+	u32                     mc_rsvd1;
+	u32                     mc_rsvd2;
+	u64                     mc_rsvd3;
+	u64                     mc_rsvd4;
+	uuid_le                 mc_utype;
+	char                    mc_label[MPOOL_LABELSZ_MAX];
+};
+
+/*
+ * mpool API functions
+ */
+
+/**
+ * mpool_create() - Create an mpool
+ * @name:
+ * @flags: enum mp_mgmt_flags
+ * @dpaths:
+ * @pd_prop: PDs properties obtained by mpool_create() caller.
+ * @params:  mpcore parameters
+ * @mlog_cap:
+ *
+ * Create an mpool from dcnt drive paths dpaths; store mpool metadata as
+ * specified by mdparm;
+ *
+ * Return:
+ * %0 if successful, -errno otherwise..
+ * ENODEV if insufficient number of drives meeting mdparm,
+ */
+int mpool_create(const char *name, u32 flags, char **dpaths, struct pd_prop *pd_prop,
+		 struct mpcore_params *params, u64 mlog_cap);
+
+/**
+ * mpool_activate() - Activate an mpool
+ * @dcnt:
+ * @dpaths:
+ * @pd_prop: properties of the PDs. dcnt elements.
+ * @mlog_cap:
+ * @params:   mpcore parameters
+ * @flags:
+ * @mpp: *mpp is set to NULL if error
+ *
+ * Activate mpool on dcnt drive paths dpaths; if force flag is set tolerate
+ * unavailable drives up to redundancy limit; if successful *mpp is a handle
+ * for the mpool.
+ *
+ * Return:
+ * %0 if successful, -errno otherwise
+ * ENODEV if too many drives unavailable or failed,
+ * ENXIO if device previously removed from mpool and is no longer a member
+ */
+int mpool_activate(u64 dcnt, char **dpaths, struct pd_prop *pd_prop, u64 mlog_cap,
+		   struct mpcore_params *params, u32 flags, struct mpool_descriptor **mpp);
+
+
+/**
+ * mpool_deactivate() - Deactivate an mpool.
+ * @mp: mpool descriptor
+ *
+ * Deactivate mpool; caller must ensure no other thread can access mp; mp is
+ * invalid after call.
+ */
+int mpool_deactivate(struct mpool_descriptor *mp);
+
+/**
+ * mpool_destroy() - Destroy an mpool
+ * @dcnt:
+ * @dpaths:
+ * @pd_prop: PD properties.
+ * @flags:
+ *
+ * Destroy mpool on dcnt drive paths dpaths;
+ *
+ * Return:
+ * %0 if successful, -errno otherwise
+ */
+int mpool_destroy(u64 dcnt, char **dpaths, struct pd_prop *pd_prop, u32 flags);
+
+/**
+ * mpool_rename() - Rename mpool to mp_newname
+ * @dcnt:
+ * @dpaths:
+ * @pd_prop: PD properties.
+ * @flags:
+ * @mp_newname:
+ *
+ * Return:
+ * %0 if successful, -errno otherwise
+ */
+int mpool_rename(u64 dcnt, char **dpaths, struct pd_prop *pd_prop, u32 flags,
+		 const char *mp_newname);
+
+/**
+ * mpool_drive_add() - Add new drive dpath to mpool.
+ * @mp:
+ * @dpath:
+ * @pd_prop: PD properties.
+ *
+ * Return: %0 if successful; -errno otherwise...
+ */
+int mpool_drive_add(struct mpool_descriptor *mp, char *dpath, struct pd_prop *pd_prop);
+
+/**
+ * mpool_drive_spares() - Set percent spare zones to spzone for drives in media class mclassp.
+ * @mp:
+ * @mclassp:
+ * @spzone:
+ *
+ * Return: 0 if successful, -errno otherwise...
+ */
+int mpool_drive_spares(struct mpool_descriptor *mp, enum mp_media_classp mclassp, u8 spzone);
+
+/**
+ * mpool_mclass_get_cnt() - Get a count of media classes with drives in this mpool
+ * @mp:
+ * @cnt:
+ */
+void mpool_mclass_get_cnt(struct mpool_descriptor *mp, u32 *cnt);
+
+/**
+ * mpool_mclass_get() - Get information on mcl_cnt media classes
+ * @mp:
+ * @mcxc:
+ * @mcxv:
+ *
+ * Return: 0 if successful, -errno otherwise...
+ */
+int mpool_mclass_get(struct mpool_descriptor *mp, u32 *mcxc, struct mpool_mclass_xprops *mcxv);
+
+/**
+ * mpool_get_xprops() - Retrieve extended mpool properties
+ * @mp:
+ * @xprops:
+ */
+void mpool_get_xprops(struct mpool_descriptor *mp, struct mpool_xprops *xprops);
+
+/**
+ * mpool_get_devprops_by_name() - Fill in dprop for active drive with name pdname
+ * @mp:
+ * @pdname:
+ * @dprop:
+ *
+ * Return: %0 if success, -errno otherwise...
+ * -ENOENT if device with specified name cannot be found
+ */
+int mpool_get_devprops_by_name(struct mpool_descriptor *mp, char *pdname,
+			       struct mpool_devprops *dprop);
+
+/**
+ * mpool_get_usage() - Fill in stats with mpool space usage for the media class mclassp
+ * @mp:
+ * @mclassp:
+ * @usage:
+ *
+ * If mclassp is MP_MED_ALL, report on entire pool (all media classes).
+ *
+ * This function returns no value; the space usage is stored in @usage.
+ */
+void
+mpool_get_usage(
+	struct mpool_descriptor    *mp,
+	enum mp_media_classp        mclassp,
+	struct mpool_usage         *usage);
+
+/**
+ * mpool_config_store() - store a config record in MDC0
+ * @mp:
+ * @cfg:
+ */
+int mpool_config_store(struct mpool_descriptor *mp, const struct mpool_config *cfg);
+
+/**
+ * mpool_config_fetch() - fetch the current mpool config
+ * @mp:
+ * @cfg:
+ */
+int mpool_config_fetch(struct mpool_descriptor *mp, struct mpool_config *cfg);
+
+#endif /* MPOOL_MP_H */
diff --git a/drivers/mpool/mpcore.h b/drivers/mpool/mpcore.h
new file mode 100644
index 000000000000..904763d49814
--- /dev/null
+++ b/drivers/mpool/mpcore.h
@@ -0,0 +1,354 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_MPCORE_H
+#define MPOOL_MPCORE_H
+
+#include <linux/rbtree.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+
+#include "uuid.h"
+
+#include "mp.h"
+#include "pd.h"
+#include "smap.h"
+#include "mclass.h"
+#include "pmd.h"
+#include "params.h"
+
+extern struct rb_root mpool_pools;
+
+struct pmd_layout;
+
+/**
+ * enum mpool_status -
+ * @MPOOL_STAT_UNDEF:
+ * @MPOOL_STAT_OPTIMAL:
+ * @MPOOL_STAT_FAULTED:
+ */
+enum mpool_status {
+	MPOOL_STAT_UNDEF    = 0,
+	MPOOL_STAT_OPTIMAL  = 1,
+	MPOOL_STAT_FAULTED  = 2,
+	MPOOL_STAT_LAST = MPOOL_STAT_FAULTED,
+};
+
+_Static_assert((MPOOL_STAT_LAST < 256), "enum mpool_status must fit in u8");
+
+/**
+ * struct mpool_dev_info - Pool drive state, status, and params
+ * @pdi_status:   enum pd_status value held in an atomic_t: drive status
+ * @pdi_parm:     drive parms
+ * @pdi_ds:       drive space allocation info
+ * @pdi_rmbktv:   pointer to struct rmbkt (not defined here); presumably the
+ *                per allocation zone space maps and their locks, which this
+ *                comment used to list as pdi_rmap/pdi_rmlock - TODO confirm
+ * @pdi_devid:    UUID for this drive
+ *
+ * pdi_name (device name, only the last path name component) and the other
+ * pdi_* shortcuts defined after this struct are macros over pdi_parm fields.
+ *
+ * LOCKING:
+ *    devid, mclass : constant; no locking required
+ *    parm: constant EXCEPT in rare change of status from UNAVAIL; see below
+ *    status: usage does not require locking, but MUST get/set via accessors
+ *    state: protected by pdvlock in enclosing mpool_descriptor
+ *    ds: protected by ds.dalock defined in smap module
+ *    zmap[x]: protected by zmlock[x]
+ *
+ * parm fields are constant except in a rare change of status from UNAVAIL,
+ * during which a subset of the fields are modified.  See the pd module for
+ * details on how this is handled w/o requiring locking.
+ */
+struct mpool_dev_info {
+	atomic_t                pdi_status; /* Barriers or acq/rel required */
+	struct pd_dev_parm      pdi_parm;
+	struct smap_dev_alloc   pdi_ds;
+	struct rmbkt           *pdi_rmbktv;
+	struct mpool_uuid       pdi_devid;
+};
+
+/* Shortcuts */
+#define pdi_didstr    pdi_parm.dpr_prop.pdp_didstr
+#define pdi_zonepg    pdi_parm.dpr_prop.pdp_zparam.dvb_zonepg
+#define pdi_zonetot   pdi_parm.dpr_prop.pdp_zparam.dvb_zonetot
+#define pdi_devtype   pdi_parm.dpr_prop.pdp_devtype
+#define pdi_cmdopt    pdi_parm.dpr_prop.pdp_cmdopt
+#define pdi_mclass    pdi_parm.dpr_prop.pdp_mclassp
+#define pdi_devsz     pdi_parm.dpr_prop.pdp_devsz
+#define pdi_sectorsz  pdi_parm.dpr_prop.pdp_sectorsz
+#define pdi_optiosz   pdi_parm.dpr_prop.pdp_optiosz
+#define pdi_fua       pdi_parm.dpr_prop.pdp_fua
+#define pdi_prop      pdi_parm.dpr_prop
+#define pdi_name      pdi_parm.dpr_name
+
+/**
+ * struct uuid_to_mpdesc_rb - rbtree node pairing an mpool UUID with its descriptor
+ * @utm_node:
+ * @utm_uuid_le:
+ * @utm_md:
+ */
+struct uuid_to_mpdesc_rb {
+	struct rb_node              utm_node;
+	struct mpool_uuid           utm_uuid_le;
+	struct mpool_descriptor    *utm_md;
+};
+
+/**
+ * struct mpdesc_mdparm - parameters used for the MDCs of the mpool.
+ * @md_mclass:  media class used for the mpool metadata
+ */
+struct mpdesc_mdparm {
+	u8     md_mclass;
+};
+
+/**
+ * struct pre_compact_ctrl - used to start/stop/control precompaction
+ * @pco_dwork: delayed work item
+ * @pco_mp:    mpool descriptor pointer
+ * @pco_nmtoc: next MDC to compact
+ *
+ * Each time pmd_precompact_cb() runs it will consider the next MDC
+ * for compaction.
+ */
+struct pre_compact_ctrl {
+	struct delayed_work	 pco_dwork;
+	struct mpool_descriptor *pco_mp;
+	atomic_t		 pco_nmtoc;
+};
+
+/**
+ * struct mpool_descriptor - Media pool descriptor
+ * @pds_pdvlock:  drive membership/state lock
+ * @pds_pdv:      per drive info array
+ * @pds_oml_lock: open mlog index lock
+ * @pds_oml_root: rbtree of open mlog layouts. indexed by objid
+ *                node type: objid_to_layout_rb
+ * @pds_poolid:   UUID of pool
+ * @pds_mdparm:   mclass id of mclass used for mdc layouts
+ * @pds_cfg:      mpool config
+ * @pds_pdvcnt:   cnt of valid pdv entries
+ * @pds_mc:       table of media classes
+ * @pds_name:     mpool name
+ * @pds_node:     for linking this object into an rbtree
+ * @pds_params:   Per mpool parameters
+ * @pds_workq:    Workqueue per mpool.
+ * @pds_sbmdc0:   Used to store in RAM the MDC0 metadata. Loaded at activate
+ *                time, changed when MDC0 is compacted.
+ * @pds_mda:      metadata container array (this thing is huge!)
+ *
+ * LOCKING:
+ *    poolid, mdparm: constant; no locking required
+ *    mda: protected by internal locks as documented in pmd module
+ *    oml_root: protected by oml_lock
+ *    pdv: see note
+ *    pds_mc: protected by pds_pdvlock
+ *	Update of pds_mc[].mc_sparms.mcsp_spzone must also be enclosed
+ *	with mpool_s_lock to serialize the spzone updates, because they include
+ *	an append of an MDC0 record on top of updating mcsp_spzone.
+ *    all other fields: protected by pds_pdvlock (as is pds_pdv[x].state)
+ *    pds_sbmdc0: Used to store in RAM the MDC0 metadata. Loaded when mpool
+ *	activated, no lock needed at that time (single threaded).
+ *	Then changed during MDC0 compaction. At that time it is protected by
+ *	MDC0 compact lock.
+ *
+ * NOTE:
+ *    pds_pdvcnt only ever increases so that pds_pdv[x], x < pdvcnt, can be
+ *    accessed without locking, other than as required by the struct
+ *    mpool_dev_info.
+ *    mcsp_spzone is written and read only by mpool functions that are
+ *    serialized via mpool_s_lock.
+ */
+struct mpool_descriptor {
+	struct rw_semaphore         pds_pdvlock;
+
+	____cacheline_aligned
+	struct mpool_dev_info       pds_pdv[MPOOL_DRIVES_MAX];
+
+	____cacheline_aligned
+	struct mutex                pds_oml_lock;
+	struct rb_root              pds_oml_root;
+
+	/* Read-mostly fields... */
+	____cacheline_aligned
+	u16                         pds_pdvcnt;
+	struct mpdesc_mdparm        pds_mdparm;
+	struct workqueue_struct    *pds_workq;
+	struct workqueue_struct    *pds_erase_wq;
+	struct workqueue_struct    *pds_precompact_wq;
+
+	struct media_class          pds_mc[MP_MED_NUMBER];
+	struct mpcore_params        pds_params;
+	struct omf_sb_descriptor    pds_sbmdc0;
+	struct pre_compact_ctrl     pds_pco;
+	struct smap_usage_work      pds_smap_usage_work;
+
+	/* Rarely used fields... */
+	struct mpool_config         pds_cfg;
+	struct rb_node              pds_node;
+	struct mpool_uuid           pds_poolid;
+	char                        pds_name[MPOOL_NAMESZ_MAX];
+
+	/* pds_mda is enormous (91K) */
+	struct pmd_mda_info         pds_mda;
+};
+
+/**
+ * mpool_desc_unavail_add() - Add unavailable drive to mpool descriptor.
+ * @mp:
+ * @devparm:
+ *
+ * Add unavailable drive to mpool descriptor; caller must guarantee that
+ * devparm.devid is not already there.
+ * As part of adding the drive to the mpool descriptor, the drive is added
+ * in its media class.
+ *
+ * Return: 0 if successful, -errno (-EINVAL or -ENOMEM) otherwise
+ */
+int mpool_desc_unavail_add(struct mpool_descriptor *mp, struct omf_devparm_descriptor *devparm);
+
+/**
+ * mpool_desc_pdmc_add() - Add a device in its media class.
+ * @mp:
+ * @pdh:
+ * @omf_devparm:
+ * @check_only: if true, the call doesn't change any state, it only check
+ *	if the PD could be added in a media class.
+ *
+ * If the media class doesn't exist yet, it is created here.
+ *
+ * This function has two inputs related to the PD it is acting on:
+ *  "pdh"
+ *  and "omf_devparm"
+ *
+ * If omf_devparm is NULL, it means that the media class in which the PD must
+ * be placed is derived from mp->pds_pdv[pdh].pdi_parm.dpr_prop
+ * In that case the PD properties (.dpr_prop) must be updated and
+ * correct when entering this function.
+ * devparm is NULL when the device is available, that means the discovery
+ * was able to update .dpr_prop.
+ *
+ * If omf_devparm is not NULL, it means that the media class in which the PD
+ * must be placed is derived from omf_devparm.
+ * This is used when unavailable PDs are placed in their media class. In this
+ * situation (because the PD is unavailable) the discovery couldn't discover
+ * the PD properties and mp->pds_pdv[pdh].pdi_parm.dpr_prop has not been
+ * updated because of that.
+ * So we can't use .dpr_prop to place the PD in its class, instead we use what
+ * is coming from the persistent metadata (PD state record in MDC0). Aka
+ * omf_devparm.
+ * mp->pds_pdv[pdh].pdi_parm.dpr_prop will be updated if/when the PD is available
+ * again.
+ *
+ * Restrictions in placing PDs in media classes
+ * --------------------------------------------
+ * This function enforces these restrictions.
+ * These restrictions are:
+ * a) in a mpool, for a given mclassp (enum mp_media_classp), there is
+ *    at maximum one media class.
+ * b) All drives of a media class must be checksummed or none, no mix allowed.
+ * c) The STAGING and CAPACITY classes must be both checksummed or both not
+ *    checksummed.
+ *
+ * Locking:
+ * -------
+ *	Should be called with mp.pds_pdvlock held in write.
+ *	Except if mpool is single threaded (during activate for example).
+ */
+int
+mpool_desc_pdmc_add(
+	struct mpool_descriptor		*mp,
+	u16				 pdh,
+	struct omf_devparm_descriptor	*omf_devparm,
+	bool				 check_only);
+
+int uuid_to_mpdesc_insert(struct rb_root *root, struct mpool_descriptor *data);
+
+int
+mpool_dev_sbwrite(
+	struct mpool_descriptor    *mp,
+	struct mpool_dev_info      *pd,
+	struct omf_sb_descriptor   *sbmdc0);
+
+int
+mpool_mdc0_sb2obj(
+	struct mpool_descriptor    *mp,
+	struct omf_sb_descriptor   *sb,
+	struct pmd_layout         **l1,
+	struct pmd_layout         **l2);
+
+int mpool_desc_init_newpool(struct mpool_descriptor *mp, u32 flags);
+
+int
+mpool_dev_init_all(
+	struct mpool_dev_info  *pdv,
+	u64                     dcnt,
+	char                  **dpaths,
+	struct pd_prop	       *pd_prop);
+
+void mpool_mdc_cap_init(struct mpool_descriptor *mp, struct mpool_dev_info *pd);
+
+int
+mpool_desc_init_sb(
+	struct mpool_descriptor    *mp,
+	struct omf_sb_descriptor   *sbmdc0,
+	u32                         flags,
+	bool                       *mc_resize);
+
+int mpool_dev_sbwrite_newpool(struct mpool_descriptor *mp, struct omf_sb_descriptor *sbmdc0);
+
+int check_for_dups(char **listv, int cnt, int *dup, int *offset);
+
+void fill_in_devprops(struct mpool_descriptor *mp, u64 pdh, struct mpool_devprops *dprop);
+
+int mpool_create_rmlogs(struct mpool_descriptor *mp, u64 mlog_cap);
+
+struct mpool_descriptor *mpool_desc_alloc(void);
+
+void mpool_desc_free(struct mpool_descriptor *mp);
+
+int mpool_dev_check_new(struct mpool_descriptor *mp, struct mpool_dev_info *pd);
+
+/*
+ * Read accessor for pd->pdi_status, returned as an enum pd_status.
+ *
+ * The load has acquire semantics: accesses that follow it in program order
+ * cannot be reordered before it. The writer side is
+ * mpool_pd_status_set().
+ */
+static inline enum pd_status mpool_pd_status_get(struct mpool_dev_info *pd)
+{
+	return atomic_read_acquire(&pd->pdi_status);
+}
+
+static inline void mpool_pd_status_set(struct mpool_dev_info *pd, enum pd_status status)
+{
+	/* Write barrier: all prior stores are ordered before the status store below */
+	smp_wmb();
+	atomic_set(&pd->pdi_status, status);
+}
+
+/**
+ * mpool_get_mpname() - Copy the mpool name into a caller-supplied buffer
+ * @mp:     mpool descriptor of the mpool
+ * @mpname: buffer to copy the mpool name into
+ * @mplen:  size of @mpname in bytes; a longer name is silently truncated
+ *
+ * Return: %0 if successful, -EINVAL if @mp or @mpname is NULL. The strscpy()
+ * result is deliberately ignored, so truncation is not reported.
+ */
+static inline int mpool_get_mpname(struct mpool_descriptor *mp, char *mpname, size_t mplen)
+{
+	if (!mp || !mpname)
+		return -EINVAL;
+
+	strscpy(mpname, mp->pds_name, mplen);
+
+	return 0;
+}
+
+
+#endif /* MPOOL_MPCORE_H */
diff --git a/drivers/mpool/params.h b/drivers/mpool/params.h
new file mode 100644
index 000000000000..5d1f40857a2a
--- /dev/null
+++ b/drivers/mpool/params.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_PARAMS_H
+#define MPOOL_PARAMS_H
+
+#define MPOOL_MDC_SET_SZ                16
+
+/* Mpool metadata container compaction retries; keep relatively small */
+#define MPOOL_MDC_COMPACT_RETRY_DEFAULT 5
+
+/*
+ * Space map allocation zones per drive; bounds number of concurrent obj
+ * allocs
+ */
+#define MPOOL_SMAP_RGNCNT_DEFAULT       4
+
+/*
+ * Space map alignment in number of zones.
+ */
+#define MPOOL_SMAP_ZONEALIGN_DEFAULT    1
+
+/*
+ * Number of concurrent jobs for loading user MDC 1~N
+ */
+#define MPOOL_OBJ_LOAD_JOBS_DEFAULT     8
+
+/*
+ * Defaults for MDC1/255 pre-compaction.
+ */
+#define MPOOL_PCO_PCTFULL               70
+#define MPOOL_PCO_PCTGARBAGE            20
+#define MPOOL_PCO_NBNOALLOC              2
+#define MPOOL_PCO_PERIOD                 5
+#define MPOOL_PCO_FILLBIAS	      1000
+#define MPOOL_PD_USAGE_PERIOD        60000
+#define MPOOL_CREATE_MDC_PCTFULL  (MPOOL_PCO_PCTFULL - MPOOL_PCO_PCTGARBAGE)
+#define MPOOL_CREATE_MDC_PCTGRBG   MPOOL_PCO_PCTGARBAGE
+
+
+/**
+ * struct mpcore_params - mpool core parameters. Not exported to public API.
+ * @mp_mdcnum:  Number of MDCs, *ONLY* for testing purpose
+ * @mp_mdc0cap: MDC0 capacity,  *ONLY* for testing purpose
+ * @mp_mdcncap: MDCN capacity,  *ONLY* for testing purpose
+ * @mp_smaprgnc: space map allocation zones per drive; bounds concurrent obj allocs
+ * @mp_smapalign: space map alignment in number of zones
+ * @mp_spare: defaults to MPOOL_SPARES_DEFAULT
+ * @mp_objloadjobs: number of concurrent MDC loading jobs
+ *
+ * The below parameters starting with "pco" are used for the pre-compaction
+ * of MDC1/255
+ * @mp_pcopctfull:  % (0-100) of fill of MDCi active mlog that must be reached
+ *	before a pre-compaction is attempted.
+ * @mp_pcopctgarbage:  % (0-100) of garbage in MDCi active mlog that must be
+ *	reached	before a pre-compaction is attempted.
+ * @mp_pconbnoalloc: Number of MDCs from which no object is allocated.
+ *	If 0, the background pre-compaction is disabled.
+ * @mp_pcoperiod: In seconds. Period at which a background thread checks if
+ *	an MDC needs compaction.
+ * @mp_pcofillbias: If the next mpool MDC has fewer objects than
+ *	(current MDC objects - pcofillbias), then allocate an object
+ *	from the next MDC instead of from the current one.
+ *	This bias favors object allocation from less filled MDCs (in terms
+ *	of number of committed objects).
+ *	The bigger the number, the less bias.
+ * @mp_crtmdcpctfull: percent full threshold across all MDCs; in combination
+ *      with @mp_crtmdcpctgrbg it is used as a trigger to create new MDCs
+ * @mp_crtmdcpctgrbg: percent garbage threshold; in combination with
+ *      @mp_crtmdcpctfull it is used as a trigger to create new MDCs
+ * @mp_mpusageperiod: period at which a background thread checks mpool space
+ * usage, in milliseconds
+ */
+struct mpcore_params {
+	u64    mp_mdcnum;
+	u64    mp_mdc0cap;
+	u64    mp_mdcncap;
+	u64    mp_smaprgnc;
+	u64    mp_smapalign;
+	u64    mp_spare;
+	u64    mp_objloadjobs;
+	u64    mp_pcopctfull;
+	u64    mp_pcopctgarbage;
+	u64    mp_pconbnoalloc;
+	u64    mp_pcoperiod;
+	u64    mp_pcofillbias;
+	u64    mp_crtmdcpctfull;
+	u64    mp_crtmdcpctgrbg;
+	u64    mp_mpusageperiod;
+};
+
+/**
+ * mpcore_params_defaults() - load the compile-time defaults into @params
+ */
+static inline void mpcore_params_defaults(struct mpcore_params *params)
+{
+	params->mp_mdcnum = MPOOL_MDCNUM_DEFAULT;
+	params->mp_mdc0cap = 0;
+	params->mp_mdcncap = 0;
+	params->mp_smaprgnc = MPOOL_SMAP_RGNCNT_DEFAULT;
+	params->mp_smapalign = MPOOL_SMAP_ZONEALIGN_DEFAULT;
+	params->mp_spare = MPOOL_SPARES_DEFAULT;
+	params->mp_objloadjobs = MPOOL_OBJ_LOAD_JOBS_DEFAULT;
+	params->mp_pcopctfull = MPOOL_PCO_PCTFULL;
+	params->mp_pcopctgarbage = MPOOL_PCO_PCTGARBAGE;
+	params->mp_pconbnoalloc = MPOOL_PCO_NBNOALLOC;
+	params->mp_pcoperiod = MPOOL_PCO_PERIOD;
+	params->mp_pcofillbias = MPOOL_PCO_FILLBIAS;
+	params->mp_crtmdcpctfull = MPOOL_CREATE_MDC_PCTFULL;
+	params->mp_crtmdcpctgrbg = MPOOL_CREATE_MDC_PCTGRBG;
+	params->mp_mpusageperiod = MPOOL_PD_USAGE_PERIOD;
+}
+
+#endif /* MPOOL_PARAMS_H */
diff --git a/drivers/mpool/pd.h b/drivers/mpool/pd.h
new file mode 100644
index 000000000000..c8faefc7cf11
--- /dev/null
+++ b/drivers/mpool/pd.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_PD_H
+#define MPOOL_PD_H
+
+#include <linux/uio.h>
+
+#include "uuid.h"
+#include "mpool_ioctl.h"
+
+/* Returns PD length in bytes. */
+#define PD_LEN(_pd_prop) ((_pd_prop)->pdp_devsz)
+
+/* Returns PD sector size (exponent, power of 2) */
+#define PD_SECTORSZ(_pd_prop) ((_pd_prop)->pdp_sectorsz)
+
+/* Return PD sector size mask; a 64-bit one is shifted so exponents >= 31 stay defined */
+#define PD_SECTORMASK(_pd_prop) (((uint64_t)1 << PD_SECTORSZ(_pd_prop)) - 1)
+
+struct omf_devparm_descriptor;
+
+/**
+ * struct pd_dev_parm - per-drive parameters handed to the pd_*() functions
+ * @dpr_prop:		drive properties including zone parameters
+ * @dpr_dev_private:    private info for implementation
+ * @dpr_name:           device name
+ */
+struct pd_dev_parm {
+	struct pd_prop	         dpr_prop;
+	void		        *dpr_dev_private;
+	char                     dpr_name[PD_NAMESZ_MAX];
+};
+
+/* Shortcuts */
+#define dpr_zonepg        dpr_prop.pdp_zparam.dvb_zonepg
+#define dpr_zonetot       dpr_prop.pdp_zparam.dvb_zonetot
+#define dpr_devsz         dpr_prop.pdp_devsz
+#define dpr_didstr        dpr_prop.pdp_didstr
+#define dpr_mediachar     dpr_prop.pdp_mediachar
+#define dpr_cmdopt        dpr_prop.pdp_cmdopt
+#define dpr_optiosz       dpr_prop.pdp_optiosz
+
+/**
+ * enum pd_status - Transient drive status.
+ * @PD_STAT_UNDEF:       undefined; should never occur
+ * @PD_STAT_ONLINE:      drive is responding to I/O requests
+ * @PD_STAT_SUSPECT:     drive is failing some I/O requests
+ * @PD_STAT_OFFLINE:     drive declared non-responsive to I/O requests
+ * @PD_STAT_UNAVAIL:     drive path not provided or open failed when mpool was opened
+ *
+ * The drive status is transient; these values are stored in an atomic_t
+ * variable.
+ */
+enum pd_status {
+	PD_STAT_UNDEF      = 0,
+	PD_STAT_ONLINE     = 1,
+	PD_STAT_SUSPECT    = 2,
+	PD_STAT_OFFLINE    = 3,
+	PD_STAT_UNAVAIL    = 4
+};
+
+_Static_assert((PD_STAT_UNAVAIL < 256), "enum pd_status must fit in uint8_t");
+
+/**
+ * enum pd_cmd_opt - drive command options
+ * @PD_CMD_NONE:             no option set.
+ * @PD_CMD_DISCARD:	     the device has TRIM/UNMAP command.
+ * @PD_CMD_SECTOR_UPDATABLE: the device can be read/written with sector granularity.
+ * @PD_CMD_DIF_ENABLED:      T10 DIF is used on this device.
+ * @PD_CMD_SED_ENABLED:      Self encrypting enabled
+ * @PD_CMD_DISCARD_ZERO:     the device supports discard_zero
+ * @PD_CMD_RDONLY:           activate mpool with PDs in RDONLY mode; write/discard are no-ops.
+ *
+ * Values form a bit vector and can be combined; fields holding one should be uint64_t.
+ *
+ * TODO: we need to find a way to detect if SED is enabled on a device
+ */
+enum pd_cmd_opt {
+	PD_CMD_NONE             = 0,
+	PD_CMD_DISCARD          = 0x1,
+	PD_CMD_SECTOR_UPDATABLE = 0x2,
+	PD_CMD_DIF_ENABLED      = 0x4,
+	PD_CMD_SED_ENABLED      = 0x8,
+	PD_CMD_DISCARD_ZERO     = 0x10,
+	PD_CMD_RDONLY           = 0x20,
+};
+
+/**
+ * enum pd_devtype - Device types
+ * @PD_DEV_TYPE_BLOCK_STREAM: Block device implementing streams.
+ * @PD_DEV_TYPE_BLOCK_STD:    Standard (non-streams) device (SSD, HDD).
+ * @PD_DEV_TYPE_FILE:	      File in user space for UT.
+ * @PD_DEV_TYPE_MEM:	      Memory semantic device, e.g. NVDIMM direct access (raw or dax mode)
+ * @PD_DEV_TYPE_ZONE:	      zone-like device, e.g., open channel SSD and SMR HDD (using ZBC/ZAC)
+ * @PD_DEV_TYPE_BLOCK_NVDIMM: Standard (non-streams) NVDIMM in sector mode.
+ */
+enum pd_devtype {
+	PD_DEV_TYPE_BLOCK_STREAM = 1,
+	PD_DEV_TYPE_BLOCK_STD,
+	PD_DEV_TYPE_FILE,
+	PD_DEV_TYPE_MEM,
+	PD_DEV_TYPE_ZONE,
+	PD_DEV_TYPE_BLOCK_NVDIMM,
+	PD_DEV_TYPE_LAST = PD_DEV_TYPE_BLOCK_NVDIMM, /* alias of the highest device type */
+};
+
+_Static_assert((PD_DEV_TYPE_LAST < 256), "enum pd_devtype must fit in uint8_t");
+
+/**
+ * enum pd_state - Device states
+ * @PD_DEV_STATE_AVAIL:       Device is available
+ * @PD_DEV_STATE_UNAVAIL:     Device is unavailable
+ */
+enum pd_state {
+	PD_DEV_STATE_UNDEFINED = 0,
+	PD_DEV_STATE_AVAIL = 1,
+	PD_DEV_STATE_UNAVAIL = 2,
+	PD_DEV_STATE_LAST = PD_DEV_STATE_UNAVAIL, /* alias of the highest device state */
+};
+
+_Static_assert((PD_DEV_STATE_LAST < 256), "enum pd_state must fit in uint8_t");
+
+/*
+ * pd API functions -- device-type independent dparm ops
+ */
+
+/*
+ * Error codes: All pd functions can return one or more of:
+ *
+ * -EINVAL    invalid fn args
+ * -EBADSLT   attempt to read or write a bad zone on a zone device
+ * -EIO       all other errors
+ */
+
+int pd_dev_open(const char *path, struct pd_dev_parm *dparm, struct pd_prop *pd_prop);
+int pd_dev_close(struct pd_dev_parm *dparm);
+int pd_dev_flush(struct pd_dev_parm *dparm);
+
+/**
+ * pd_zone_erase() -
+ * @dparm:
+ * @zaddr:
+ * @zonecnt:
+ * @reads_erased: whether the data can be read post DISCARD
+ *
+ * Return: -EINVAL, -EBADSLT or -EIO on failure (error codes common to all pd functions)
+ */
+int pd_zone_erase(struct pd_dev_parm *dparm, u64 zaddr, u32 zonecnt, bool reads_erased);
+
+/*
+ * pd API functions - device dependent operations
+ */
+
+/**
+ * pd_zone_pwritev() -
+ * @dparm:
+ * @iov:
+ * @iovcnt:
+ * @zaddr:
+ * @boff: offset in bytes from the start of "zaddr".
+ * @opflags:
+ *
+ * Return: -EINVAL, -EBADSLT or -EIO on failure (error codes common to all pd functions)
+ */
+int pd_zone_pwritev(struct pd_dev_parm *dparm, const struct kvec *iov,
+		    int iovcnt, u64 zaddr, loff_t boff, int opflags);
+
+/**
+ * pd_zone_pwritev_sync() -
+ * @dparm:
+ * @iov:
+ * @iovcnt:
+ * @zaddr:
+ * @boff: Offset in bytes from the start of zaddr.
+ *
+ * Return: -EINVAL, -EBADSLT or -EIO on failure (error codes common to all pd functions)
+ */
+int pd_zone_pwritev_sync(struct pd_dev_parm *dparm, const struct kvec *iov,
+			 int iovcnt, u64 zaddr, loff_t boff);
+
+/**
+ * pd_zone_preadv() -
+ * @dparm:
+ * @iov:
+ * @iovcnt:
+ * @zaddr: target zone for this I/O
+ * @boff:    byte offset into the target zone
+ *
+ * Return: -EINVAL, -EBADSLT or -EIO on failure (error codes common to all pd functions)
+ */
+int pd_zone_preadv(struct pd_dev_parm *dparm, const struct kvec *iov,
+		   int iovcnt, u64 zaddr, loff_t boff);
+
+void pd_dev_set_unavail(struct pd_dev_parm *dparm, struct omf_devparm_descriptor *omf_devparm);
+
+int pd_init(void) __cold;
+void pd_exit(void) __cold;
+
+#endif /* MPOOL_PD_H */
diff --git a/drivers/mpool/pmd.h b/drivers/mpool/pmd.h
new file mode 100644
index 000000000000..5fd6ca020fd1
--- /dev/null
+++ b/drivers/mpool/pmd.h
@@ -0,0 +1,379 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_PMD_H
+#define MPOOL_PMD_H
+
+#include <linux/atomic.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+#include "mpool_ioctl.h"
+#include "omf_if.h"
+#include "pmd_obj.h"
+
+/**
+ * DOC: Module info.
+ *
+ * Pool metadata (pmd) module.
+ *
+ * Implements functions for mpool metadata management.
+ *
+ */
+
+struct mpool_descriptor;
+struct mpool_dev_info;
+struct mp_mdc;
+struct pmd_layout;
+struct mpool_config;
+
+/**
+ * DOC: Object lifecycle
+ *
+ * +) all mblock/mlog objects are owned by mpool layer users, excepting
+ *     mdc mlogs
+ * +) users are responsible for object lifecycle mgmt and must not violate it;
+ *    e.g. by using an object handle (layout pointer) after deleting that
+ *    object
+ * +) the mpool layer never independently aborts or deletes user objects
+ */
+
+/**
+ * DOC: Object ids
+ * Object ids for mblocks and mlogs are a uint64 of the form:
+ * <uniquifier (52-bits), type (4-bits), slot # (8 bits)>
+ *
+ */
+
+/**
+ * DOC: NOTES
+ * + metadata for a given object is stored in the mdc specified by slot #
+ * + uniquifiers are only guaranteed unique for a given slot #
+ * + metadata for all mdc (except mdc 0) are stored in mdc 0
+ * + mdc 0 is a distinguished container whose metadata is stored in superblocks
+ * + mdc 0 only stores object metadata for mdc 1-255
+ * + mdc N is implemented via mlogs with objids (2N, MLOG, 0) & (2N+1, MLOG, 0)
+ * + mdc 0 mlog objids are (0, MLOG, 0) and (1, MLOG, 0) where a slot # of 0
+ *   indicates the mlog metadata is stored in mdc 0 whereas it is actually in
+ *   superblocks; see comments in pmd_mdc0_init() for how we exploit this.
+ */
+
+/**
+ * struct pre_compact_ctrs - object record counters, used for pre-compaction of MDC1/255.
+ * @pcc_cr:   count of object create records
+ * @pcc_up:   count of object update records
+ * @pcc_del:  count of object delete records. If the object is scheduled for
+ *	deletion in the background, the counter is incremented (while the
+ *	delete record has not been written yet).
+ * @pcc_er:   count of object erase records
+ * @pcc_cobj: count of committed objects (and not deleted).
+ * @pcc_cap: In bytes, size of each mlog of the MDC
+ * @pcc_len: In bytes, how much of the active mlog is filled.
+ *
+ * One such structure per mpool MDC.
+ *
+ * Locking:
+ *	Updates are serialized by the MDC compact lock.
+ *	The reads by the pre-compaction thread are done without holding any
+ *	lock. This is why atomic variables are used.
+ *	However because the variables are integers, the atomic read translates
+ *	into a simple load and the set translates into a simple store.
+ *
+ * The counters pcc_up, pcc_del, pcc_er are cleared at each compaction.
+ *
+ * Relaxed access is appropriate for all of these atomics
+ */
+struct pre_compact_ctrs {
+	atomic_t   pcc_cr;
+	atomic_t   pcc_up;
+	atomic_t   pcc_del;
+	atomic_t   pcc_er;
+	atomic_t   pcc_cobj;
+	atomic64_t pcc_cap;
+	atomic64_t pcc_len;
+};
+
+/**
+ * struct credit_info - mdc selector info
+ * @ci_credit:      available credit
+ * @ci_free:        available free space
+ * @ci_slot:        MDC slot number
+ *
+ * Contains information about available credit and a balance. Available
+ * credit is based on a rate at which records can be written to
+ * mdc such that all MDCs will fill at the same time.
+ */
+struct credit_info  {
+	u64                 ci_credit;
+	u64                 ci_free;
+	u8                  ci_slot;
+};
+
+/**
+ * struct pmd_mdc_stats - per MDC space usage stats
+ * @pms_mblock_alen: mblock alloc len
+ * @pms_mblock_wlen: mblock write len
+ * @pms_mlog_alen: mlog alloc len
+ * @pms_mblock_cnt: mblock count
+ * @pms_mlog_cnt: mlog count
+ */
+struct pmd_mdc_stats {
+	u64    pms_mblock_alen;
+	u64    pms_mblock_wlen;
+	u64    pms_mlog_alen;
+	u32    pms_mblock_cnt;
+	u32    pms_mlog_cnt;
+};
+
+/**
+ * struct pmd_mdc_info - Metadata container (mdc) info.
+ * @mmi_compactlock: compaction lock
+ * @mmi_uc_lock:     uncommitted objects tree lock
+ * @mmi_uc_root:     uncommitted objects tree root
+ * @mmi_co_lock:     committed objects tree lock
+ * @mmi_co_root:     committed objects tree root
+ * @mmi_uqlock:      uniquifier lock
+ * @mmi_luniq:       uniquifier of last object assigned to container
+ * @mmi_mdc:         MDC implementing container
+ * @mmi_recbuf:      buffer for (un)packing log records
+ * @mmi_lckpt:       last objid checkpointed
+ * @mmi_stats:       per-MDC usage stats
+ * @mmi_stats_lock:  lock for protecting mmi_stats
+ * @mmi_pco_cnt:     counters used by the pre compaction of MDC1/255.
+ * @mmi_mdcver:      version of the mdc content on media when the mpool was
+ *                   activated. That may not be the current version on media
+ *                   if a MDC metadata conversion took place during activate.
+ * @mmi_credit:      MDC credit info
+ *
+ * LOCKING:
+ * + mmi_luniq: protected by uqlock
+ * + mmi_mdc, recbuf, lckpt: protected by compactlock
+ * + mmi_co_root: protected by co_lock
+ * + mmi_uc_root: protected by uc_lock
+ * + mmi_stats: protected by mmi_stats_lock
+ * + mmi_pco_cnt: updates serialized by mmi_compactlock
+ *
+ * NOTE:
+ *  + for mdc0 mmi_luniq is the slot # of the last mdc created
+ *  + logging to a mdc cannot execute concurrently with compacting
+ *    that mdc;
+ *    mmi_compactlock is used to enforce this
+ *  + compacting a mdc requires freezing both the list of committed
+ *    objects in that mdc and the metadata for those objects;
+ *    compactlock facilitates this in a way that avoids locking each
+ *    object during compaction; as a result object metadata updates
+ *    are serialized, but even without mdc compaction this would be
+ *    the case because all such metadata updates must be logged to
+ *    the object's mdc and mdc logging is inherently serial
+ *  + see struct pmd_layout comments for specifics on how
+ *    compactlock is used to freeze metadata for committed objects
+ */
+struct pmd_mdc_info {
+	struct mutex            mmi_compactlock;
+	char                   *mmi_recbuf;
+	u64                     mmi_lckpt;
+	struct mp_mdc          *mmi_mdc;
+
+	____cacheline_aligned
+	struct mutex            mmi_uc_lock;
+	struct rb_root          mmi_uc_root;
+
+	____cacheline_aligned
+	struct rw_semaphore     mmi_co_lock;
+	struct rb_root          mmi_co_root;
+
+	____cacheline_aligned
+	struct mutex            mmi_uqlock;
+	u64                     mmi_luniq;
+
+	____cacheline_aligned
+	struct credit_info      mmi_credit;
+	struct omf_mdcver       mmi_mdcver;
+
+	____cacheline_aligned
+	struct mutex            mmi_stats_lock;
+	struct pmd_mdc_stats    mmi_stats;
+
+	struct pre_compact_ctrs mmi_pco_cnt;
+};
+
+/**
+ * struct pmd_mdc_selector - Object containing MDC slots for allocation
+ * @mds_tbl_idx:      idx of the MDC slot selector in the mds_tbl
+ * @mds_tbl:          slot table used for MDC selection
+ * @mds_smdc:         scratch pad for sorting mdc by free size
+ *
+ * LOCKING:
+ *  + mdi_slotvlock lock will be taken to protect this object.
+ *
+ */
+struct pmd_mdc_selector {
+	atomic_t    mds_tbl_idx;
+	u8          mds_tbl[MDC_TBL_SZ];
+	void       *mds_smdc[MDC_SLOTS];
+};
+
+/**
+ * struct pmd_mda_info - Metadata container array (mda).
+ * @mdi_slotvlock:   it is assumed that this spinlock is NOT taken from interrupt context
+ * @mdi_slotvcnt:    number of active slotv entries
+ * @mdi_slotv:       per mdc info
+ * @mdi_sel:         MDC allocation selector
+ *
+ * LOCKING:
+ *  + mdi_slotvcnt: protected by mdi_slotvlock
+ *
+ * NOTE:
+ *  + mdi_slotvcnt only ever increases so mdi_slotv[x], x < mdi_slotvcnt, is
+ *    always active
+ *  + all mdi_slotv[] entries are initialized whether or not active so they
+ *    can all be accessed w/o locking except as required by pmd_mdc_info struct
+ */
+struct pmd_mda_info {
+	spinlock_t              mdi_slotvlock;
+	u16                     mdi_slotvcnt;
+
+	struct pmd_mdc_info     mdi_slotv[MDC_SLOTS];
+	struct pmd_mdc_selector mdi_sel;
+};
+
+/**
+ * struct pmd_obj_load_work - work struct for loading MDC 1~N
+ * @olw_work:     work struct
+ * @olw_mp:       mpool descriptor
+ * @olw_progress: Progress index. It is an (atomic_t *) so that multiple
+ *                pmd_obj_load_work structs can point to a single atomic_t
+ *                for grabbing the next MDC number to be processed.
+ * @olw_err:      presumably an error status shared by the load jobs - TODO confirm
+ */
+struct pmd_obj_load_work {
+	struct work_struct          olw_work;
+	struct mpool_descriptor    *olw_mp;
+	atomic_t                   *olw_progress; /* relaxed is correct */
+	atomic_t                   *olw_err;
+};
+
+/**
+ * pmd_mpool_activate() - Load all metadata for mpool mp.
+ * @mp:
+ * @mdc01:
+ * @mdc02:
+ * @create:
+ *
+ * Load all metadata for mpool mp; create flag indicates if is a new pool;
+ * caller must ensure no other thread accesses mp until activation is complete.
+ * note: pmd module owns mdc01/2 memory mgmt whether succeeds or fails
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_mpool_activate(struct mpool_descriptor *mp, struct pmd_layout *mdc01,
+		       struct pmd_layout *mdc02, int create);
+
+/**
+ * pmd_mpool_deactivate() - Deactivate mpool mp.
+ * @mp:
+ *
+ * Free all metadata for mpool mp excepting mp itself; caller must ensure
+ * no other thread can access mp during deactivation.
+ */
+void pmd_mpool_deactivate(struct mpool_descriptor *mp);
+
+/**
+ * pmd_mdc_alloc() - Add a metadata container to mpool.
+ * @mp:     mpool descriptor
+ * @mincap: minimum capacity of the new mdc, in bytes
+ * @iter: the role of this parameter is to get the active mlogs of the mpool
+ *	MDCs uniformly spread on the mpool devices.
+ *	When pmd_mdc_alloc() is called in a loop to allocate several mpool MDCs,
+ *	iter should be incremented at each subsequent call.
+ *
+ * Add a metadata container (mdc) to mpool with a minimum capacity of mincap
+ * bytes.  Once added an mdc can never be deleted.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_mdc_alloc(struct mpool_descriptor *mp, u64 mincap, u32 iter);
+
+/**
+ * pmd_mdc_cap() - Get metadata container (mdc) capacity stats.
+ * @mp:
+ * @mdcmax:
+ * @mdccap:
+ * @mdc0cap:
+ *
+ * Get metadata container (mdc) stats: count, aggregate capacity ex-mdc0 and
+ * mdc0 cap
+ */
+void pmd_mdc_cap(struct mpool_descriptor *mp, u64 *mdcmax, u64 *mdccap, u64 *mdc0cap);
+
+/**
+ * pmd_prop_mcconfig() - persist state (new or update) for drive pd
+ * @mp:
+ * @pd:
+ * @compacting: if true, called by a compaction.
+ *
+ * Persist state (new or update) for drive pd; caller must hold mp.pdvlock
+ * if pd is an in-use member of mp.pdv.
+ *
+ * Locking: caller must hold MDC0 compact lock.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_prop_mcconfig(struct mpool_descriptor *mp, struct mpool_dev_info *pd, bool compacting);
+
+/**
+ * pmd_prop_mcspare() - persist spare zone info for drives in a media class
+ * @mp:
+ * @mclassp:
+ * @spzone:
+ * @compacting: if true, called by a compaction.
+ *
+ * Persist spare zone info for drives in media class (new or update).
+ *
+ * Locking: caller must hold MDC0 compact lock.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_prop_mcspare(struct mpool_descriptor *mp, enum mp_media_classp mclassp,
+		     u8 spzone, bool compacting);
+
+int pmd_prop_mpconfig(struct mpool_descriptor *mp, const struct mpool_config *cfg, bool compacting);
+
+/**
+ * pmd_precompact_start() - start MDC1/255 precompaction
+ * @mp:
+ */
+void pmd_precompact_start(struct mpool_descriptor *mp);
+
+/**
+ * pmd_precompact_stop() - stop MDC1/255 precompaction
+ * @mp:
+ */
+void pmd_precompact_stop(struct mpool_descriptor *mp);
+
+/**
+ * pmd_mdc_addrec_version() - add a version record in a mpool MDC.
+ * @mp:
+ * @cslot:
+ */
+int pmd_mdc_addrec_version(struct mpool_descriptor *mp, u8 cslot);
+
+int pmd_log_delete(struct mpool_descriptor *mp, u64 objid);
+
+int pmd_log_create(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+int pmd_log_erase(struct mpool_descriptor *mp, u64 objid, u64 gen);
+
+int pmd_log_idckpt(struct mpool_descriptor *mp, u64 objid);
+
+#define PMD_MDC0_COMPACTLOCK(_mp) \
+	pmd_mdc_lock(&((_mp)->pds_mda.mdi_slotv[0].mmi_compactlock), 0)
+
+#define PMD_MDC0_COMPACTUNLOCK(_mp) \
+	pmd_mdc_unlock(&((_mp)->pds_mda.mdi_slotv[0].mmi_compactlock))
+
+#endif /* MPOOL_PMD_H */
diff --git a/drivers/mpool/pmd_obj.h b/drivers/mpool/pmd_obj.h
new file mode 100644
index 000000000000..7cf5dea80f9d
--- /dev/null
+++ b/drivers/mpool/pmd_obj.h
@@ -0,0 +1,499 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_PMD_OBJ_H
+#define MPOOL_PMD_OBJ_H
+
+#include <linux/sort.h>
+#include <linux/rbtree.h>
+#include <linux/kref.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+
+#include "uuid.h"
+#include "mpool_ioctl.h"
+#include "omf_if.h"
+#include "mlog.h"
+
+struct mpool_descriptor;
+struct pmd_mdc_info;
+
+/*
+ * objid uniquifier checkpoint interval; used to avoid reissuing an outstanding
+ * objid after a crash; supports pmd_{mblock|mlog}_realloc()
+ */
+#define OBJID_UNIQ_POW2 8
+#define OBJID_UNIQ_DELTA (1 << OBJID_UNIQ_POW2)
+
+/* MDC_SLOTS is 256 [0,255] to fit in 8-bit slot field in objid.
+ */
+#define MDC_SLOTS           256
+#define MDC_TBL_SZ          (MDC_SLOTS * 4)
+
+#define UROOT_OBJID_LOG1 logid_make(0, 1)
+#define UROOT_OBJID_LOG2 logid_make(1, 1)
+#define UROOT_OBJID_MAX  1
+
+#define MDC0_OBJID_LOG1 logid_make(0, 0)
+#define MDC0_OBJID_LOG2 logid_make(1, 0)
+
+/**
+ * enum pmd_lock_class -
+ * @PMD_NONE:
+ * @PMD_OBJ_CLIENT:
+ *      For layout rwlock,
+ *              - Object id contains a non-zero slot number
+ * @PMD_MDC_NORMAL:
+ *      For layout rwlock,
+ *              - Object id contains a zero slot number AND
+ *              - Object id is neither of the well-known MDC-0 objids
+ *      For pmd_mdc_info.* locks,
+ *              - Array index of pmd_mda_info.mdi_slotv[] is > 0.
+ * @PMD_MDC_ZERO:
+ *      For layout rwlock,
+ *              - Object id contains a zero slot number AND
+ *              - Object id is either of the well-known MDC-0 objids
+ *      For pmd_mdc_info.* locks,
+ *              - Array index of pmd_mda_info.mdi_slotv[] is == 0.
+ *
+ * NOTE:
+ * - Object layout rw locks must be acquired before any MDC locks.
+ * - MDC-0 locks of a given class are below MDC-1/255 locks of those same
+ *   classes.
+ */
+enum pmd_lock_class {
+	PMD_NONE       = 0,
+	PMD_OBJ_CLIENT = 1,
+	PMD_MDC_NORMAL = 2,
+	PMD_MDC_ZERO   = 3,
+};
+
+/**
+ * enum pmd_obj_op -
+ * @PMD_OBJ_LOAD:
+ * @PMD_OBJ_ALLOC:
+ * @PMD_OBJ_COMMIT:
+ * @PMD_OBJ_ABORT:
+ * @PMD_OBJ_DELETE:
+ */
+enum pmd_obj_op {
+	PMD_OBJ_LOAD     = 1,
+	PMD_OBJ_ALLOC    = 2,
+	PMD_OBJ_COMMIT   = 3,
+	PMD_OBJ_ABORT    = 4,
+	PMD_OBJ_DELETE   = 5,
+};
+
+/**
+ * enum pmd_layout_state - object state flags
+ * @PMD_LYT_COMMITTED: object is committed to media
+ * @PMD_LYT_REMOVED:   object logically removed (aborted or deleted)
+ */
+enum pmd_layout_state {
+	PMD_LYT_COMMITTED  = 0x01,
+	PMD_LYT_REMOVED    = 0x02,
+};
+
+/**
+ * struct pmd_layout_mlpriv - mlog private data for pmd_layout
+ * @mlp_uuid:       unique ID per mlog
+ * @mlp_lstat:      mlog status
+ * @mlp_nodeoml:    "open mlog" rbtree linkage
+ */
+struct pmd_layout_mlpriv {
+	struct mpool_uuid   mlp_uuid;
+	struct rb_node      mlp_nodeoml;
+	struct mlog_stat    mlp_lstat;
+};
+
+/**
+ * union pmd_layout_priv - pmd_layout object type specific private data
+ * @mlpriv: mlog private data
+ */
+union pmd_layout_priv {
+	struct pmd_layout_mlpriv    mlpriv;
+};
+
+/**
+ * struct pmd_layout - object layout (in-memory version)
+ * @eld_nodemdc: rbtree node for uncommitted and committed objects
+ * @eld_objid:   object ID associated with layout
+ * @eld_mblen:   Amount of data written in the mblock in bytes (0 for mlogs)
+ * @eld_state:   enum pmd_layout_state
+ * @eld_flags:   enum mlog_open_flags for mlogs
+ * @eld_gen:     object generation
+ * @eld_ld:      layout descriptor
+ * @eld_ref:     user ref count from alloc/get/put
+ * @eld_rwlock:  implements pmd_obj_*lock() for this layout
+ * @eld_priv:    mlog private data
+ *
+ * LOCKING:
+ * + objid: constant; no locking required
+ * + lstat: lstat and *lstat are protected by pmd_obj_*lock()
+ * + all other fields: see notes
+ *
+ * NOTE:
+ * + committed object fields (other): to update hold pmd_obj_wrlock()
+ *   AND
+ *   compactlock for object's mdc; to read hold pmd_obj_*lock()
+ *   See the comments associated with struct pmd_mdc_info for
+ *   further details.
+ *
+ * eld_priv[] contains exactly one element if the object type
+ * is an mlog, otherwise it contains no elements.
+ */
+struct pmd_layout {
+	struct rb_node                  eld_nodemdc;
+	u64                             eld_objid;
+	u32                             eld_mblen;
+	u8                              eld_state;
+	u8                              eld_flags;
+	u64                             eld_gen;
+	struct omf_layout_descriptor    eld_ld;
+
+	/* The above fields are read-mostly, while the
+	 * following two fields mutate frequently.
+	 */
+	struct kref                     eld_ref;
+	struct rw_semaphore             eld_rwlock;
+
+	union pmd_layout_priv           eld_priv[];
+};
+
+/* Shortcuts for mlog private data...
+ */
+#define eld_mlpriv      eld_priv->mlpriv
+#define eld_uuid        eld_mlpriv.mlp_uuid
+#define eld_lstat       eld_mlpriv.mlp_lstat
+#define eld_nodeoml     eld_mlpriv.mlp_nodeoml
+
+/**
+ * struct pmd_obj_capacity -
+ * @moc_captgt:  capacity target for object in bytes
+ * @moc_spare:   true, if alloc obj from spare space
+ */
+struct pmd_obj_capacity {
+	u64    moc_captgt;
+	bool   moc_spare;
+};
+
+/**
+ * struct pmd_obj_erase_work - workqueue job struct for object erase and free
+ * @oef_mp:             mpool
+ * @oef_layout:         object layout
+ * @oef_cache:          kmem cache to free work (or NULL)
+ * @oef_wqstruct:	workq struct
+ */
+struct pmd_obj_erase_work {
+	struct mpool_descriptor    *oef_mp;
+	struct pmd_layout          *oef_layout;
+	struct kmem_cache          *oef_cache;
+	struct work_struct          oef_wqstruct;
+};
+
+/**
+ * struct mdc_csm_info - mdc credit set member info
+ * @m_slot:      mdc slot number
+ * @m_credit:    available credit
+ */
+struct mdc_csm_info {
+	u8   m_slot;
+	u16  m_credit;
+};
+
+/**
+ * struct mdc_credit_set - mdc credit set
+ * @cs_idx:      index of current credit set member
+ * @cs_num_csm:  number of credit set members in this credit set
+ * @csm:         array of credit set members
+ */
+struct mdc_credit_set {
+	u8                    cs_idx;
+	u8                    cs_num_csm;
+	struct mdc_csm_info   csm[MPOOL_MDC_SET_SZ];
+};
+
+/**
+ * pmd_obj_alloc() - Allocate an object.
+ * @mp:
+ * @otype:
+ * @ocap:
+ * @mclassp: media class
+ * @layoutp:
+ *
+ * Allocate object of type otype with parameters and capacity as specified
+ * by ocap on drives in media class mclassp providing a minimum capacity of
+ * mincap bytes; if successful returns object layout.
+ *
+ * Note:
+ * Object is not persistent until committed; allocation can be aborted.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_obj_alloc(struct mpool_descriptor *mp, enum obj_type_omf otype,
+		  struct pmd_obj_capacity *ocap, enum mp_media_classp mclassp,
+		  struct pmd_layout **layoutp);
+
+
+/**
+ * pmd_obj_realloc() - Re-allocate an object.
+ * @mp:
+ * @objid:
+ * @ocap:
+ * @mclassp: media class
+ * @layoutp:
+ *
+ * Allocate object with specified objid to support crash recovery; otherwise
+ * is equivalent to pmd_obj_alloc(); if successful returns object layout.
+ *
+ * Note:
+ * Object is not persistent until committed; allocation can be aborted.
+ *
+ * Return: %0 if successful; -errno otherwise
+ */
+int pmd_obj_realloc(struct mpool_descriptor *mp, u64 objid, struct pmd_obj_capacity *ocap,
+		    enum mp_media_classp mclassp, struct pmd_layout **layoutp);
+
+
+/**
+ * pmd_obj_commit() - Commit an object.
+ * @mp:
+ * @layout:
+ *
+ * Make allocated object persistent; if fails object remains uncommitted so
+ * can retry commit or abort; object cannot be committed while in erasing or
+ * aborting state; caller MUST NOT hold pmd_obj_*lock() on layout.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_obj_commit(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+/**
+ * pmd_obj_abort() - Discard un-committed object.
+ * @mp:
+ * @layout:
+ *
+ * Discard uncommitted object; caller MUST NOT hold pmd_obj_*lock() on
+ * layout; if successful layout is invalid after call.
+ *
+ * Return: %0 if successful; -errno otherwise
+ */
+int pmd_obj_abort(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+/**
+ * pmd_obj_delete() - Delete committed object.
+ * @mp:
+ * @layout:
+ *
+ * Delete committed object; caller MUST NOT hold pmd_obj_*lock() on layout;
+ * if successful layout is invalid.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_obj_delete(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+/**
+ * pmd_obj_erase() - Log erase for object and set state flag and generation number
+ * @mp:
+ * @layout:
+ * @gen:
+ *
+ * Object must be in committed state; caller MUST hold pmd_obj_wrlock() on layout.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_obj_erase(struct mpool_descriptor *mp, struct pmd_layout *layout, u64 gen);
+
+/**
+ * pmd_obj_find_get() - Get a reference for a layout for objid.
+ * @mp:
+ * @objid:
+ * @which:
+ *
+ * Get layout for object with specified objid; returns NULL if not found
+ *
+ * Return: pointer to layout if successful, NULL otherwise
+ */
+struct pmd_layout *pmd_obj_find_get(struct mpool_descriptor *mp, u64 objid, int which);
+
+/**
+ * pmd_obj_rdlock() - Read-lock object layout with appropriate nesting level.
+ * @layout:
+ */
+void pmd_obj_rdlock(struct pmd_layout *layout);
+
+/**
+ * pmd_obj_rdunlock() - Release read lock on object layout.
+ * @layout:
+ */
+void pmd_obj_rdunlock(struct pmd_layout *layout);
+
+/**
+ * pmd_obj_wrlock() - Write-lock object layout with appropriate nesting level.
+ * @layout:
+ */
+void pmd_obj_wrlock(struct pmd_layout *layout);
+
+/**
+ * pmd_obj_wrunlock() - Release write lock on object layout.
+ * @layout:
+ */
+void pmd_obj_wrunlock(struct pmd_layout *layout);
+
+/**
+ * pmd_update_credit() - updates available credit and sets up mdc selector table
+ * @mp: mpool object
+ *
+ * Lock: No Lock required
+ *
+ * Used to initialize credit when new MDCs are added and add the MDCs to the
+ * available credit list.
+ */
+void pmd_update_credit(struct mpool_descriptor *mp);
+
+/**
+ * pmd_mpool_usage() - calculate per-mpool space usage
+ * @mp:
+ * @usage:
+ */
+void pmd_mpool_usage(struct mpool_descriptor *mp, struct mpool_usage *usage);
+
+/**
+ * pmd_precompact_alsz() - Inform MDC1/255 pre-compacting about the active
+ *	mlog of an mpool MDCi 0<i<=255.
+ *	The size and how much is used are passed in.
+ *	"alsz" stands for active mlog size.
+ * @mp:
+ * @objid: objid of the active mlog of the mpool MDCi
+ * @len: In bytes, how much of the active mlog is used.
+ * @cap: In bytes, size of the active mlog.
+ */
+void pmd_precompact_alsz(struct mpool_descriptor *mp, u64 objid, u64 len, u64 cap);
+
+/**
+ * pmd_layout_alloc() - create and initialize a pmd_layout
+ * @uuid:   struct mpool_uuid *
+ * @objid:  mblock/mlog object ID
+ * @gen:    generation number
+ * @mblen:  mblock written length
+ * @zcnt:   number of zones in a strip
+ *
+ * Alloc and init object layout; non-arg fields and all strip descriptor
+ * fields are set to 0/UNDEF/NONE; no auxiliary object info is allocated.
+ *
+ * Return: NULL if allocation fails.
+ */
+struct pmd_layout *pmd_layout_alloc(struct mpool_uuid *uuid, u64 objid,
+				    u64 gen, u64 mblen, u32 zcnt);
+
+/**
+ * pmd_layout_release() - free pmd_layout and internal elements
+ * @refp: pointer to the layout's eld_ref kref (see pmd_obj_put())
+ *
+ * Deallocate all memory associated with object layout.
+ *
+ * Return: void
+ */
+void pmd_layout_release(struct kref *refp);
+
+int pmd_layout_rw(struct mpool_descriptor *mp, struct pmd_layout *layout,
+		  const struct kvec *iov, int iovcnt, u64 boff, int flags, u8 rw);
+
+struct mpool_dev_info *pmd_layout_pd_get(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+u64 pmd_layout_cap_get(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+int pmd_layout_erase(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+int pmd_obj_alloc_cmn(struct mpool_descriptor *mp, u64 objid, enum obj_type_omf otype,
+		      struct pmd_obj_capacity *ocap, enum mp_media_classp mclass,
+		      int realloc, bool needref, struct pmd_layout **layoutp);
+
+void pmd_update_obj_stats(struct mpool_descriptor *mp, struct pmd_layout *layout,
+			  struct pmd_mdc_info *cinfo, enum pmd_obj_op op);
+
+/*
+ * NOTE(review): the four lock/unlock prototypes below repeat the documented
+ * declarations of the same functions earlier in this header.
+ */
+void pmd_obj_rdlock(struct pmd_layout *layout);
+void pmd_obj_rdunlock(struct pmd_layout *layout);
+
+void pmd_obj_wrlock(struct pmd_layout *layout);
+void pmd_obj_wrunlock(struct pmd_layout *layout);
+
+void pmd_co_rlock(struct pmd_mdc_info *cinfo, u8 slot);
+void pmd_co_runlock(struct pmd_mdc_info *cinfo);
+
+struct pmd_layout *pmd_co_find(struct pmd_mdc_info *cinfo, u64 objid);
+struct pmd_layout *pmd_co_insert(struct pmd_mdc_info *cinfo, struct pmd_layout *layout);
+struct pmd_layout *pmd_co_remove(struct pmd_mdc_info *cinfo, struct pmd_layout *layout);
+
+int pmd_smap_insert(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+int pmd_init(void) __cold;
+void pmd_exit(void) __cold;
+
+/**
+ * objtype_user() - test whether an object type is a user object type
+ * @otype: object type to test
+ *
+ * Return: true if @otype is OMF_OBJ_MBLOCK or OMF_OBJ_MLOG, false otherwise
+ */
+static inline bool objtype_user(enum obj_type_omf otype)
+{
+	switch (otype) {
+	case OMF_OBJ_MBLOCK:
+	case OMF_OBJ_MLOG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * objid_make() - build an object ID from its three components
+ * @uniq:  uniquifier, stored in bits 12 and above
+ * @otype: object type; only the low 4 bits are kept, stored in bits 8-11
+ * @cslot: slot number, stored in bits 0-7
+ *
+ * Return: the assembled 64-bit object ID
+ */
+static inline u64 objid_make(u64 uniq, enum obj_type_omf otype, u8 cslot)
+{
+	u64 objid = uniq << 12;
+
+	objid |= (otype & 0xF) << 8;
+	objid |= cslot & 0xFF;
+
+	return objid;
+}
+
+/**
+ * objid_uniq() - extract the uniquifier from an object ID
+ * @objid: object ID
+ *
+ * Return: @objid with its low 12 bits shifted out
+ */
+static inline u64 objid_uniq(u64 objid)
+{
+	u64 uniq = objid >> 12;
+
+	return uniq;
+}
+
+/**
+ * objid_slot() - extract the slot number from an object ID
+ * @objid: object ID
+ *
+ * Return: the low 8 bits of @objid
+ */
+static inline u8 objid_slot(u64 objid)
+{
+	return (u8)(objid & 0xFF);
+}
+
+/**
+ * objid_ckpt() - test whether an object ID falls on a uniquifier checkpoint
+ * @objid: object ID
+ *
+ * Return: true when the uniquifier of @objid has none of the bits in
+ * (OBJID_UNIQ_DELTA - 1) set, false otherwise
+ */
+static inline bool objid_ckpt(u64 objid)
+{
+	u64 mask = OBJID_UNIQ_DELTA - 1;
+
+	return (objid_uniq(objid) & mask) == 0;
+}
+
+/**
+ * logid_make() - build the object ID of an mlog
+ * @uniq:  uniquifier, passed through to objid_make()
+ * @cslot: slot number, passed through to objid_make()
+ *
+ * Return: objid_make() result for object type OMF_OBJ_MLOG
+ */
+static inline u64 logid_make(u64 uniq, u8 cslot)
+{
+	return objid_make(uniq, OMF_OBJ_MLOG, cslot);
+}
+
+/**
+ * objid_mdc0log() - test whether an object ID is one of the two MDC0 mlogs
+ * @objid: object ID
+ *
+ * Return: true if @objid equals MDC0_OBJID_LOG1 or MDC0_OBJID_LOG2
+ */
+static inline bool objid_mdc0log(u64 objid)
+{
+	if (objid == MDC0_OBJID_LOG1)
+		return true;
+
+	return objid == MDC0_OBJID_LOG2;
+}
+
+/**
+ * pmd_objid_type() - get the validated object type encoded in an object ID
+ * @objid: object ID
+ *
+ * Return: the type reported by objid_type() when objtype_valid() accepts it,
+ * OMF_OBJ_UNDEF otherwise
+ */
+static inline enum obj_type_omf pmd_objid_type(u64 objid)
+{
+	enum obj_type_omf otype = objid_type(objid);
+
+	if (!objtype_valid(otype))
+		return OMF_OBJ_UNDEF;
+
+	return otype;
+}
+
+/**
+ * pmd_objid_isuser() - test for an mpool user object (versus metadata object)
+ * @objid: object ID
+ *
+ * Return: true if the type of @objid is a user type (see objtype_user()) and
+ * its slot number is non-zero
+ */
+static inline bool pmd_objid_isuser(u64 objid)
+{
+	if (!objtype_user(objid_type(objid)))
+		return false;
+
+	return objid_slot(objid) != 0;
+}
+
+/**
+ * pmd_obj_put() - drop a reference on an object layout
+ * @layout: layout whose eld_ref reference count is decremented
+ *
+ * pmd_layout_release() is passed to kref_put() as the release callback.
+ */
+static inline void pmd_obj_put(struct pmd_layout *layout)
+{
+	struct kref *ref = &layout->eld_ref;
+
+	kref_put(ref, pmd_layout_release);
+}
+
+/**
+ * pmd_mdc_lock() - general mdc locking (has external callers)
+ * @lock: mutex to acquire
+ * @slot: slot number; selects the lock nesting subclass
+ *
+ * Acquires @lock with subclass PMD_MDC_ZERO for slot 0 and PMD_MDC_NORMAL
+ * for any other slot.
+ */
+static inline void pmd_mdc_lock(struct mutex *lock, u8 slot)
+{
+	if (slot)
+		mutex_lock_nested(lock, PMD_MDC_NORMAL);
+	else
+		mutex_lock_nested(lock, PMD_MDC_ZERO);
+}
+
+/**
+ * pmd_mdc_unlock() - release a mutex taken with pmd_mdc_lock()
+ * @lock: mutex to release
+ */
+static inline void pmd_mdc_unlock(struct mutex *lock)
+{
+	mutex_unlock(lock);
+}
+
+#endif /* MPOOL_PMD_OBJ_H */
diff --git a/drivers/mpool/sb.h b/drivers/mpool/sb.h
new file mode 100644
index 000000000000..673a5f742f7c
--- /dev/null
+++ b/drivers/mpool/sb.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_SB_PRIV_H
+#define MPOOL_SB_PRIV_H
+
+#include "mpool_ioctl.h"
+
+struct pd_dev_parm;
+struct omf_sb_descriptor;
+struct pd_prop;
+
+/*
+ * Drives have 2 superblocks.
+ * + sb0 at byte offset 0
+ * + sb1 at byte offset SB_AREA_SZ
+ *
+ * Read: sb0 is the authoritative copy, other copies are not used.
+ * Updates: sb0 is updated first; if successful sb1 is updated
+ */
+/* Number of superblocks per physical device. */
+#define SB_SB_COUNT        2
+
+/*
+ * Size in bytes of the area occupied by a superblock. The superblock itself
+ * may be smaller, but always starts at the beginning of its area.
+ */
+#define SB_AREA_SZ        (4096ULL)
+
+/*
+ * Size in bytes of an area located just after the superblock areas.
+ * Not used in 1.0. Later can be used for MDC0 metadata and/or voting sets.
+ */
+#define MDC0MD_AREA_SZ    (4096ULL)
+
+/*
+ * sb API functions
+ */
+
+/**
+ * sb_magic_check() - check for sb magic value
+ * @dparm: struct pd_dev_parm *
+ *
+ * Determine if the mpool magic value exists in at least one place where
+ * expected on drive pd.  Does NOT imply drive has a valid superblock.
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 1 if found, 0 if not found, -(errno) if error reading
+ */
+int sb_magic_check(struct pd_dev_parm *dparm);
+
+/**
+ * sb_write_new() - write superblock to new drive
+ * @dparm: struct pd_dev_parm *
+ * @sb: struct omf_sb_descriptor *
+ *
+ * Write superblock sb to new (non-pool) drive
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int sb_write_new(struct pd_dev_parm *dparm, struct omf_sb_descriptor *sb);
+
+/**
+ * sb_write_update() - update superblock
+ * @dparm: "dparm" info is not used to fill up the super block, only "sb" content is used.
+ * @sb: "sb" content is written in the super block.
+ *
+ * Update superblock on pool drive
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int sb_write_update(struct pd_dev_parm *dparm, struct omf_sb_descriptor *sb);
+
+/**
+ * sb_erase() - erase superblock
+ * @dparm: struct pd_dev_parm *
+ *
+ * Erase superblock on drive pd.
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int sb_erase(struct pd_dev_parm *dparm);
+
+/**
+ * sb_read() - read superblock
+ * @dparm: struct pd_dev_parm *
+ * @sb: struct omf_sb_descriptor *
+ * @omf_ver: omf sb version
+ * @force:
+ *
+ * Read superblock from drive pd; make repairs as necessary.
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int sb_read(struct pd_dev_parm *dparm, struct omf_sb_descriptor *sb, u16 *omf_ver, bool force);
+
+/**
+ * sbutil_mdc0_clear() - clear mdc0 of superblock
+ * @sb: struct omf_sb_descriptor *
+ *
+ * Clear (set to zeros) mdc0 portion of sb.
+ *
+ * Return: void
+ */
+void sbutil_mdc0_clear(struct omf_sb_descriptor *sb);
+
+/**
+ * sbutil_mdc0_isclear() - Test if mdc0 is clear
+ * @sb: struct omf_sb_descriptor *
+ *
+ * Return: 1 if mdc0 portion of sb is clear.
+ */
+int sbutil_mdc0_isclear(struct omf_sb_descriptor *sb);
+
+/**
+ * sbutil_mdc0_copy() - copy mdc0 from one superblock to another
+ * @tgtsb: struct omf_sb_descriptor *
+ * @srcsb: struct omf_sb_descriptor *
+ *
+ * Copy mdc0 portion of srcsb to tgtsb.
+ *
+ * Return: void
+ */
+void sbutil_mdc0_copy(struct omf_sb_descriptor *tgtsb, struct omf_sb_descriptor *srcsb);
+
+/**
+ * sbutil_mdc0_isvalid() - validate mdc0 of a superblock
+ * @sb: struct omf_sb_descriptor *
+ *
+ * Validate mdc0 portion of sb and extract mdparm.
+ * Return: 1 if valid and mdparm set; 0 otherwise.
+ */
+int sbutil_mdc0_isvalid(struct omf_sb_descriptor *sb);
+
+/**
+ * sb_zones_for_sbs() - compute how many zones are needed to contain the superblocks.
+ * @pd_prop: drive properties; pdp_zparam.dvb_zonepg is read as the zone size in pages
+ *
+ * The space covered is SB_SB_COUNT * (SB_AREA_SZ + MDC0MD_AREA_SZ) bytes, rounded
+ * up to a whole number of zones.  The zone size is widened to 64 bits before the
+ * shift so that the byte count cannot be truncated for large zones.
+ *
+ * NOTE(review): a zero dvb_zonepg would divide by zero; callers are assumed to
+ * pass validated drive properties - TODO confirm.
+ *
+ * Return: number of zones
+ */
+static inline u32 sb_zones_for_sbs(struct pd_prop *pd_prop)
+{
+	u64 zonebyte = (u64)pd_prop->pdp_zparam.dvb_zonepg << PAGE_SHIFT;
+	u64 sbbyte = SB_SB_COUNT * (SB_AREA_SZ + MDC0MD_AREA_SZ);
+
+	return (sbbyte + (zonebyte - 1)) / zonebyte;
+}
+
+int sb_init(void) __cold;
+void sb_exit(void) __cold;
+
+#endif /* MPOOL_SB_PRIV_H */
diff --git a/drivers/mpool/smap.h b/drivers/mpool/smap.h
new file mode 100644
index 000000000000..b9b72d3182c6
--- /dev/null
+++ b/drivers/mpool/smap.h
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc.  All rights reserved.
+ */
+
+#ifndef MPOOL_SMAP_H
+#define MPOOL_SMAP_H
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/rbtree.h>
+#include <linux/workqueue.h>
+
+#include "mpool_ioctl.h"
+
+/* Forward Decls */
+struct mpool_usage;
+struct mpool_devprops;
+struct mc_smap_parms;
+struct mpool_descriptor;
+
+/*
+ * Common defs
+ */
+
+/**
+ * struct rmbkt - region map bucket
+ * @pdi_rmlock: mutex stored alongside the tree root; presumably serializes
+ *              access to @pdi_rmroot (TODO confirm against the smap code)
+ * @pdi_rmroot: root of this bucket's rbtree
+ *
+ * Declared ____cacheline_aligned, so each bucket is aligned to a cache line.
+ */
+struct rmbkt {
+	struct mutex    pdi_rmlock;
+	struct rb_root  pdi_rmroot;
+} ____cacheline_aligned;
+
+/**
+ * struct smap_zone - rbtree node carrying a u64 key/value pair
+ * @smz_node:  rbtree linkage
+ * @smz_key:   64-bit key; presumably the starting zone address (TODO confirm)
+ * @smz_value: 64-bit value; presumably the zone count of the range (TODO confirm)
+ */
+struct smap_zone {
+	struct rb_node  smz_node;
+	u64             smz_key;
+	u64             smz_value;
+};
+
+/**
+ * enum smap_space_type - space allocation policy flag
+ * @SMAP_SPC_UNDEF:          undefined policy (value 0)
+ * @SMAP_SPC_USABLE_ONLY:    allocate from usable space only
+ * @SMAP_SPC_USABLE_2_SPARE: allocate from usable space first then spare
+ *                          if needed
+ * @SMAP_SPC_SPARE_ONLY:     allocate from spare space only
+ * @SMAP_SPC_SPARE_2_USABLE: allocate from spare space first then usable
+ *                          if needed
+ *
+ * The enumerators are explicitly numbered 0 through 4.
+ */
+enum smap_space_type {
+	SMAP_SPC_UNDEF           = 0,
+	SMAP_SPC_USABLE_ONLY     = 1,
+	SMAP_SPC_USABLE_2_SPARE  = 2,
+	SMAP_SPC_SPARE_ONLY      = 3,
+	SMAP_SPC_SPARE_2_USABLE  = 4
+};
+
+/**
+ * saptype_valid() - check that a space allocation policy is a defined one
+ * @saptype: policy value to check
+ *
+ * The bounds are spelled with the enumerators rather than a literal so the
+ * check stays tied to enum smap_space_type.
+ *
+ * Return: non-zero if @saptype lies in the range
+ * SMAP_SPC_USABLE_ONLY..SMAP_SPC_SPARE_2_USABLE, 0 otherwise (including
+ * SMAP_SPC_UNDEF).
+ */
+static inline int saptype_valid(enum smap_space_type saptype)
+{
+	return (saptype >= SMAP_SPC_USABLE_ONLY && saptype <= SMAP_SPC_SPARE_2_USABLE);
+}
+
+/*
+ * drive allocation info
+ *
+ * LOCKING:
+ * + rgnsz, rgnladdr: constants; no locking required
+ * + all other fields: protected by dalock
+ */
+
+/**
+ * struct smap_dev_alloc - drive allocation info
+ * @sda_dalock:   spinlock protecting every field except the constants
+ *                @sda_rgnsz and @sda_rgnladdr
+ * @sda_rgnsz:    number of zones per rgn, excepting last
+ * @sda_rgnladdr: address of first zone in last rgn
+ * @sda_rgnalloc: rgn last alloced from
+ * @sda_zoneeff:  total zones (zonetot) minus bad zones
+ * @sda_utgt:     target max usable zones to allocate
+ * @sda_uact:     actual usable zones allocated
+ * @sda_stgt:     target max spare zones to allocate
+ * @sda_sact:     actual spare zones allocated
+ *
+ * NOTE:
+ * + must maintain invariant that sact <= stgt
+ * + however it is possible for uact > utgt due to changing % spare
+ *   zones or zone failures.  this condition corrects when
+ *   sufficient space is freed or if % spare zones is changed
+ *   (again).
+ *
+ * Capacity pools and calcs:
+ * + total zones = zonetot
+ * + avail zones = zoneeff
+ * + usable zones = utgt which is (zoneeff * (1 - spzone/100))
+ * + free usable zones = max(0, utgt - uact); max handles uact > utgt
+ * + used zones = uact; possible for used > usable (uact > utgt)
+ * + spare zones = stgt which is (zoneeff - utgt)
+ * + free spare zones = (stgt - sact); guaranteed that sact <= stgt
+ */
+struct smap_dev_alloc {
+	spinlock_t sda_dalock;
+	u32        sda_rgnsz;
+	u32        sda_rgnladdr;
+	u32        sda_rgnalloc;
+	u32        sda_zoneeff;
+	u32        sda_utgt;
+	u32        sda_uact;
+	u32        sda_stgt;
+	u32        sda_sact;
+};
+
+/**
+ * struct smap_dev_znstats - set of 64-bit zone statistics counters
+ * @sdv_total:   presumably total zones (TODO confirm)
+ * @sdv_avail:   presumably available zones (TODO confirm)
+ * @sdv_usable:  presumably usable zones (TODO confirm)
+ * @sdv_fusable: presumably free usable zones (TODO confirm)
+ * @sdv_spare:   presumably spare zones (TODO confirm)
+ * @sdv_fspare:  presumably free spare zones (TODO confirm)
+ * @sdv_used:    presumably used zones (TODO confirm)
+ *
+ * NOTE(review): the field names appear to parallel the capacity pools listed
+ * in the struct smap_dev_alloc comment; verify units against the code that
+ * fills this structure.
+ */
+struct smap_dev_znstats {
+	u64    sdv_total;
+	u64    sdv_avail;
+	u64    sdv_usable;
+	u64    sdv_fusable;
+	u64    sdv_spare;
+	u64    sdv_fspare;
+	u64    sdv_used;
+};
+
+/**
+ * struct smap_usage_work - delayed work struct for checking mpool free usable space usage
+ * @smapu_wstruct: delayed work item
+ * @smapu_mp:      mpool descriptor this work item refers to
+ * @smapu_freepct: free space %
+ */
+struct smap_usage_work {
+	struct delayed_work             smapu_wstruct;
+	struct mpool_descriptor        *smapu_mp;
+	int                             smapu_freepct;
+};
+
+/*
+ * smap API functions
+ */
+
+/*
+ * Return: all smap fns can return -errno with the following errno values
+ * on failure:
+ * + -EINVAL = invalid fn args
+ * + -ENOSPC = unable to allocate requested space
+ * + -ENOMEM = insufficient memory to complete operation
+ */
+
+/*
+ * smap API usage notes:
+ * + During mpool activation call smap_insert() for all existing objects
+ *   before calling smap_alloc() or smap_free().
+ */
+
+/**
+ * smap_mpool_init() - initialize the smaps for an initialized mpool_descriptor
+ * @mp: struct mpool_descriptor *
+ *
+ * smap_mpool_init must be called once per mpool as it is being activated.
+ *
+ * Init space maps for all drives in mpool that are empty except for
+ * superblocks; caller must ensure no other thread can access mp.
+ *
+ * TODO: Traversing smap rbtrees may need fix, since there may be unsafe
+ * erases within loops.
+ *
+ * Return:
+ * 0 if successful, -errno with the following errno values on failure:
+ * -EINVAL if spare zone percentage is > 100%,
+ * -EINVAL if rgn count is 0, or
+ * -EINVAL if zonecnt on one of the drives is < rgn count
+ * -ENOMEM if there is no memory available
+ */
+int smap_mpool_init(struct mpool_descriptor *mp);
+
+/**
+ * smap_mpool_free() - free smap structures in a mpool_descriptor
+ * @mp: struct mpool_descriptor *
+ *
+ * Free space maps for all drives in mpool; caller must ensure no other
+ * thread can access mp.
+ *
+ * Return: void
+ */
+void smap_mpool_free(struct mpool_descriptor *mp);
+
+/**
+ * smap_mpool_usage() - present stats of smap usage
+ * @mp: struct mpool_descriptor *
+ * @mclass: media class or MP_MED_ALL for all classes
+ * @usage: struct mpool_usage *
+ *
+ * Fill in stats with space usage for media class; if MP_MED_ALL
+ * report on all media classes; caller must hold mp.pdvlock.
+ *
+ * Locking: the caller should hold the pds_pdvlock at least in read to
+ *	    be protected against media classes updates.
+ */
+void smap_mpool_usage(struct mpool_descriptor *mp, u8 mclass, struct mpool_usage *usage);
+
+/**
+ * smap_drive_spares() - Set percentage of zones to set aside as spares
+ * @mp: struct mpool_descriptor *
+ * @mclassp: media class
+ * @spzone: percentage of zones to use as spares
+ *
+ * Set percent spare zones to spzone for drives in media class mclassp;
+ * caller must hold mp.pdvlock.
+ *
+ * Locking: the caller should hold the pds_pdvlock at least in read to
+ *	    be protected against media classes updates.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int smap_drive_spares(struct mpool_descriptor *mp, enum mp_media_classp mclassp, u8 spzone);
+
+/**
+ * smap_drive_usage() - Fill in a given drive's portion of dprop struct.
+ * @mp:    struct mpool_descriptor *
+ * @pdh:   drive number within the mpool_descriptor
+ * @dprop: struct mpool_devprops *, structure to fill in
+ *
+ * Fill in usage portion of dprop for drive pdh; caller must hold mp.pdvlock
+ *
+ * Return: 0 if successful, -errno otherwise
+ */
+int smap_drive_usage(struct mpool_descriptor *mp, u16 pdh, struct mpool_devprops *dprop);
+
+/**
+ * smap_drive_init() - Initialize a specific drive within a mpool_descriptor
+ * @mp:    struct mpool_descriptor *
+ * @mcsp:  smap parameters
+ * @pdh:   u16, drive number within the mpool_descriptor
+ *
+ * Init space map for pool drive pdh that is empty except for superblocks
+ * with a percent spare zones of spzone; caller must ensure pdh is not in use.
+ *
+ * Return: 0 if successful, -errno otherwise
+ */
+int smap_drive_init(struct mpool_descriptor *mp, struct mc_smap_parms *mcsp, u16 pdh);
+
+/**
+ * smap_drive_free() - Release resources for a specific drive
+ * @mp:  struct mpool_descriptor *
+ * @pdh: u16, drive number within the mpool_descriptor
+ *
+ * Free space map for pool drive pdh including partial (failed) inits;
+ * caller must ensure pdh is not in use.
+ *
+ * Return: void
+ */
+void smap_drive_free(struct mpool_descriptor *mp, u16 pdh);
+
+/**
+ * smap_insert() - Inject an entry to an smap for existing object
+ * @mp: struct mpool_descriptor *
+ * @pdh: drive number within the mpool_descriptor
+ * @zoneaddr: starting zone for entry
+ * @zonecnt: number of zones in entry
+ *
+ * Add entry to space map for an existing object with a strip on drive pdh
+ * starting at zoneaddr and continuing for zonecnt zones.
+ *
+ * Used, in part for superblocks.
+ *
+ * Return: 0 if successful, -errno otherwise
+ */
+int smap_insert(struct mpool_descriptor *mp, u16 pdh, u64 zoneaddr, u32 zonecnt);
+
+/**
+ * smap_alloc() - Allocate a new contiguous zone range on a specific drive
+ * @mp: struct mpool_descriptor
+ * @pdh: u16, drive number within the mpool_descriptor
+ * @zonecnt: u64, the number of zones requested
+ * @sapolicy: enum smap_space_type, usable only, spare only, etc.
+ * @zoneaddr: u64 *, the starting zone for the allocated range
+ * @align: no. of zones (must be a power-of-2)
+ *
+ * Attempt to allocate zonecnt contiguous zones on drive pdh
+ * in accordance with space allocation policy sapolicy.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int smap_alloc(struct mpool_descriptor *mp, u16 pdh, u64 zonecnt,
+	       enum smap_space_type sapolicy, u64 *zoneaddr, u64 align);
+
+/**
+ * smap_free() - Free a previously allocated range of zones in the smap
+ * @mp: struct mpool_descriptor *
+ * @pdh: u16, number of the disk within the mpool_descriptor
+ * @zoneaddr: u64, starting zone for the range to free
+ * @zonecnt: u16, the number of zones in the range
+ *
+ * Free currently allocated space starting at zoneaddr
+ * and continuing for zonecnt zones.
+ *
+ * Return: 0 if successful, -errno otherwise
+ */
+int smap_free(struct mpool_descriptor *mp, u16 pdh, u64 zoneaddr, u16 zonecnt);
+
+/*
+ * smap internal functions
+ */
+
+/**
+ * smap_mclass_usage() - Get the media class usage for a given mclass.
+ * @mp:
+ * @mclass: if MP_MED_ALL, return the sum of the stats for all media classes,
+ *	else the stats only for one media class.
+ * @usage: output
+ *
+ * Locking: the caller should hold the pds_pdvlock at least in read to
+ *	    be protected against media classes updates.
+ */
+void smap_mclass_usage(struct mpool_descriptor *mp, u8 mclass, struct mpool_usage *usage);
+
+/**
+ * smap_log_mpool_usage() - check drive mpool free usable space %, and log a message if needed
+ * @ws:
+ */
+void smap_log_mpool_usage(struct work_struct *ws);
+
+/**
+ * smap_wait_usage_done() - wait for periodical job for logging pd free usable space % to complete
+ * @mp:
+ */
+void smap_wait_usage_done(struct mpool_descriptor *mp);
+
+int smap_init(void) __cold;
+void smap_exit(void) __cold;
+
+#endif /* MPOOL_SMAP_H */
-- 
2.17.2




More information about the Linux-nvme mailing list