diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 13:39:11 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 13:39:11 -0800 |
commit | 9228ff90387e276ad67b10c0eb525c9d6a57d5e9 (patch) | |
tree | e7c87b68daba7cf7ca4c342c6b52165bd78fbe16 /include/linux | |
parent | 9360b53661a2c7754517b2925580055bacc8ec38 (diff) | |
parent | d2ec180c23a5a1bfe34d8638b0342a47c00cf70f (diff) | |
download | lwn-9228ff90387e276ad67b10c0eb525c9d6a57d5e9.tar.gz lwn-9228ff90387e276ad67b10c0eb525c9d6a57d5e9.zip |
Merge branch 'for-3.8/drivers' of git://git.kernel.dk/linux-block
Pull block driver update from Jens Axboe:
"Now that the core bits are in, here are the driver bits for 3.8. The
branch contains:
- A huge pile of drbd bits that were dumped from the 3.7 merge
window. Following that, it was both made perfectly clear that
there is going to be no more over-the-wall pulls and how the
situation on individual pulls can be improved.
- A few cleanups from Akinobu Mita for drbd and cciss.
- Queue improvement for loop from Lukas. This grew into adding a
generic interface for waiting/checking an even with a specific
lock, allowing this to be pulled out of md and now loop and drbd is
also using it.
- A few fixes for xen back/front block driver from Roger Pau Monne.
- Partition improvements from Stephen Warren, allowing partiion UUID
to be used as an identifier."
* 'for-3.8/drivers' of git://git.kernel.dk/linux-block: (609 commits)
drbd: update Kconfig to match current dependencies
drbd: Fix drbdsetup wait-connect, wait-sync etc... commands
drbd: close race between drbd_set_role and drbd_connect
drbd: respect no-md-barriers setting also when changed online via disk-options
drbd: Remove obsolete check
drbd: fixup after wait_even_lock_irq() addition to generic code
loop: Limit the number of requests in the bio list
wait: add wait_event_lock_irq() interface
xen-blkfront: free allocated page
xen-blkback: move free persistent grants code
block: partition: msdos: provide UUIDs for partitions
init: reduce PARTUUID min length to 1 from 36
block: store partition_meta_info.uuid as a string
cciss: use check_signature()
cciss: cleanup bitops usage
drbd: use copy_highpage
drbd: if the replication link breaks during handshake, keep retrying
drbd: check return of kmalloc in receive_uuids
drbd: Broadcast sync progress no more often than once per second
drbd: don't try to clear bits once the disk has failed
...
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/drbd.h | 81 | ||||
-rw-r--r-- | include/linux/drbd_genl.h | 378 | ||||
-rw-r--r-- | include/linux/drbd_genl_api.h | 55 | ||||
-rw-r--r-- | include/linux/drbd_limits.h | 90 | ||||
-rw-r--r-- | include/linux/drbd_nl.h | 163 | ||||
-rw-r--r-- | include/linux/drbd_tag_magic.h | 84 | ||||
-rw-r--r-- | include/linux/genhd.h | 8 | ||||
-rw-r--r-- | include/linux/genl_magic_func.h | 422 | ||||
-rw-r--r-- | include/linux/genl_magic_struct.h | 277 | ||||
-rw-r--r-- | include/linux/idr.h | 11 | ||||
-rw-r--r-- | include/linux/loop.h | 3 | ||||
-rw-r--r-- | include/linux/lru_cache.h | 67 | ||||
-rw-r--r-- | include/linux/wait.h | 164 |
13 files changed, 1473 insertions, 330 deletions
diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 47e3d4850584..0c5a18ec322c 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -51,12 +51,11 @@ #endif - extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.13" -#define API_VERSION 88 +#define REL_VERSION "8.4.2" +#define API_VERSION 1 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 96 +#define PRO_VERSION_MAX 101 enum drbd_io_error_p { @@ -66,7 +65,8 @@ enum drbd_io_error_p { }; enum drbd_fencing_p { - FP_DONT_CARE, + FP_NOT_AVAIL = -1, /* Not a policy */ + FP_DONT_CARE = 0, FP_RESOURCE, FP_STONITH }; @@ -102,6 +102,20 @@ enum drbd_on_congestion { OC_DISCONNECT, }; +enum drbd_read_balancing { + RB_PREFER_LOCAL, + RB_PREFER_REMOTE, + RB_ROUND_ROBIN, + RB_LEAST_PENDING, + RB_CONGESTED_REMOTE, + RB_32K_STRIPING, + RB_64K_STRIPING, + RB_128K_STRIPING, + RB_256K_STRIPING, + RB_512K_STRIPING, + RB_1M_STRIPING, +}; + /* KEEP the order, do not delete or insert. Only append. */ enum drbd_ret_code { ERR_CODE_BASE = 100, @@ -122,7 +136,7 @@ enum drbd_ret_code { ERR_AUTH_ALG = 120, ERR_AUTH_ALG_ND = 121, ERR_NOMEM = 122, - ERR_DISCARD = 123, + ERR_DISCARD_IMPOSSIBLE = 123, ERR_DISK_CONFIGURED = 124, ERR_NET_CONFIGURED = 125, ERR_MANDATORY_TAG = 126, @@ -130,8 +144,8 @@ enum drbd_ret_code { ERR_INTR = 129, /* EINTR */ ERR_RESIZE_RESYNC = 130, ERR_NO_PRIMARY = 131, - ERR_SYNC_AFTER = 132, - ERR_SYNC_AFTER_CYCLE = 133, + ERR_RESYNC_AFTER = 132, + ERR_RESYNC_AFTER_CYCLE = 133, ERR_PAUSE_IS_SET = 134, ERR_PAUSE_IS_CLEAR = 135, ERR_PACKET_NR = 137, @@ -155,6 +169,14 @@ enum drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, + ERR_RES_NOT_KNOWN = 158, + ERR_RES_IN_USE = 159, + ERR_MINOR_CONFIGURED = 160, + ERR_MINOR_EXISTS = 161, + ERR_INVALID_REQUEST = 162, + ERR_NEED_APV_100 = 163, + ERR_NEED_ALLOW_TWO_PRI = 164, + ERR_MD_UNCLEAN = 165, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -296,7 +318,8 @@ enum drbd_state_rv { SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */ SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */ - SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */ + SS_O_VOL_PEER_PRI = -20, + SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */ }; /* from drbd_strings.c */ @@ -313,7 +336,9 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define MDF_FULL_SYNC (1 << 3) #define MDF_WAS_UP_TO_DATE (1 << 4) #define MDF_PEER_OUT_DATED (1 << 5) -#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_AL_CLEAN (1 << 7) +#define MDF_AL_DISABLED (1 << 8) enum drbd_uuid_index { UI_CURRENT, @@ -333,37 +358,23 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) +/* magic numbers used in meta data and network packets */ #define DRBD_MAGIC 0x83740267 -#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) #define DRBD_MAGIC_BIG 0x835a -#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) +#define DRBD_MAGIC_100 0x8620ec20 + +#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3) +#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4) +#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5) + + +/* how I came up with this magic? + * base64 decode "actlog==" ;) */ +#define DRBD_AL_MAGIC 0x69cb65a2 /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 #define DRBD_MD_INDEX_FLEX_EXT -2 #define DRBD_MD_INDEX_FLEX_INT -3 -/* Start of the new netlink/connector stuff */ - -#define DRBD_NL_CREATE_DEVICE 0x01 -#define DRBD_NL_SET_DEFAULTS 0x02 - - -/* For searching a vacant cn_idx value */ -#define CN_IDX_STEP 6977 - -struct drbd_nl_cfg_req { - int packet_type; - unsigned int drbd_minor; - int flags; - unsigned short tag_list[]; -}; - -struct drbd_nl_cfg_reply { - int packet_type; - unsigned int minor; - int ret_code; /* enum ret_code or set_st_err_t */ - unsigned short tag_list[]; /* only used with get_* calls */ -}; - #endif diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h new file mode 100644 index 000000000000..d0d8fac8a6e4 --- /dev/null +++ b/include/linux/drbd_genl.h @@ -0,0 +1,378 @@ +/* + * General overview: + * full generic netlink message: + * |nlmsghdr|genlmsghdr|<payload> + * + * payload: + * |optional fixed size family header|<sequence of netlink attributes> + * + * sequence of netlink attributes: + * I chose to have all "top level" attributes NLA_NESTED, + * corresponding to some real struct. + * So we have a sequence of |tla, len|<nested nla sequence> + * + * nested nla sequence: + * may be empty, or contain a sequence of netlink attributes + * representing the struct fields. + * + * The tag number of any field (regardless of containing struct) + * will be available as T_ ## field_name, + * so you cannot have the same field name in two differnt structs. + * + * The tag numbers themselves are per struct, though, + * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type, + * which we won't use here). + * The tag numbers are used as index in the respective nla_policy array. + * + * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy + * genl_magic_struct.h + * generates the struct declaration, + * generates an entry in the tla enum, + * genl_magic_func.h + * generates an entry in the static tla policy + * with .type = NLA_NESTED + * generates the static <struct_name>_nl_policy definition, + * and static conversion functions + * + * genl_magic_func.h + * + * GENL_mc_group(group) + * genl_magic_struct.h + * does nothing + * genl_magic_func.h + * defines and registers the mcast group, + * and provides a send helper + * + * GENL_notification(op_name, op_num, mcast_group, tla list) + * These are notifications to userspace. + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * does nothing + * + * mcast group: the name of the mcast group this notification should be + * expected on + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + * + * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations" + * These are requests from userspace. + * + * _op and _notification share the same "number space", + * op_nr will be assigned to "genlmsghdr->cmd" + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * generates an entry in the static genl_ops array, + * and static register/unregister functions to + * genl_register_family_with_ops(). + * + * flags and handler: + * GENL_op_init( .doit = x, .dumpit = y, .flags = something) + * GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + */ + +/* + * STRUCTS + */ + +/* this is sent kernel -> userland on various error conditions, and contains + * informational textual info, which is supposedly human readable. + * The computer relevant return code is in the drbd_genlmsghdr. + */ +GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, + /* "arbitrary" size strings, nla_policy.len = 0 */ + __str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0) +) + +/* Configuration requests typically need a context to operate on. + * Possible keys are device minor (fits in the drbd_genlmsghdr), + * the replication link (aka connection) name, + * and/or the replication group (aka resource) name, + * and the volume id within the resource. */ +GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, + __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) + __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) + __bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128) + __bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128) +) + +GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, + __str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128) + __str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128) + __s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx) + + /* use the resize command to try and change the disk_size */ + __u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size) + /* we could change the max_bio_bvecs, + * but it won't propagate through the stack */ + __u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs) + + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) + + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) + __s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + + __flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) + __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) + /* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */ + __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) +) + +GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, + __str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) +) + +GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, + __str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, + shared_secret, SHARED_SECRET_MAX) + __str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) + __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) + /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ +) + +GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate) +) + +GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, + __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) + __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) + __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) +) + +GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, + /* the reason of the broadcast, + * if this is an event triggered broadcast. */ + __u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason) + __u32_field(2, DRBD_F_REQUIRED, current_state) + __u64_field(3, DRBD_GENLA_F_MANDATORY, capacity) + __u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid) + + /* These are for broadcast from after state change work. + * prev_state and new_state are from the moment the state change took + * place, new_state is not neccessarily the same as current_state, + * there may have been more state changes since. Which will be + * broadcasted soon, in their respective after state change work. */ + __u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state) + __u32_field(6, DRBD_GENLA_F_MANDATORY, new_state) + + /* if we have a local disk: */ + __bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags) + __u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total) + __u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos) + /* and in case resync or online verify is active */ + __u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed) + + /* for pre and post notifications of helper execution */ + __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) + __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) + + __u64_field(15, 0, send_cnt) + __u64_field(16, 0, recv_cnt) + __u64_field(17, 0, read_cnt) + __u64_field(18, 0, writ_cnt) + __u64_field(19, 0, al_writ_cnt) + __u64_field(20, 0, bm_writ_cnt) + __u32_field(21, 0, ap_bio_cnt) + __u32_field(22, 0, ap_pending_cnt) + __u32_field(23, 0, rs_pending_cnt) +) + +GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, + __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) + __u64_field(2, DRBD_GENLA_F_MANDATORY, ov_stop_sector) +) + +GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm) +) + +GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, + __u32_field(1, DRBD_F_REQUIRED, timeout_type) +) + +GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) +) + +GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) +) + +/* + * Notifications and commands (genlmsghdr->cmd) + */ +GENL_mc_group(events) + + /* kernel -> userspace announcement of changes */ +GENL_notification( + DRBD_EVENT, 1, events, + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY) +) + + /* query kernel for specific or all info */ +GENL_op( + DRBD_ADM_GET_STATUS, 2, + GENL_op_init( + .doit = drbd_adm_get_status, + .dumpit = drbd_adm_get_status_all, + /* anyone may ask for the status, + * it is broadcasted anyways */ + ), + /* To select the object .doit. + * Or a subset of objects in .dumpit. */ + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) +) + + /* add DRBD minor devices as volumes to resources */ +GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + + /* add or delete resources */ +GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + +GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, + GENL_doit(drbd_adm_resource_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_CONNECT, 10, + GENL_doit(drbd_adm_connect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_CHG_NET_OPTS, 29, + GENL_doit(drbd_adm_net_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) +) + +GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + +GENL_op(DRBD_ADM_ATTACH, 12, + GENL_doit(drbd_adm_attach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED) +) + +GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, + GENL_doit(drbd_adm_disk_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_RESIZE, 13, + GENL_doit(drbd_adm_resize), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_PRIMARY, 14, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_SECONDARY, 15, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_NEW_C_UUID, 16, + GENL_doit(drbd_adm_new_c_uuid), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_START_OV, 17, + GENL_doit(drbd_adm_start_ov), + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY) +) + +GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) + +GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h new file mode 100644 index 000000000000..9ef50d51e34e --- /dev/null +++ b/include/linux/drbd_genl_api.h @@ -0,0 +1,55 @@ +#ifndef DRBD_GENL_STRUCT_H +#define DRBD_GENL_STRUCT_H + +/** + * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests + * @minor: + * For admin requests (user -> kernel): which minor device to operate on. + * For (unicast) replies or informational (broadcast) messages + * (kernel -> user): which minor device the information is about. + * If we do not operate on minors, but on connections or resources, + * the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT + * is used instead. + * @flags: possible operation modifiers (relevant only for user->kernel): + * DRBD_GENL_F_SET_DEFAULTS + * @volume: + * When creating a new minor (adding it to a resource), the resource needs + * to know which volume number within the resource this is supposed to be. + * The volume number corresponds to the same volume number on the remote side, + * whereas the minor number on the remote side may be different + * (union with flags). + * @ret_code: kernel->userland unicast cfg reply return code (union with flags); + */ +struct drbd_genlmsghdr { + __u32 minor; + union { + __u32 flags; + __s32 ret_code; + }; +}; + +/* To be used in drbd_genlmsghdr.flags */ +enum { + DRBD_GENL_F_SET_DEFAULTS = 1, +}; + +enum drbd_state_info_bcast_reason { + SIB_GET_STATUS_REPLY = 1, + SIB_STATE_CHANGE = 2, + SIB_HELPER_PRE = 3, + SIB_HELPER_POST = 4, + SIB_SYNC_PROGRESS = 5, +}; + +/* hack around predefined gcc/cpp "linux=1", + * we cannot possibly include <1/drbd_genl.h> */ +#undef linux + +#include <linux/drbd.h> +#define GENL_MAGIC_VERSION API_VERSION +#define GENL_MAGIC_FAMILY drbd +#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr) +#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h> +#include <linux/genl_magic_struct.h> + +#endif diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index fb670bf603f7..1fa19c5f5e64 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -16,29 +16,37 @@ #define DEBUG_RANGE_CHECK 0 #define DRBD_MINOR_COUNT_MIN 1 -#define DRBD_MINOR_COUNT_MAX 256 +#define DRBD_MINOR_COUNT_MAX 255 #define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_MINOR_COUNT_SCALE '1' + +#define DRBD_VOLUME_MAX 65535 #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 +#define DRBD_DIALOG_REFRESH_SCALE '1' /* valid port number */ #define DRBD_PORT_MIN 1 #define DRBD_PORT_MAX 0xffff +#define DRBD_PORT_SCALE '1' /* startup { */ /* if you want more than 3.4 days, disable */ #define DRBD_WFC_TIMEOUT_MIN 0 #define DRBD_WFC_TIMEOUT_MAX 300000 #define DRBD_WFC_TIMEOUT_DEF 0 +#define DRBD_WFC_TIMEOUT_SCALE '1' #define DRBD_DEGR_WFC_TIMEOUT_MIN 0 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0 +#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1' #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 +#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1' /* }*/ /* net { */ @@ -47,75 +55,91 @@ #define DRBD_TIMEOUT_MIN 1 #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ +#define DRBD_TIMEOUT_SCALE '1' /* If backing disk takes longer than disk_timeout, mark the disk as failed */ #define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ #define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ #define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ +#define DRBD_DISK_TIMEOUT_SCALE '1' /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 #define DRBD_CONNECT_INT_DEF 10 /* seconds */ +#define DRBD_CONNECT_INT_SCALE '1' /* keep-alive probes when idle */ #define DRBD_PING_INT_MIN 1 #define DRBD_PING_INT_MAX 120 #define DRBD_PING_INT_DEF 10 +#define DRBD_PING_INT_SCALE '1' /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 #define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 +#define DRBD_PING_TIMEO_SCALE '1' /* max number of write requests between write barriers */ #define DRBD_MAX_EPOCH_SIZE_MIN 1 #define DRBD_MAX_EPOCH_SIZE_MAX 20000 #define DRBD_MAX_EPOCH_SIZE_DEF 2048 +#define DRBD_MAX_EPOCH_SIZE_SCALE '1' /* I don't think that a tcp send buffer of more than 10M is useful */ #define DRBD_SNDBUF_SIZE_MIN 0 #define DRBD_SNDBUF_SIZE_MAX (10<<20) #define DRBD_SNDBUF_SIZE_DEF 0 +#define DRBD_SNDBUF_SIZE_SCALE '1' #define DRBD_RCVBUF_SIZE_MIN 0 #define DRBD_RCVBUF_SIZE_MAX (10<<20) #define DRBD_RCVBUF_SIZE_DEF 0 +#define DRBD_RCVBUF_SIZE_SCALE '1' /* @4k PageSize -> 128kB - 512MB */ #define DRBD_MAX_BUFFERS_MIN 32 #define DRBD_MAX_BUFFERS_MAX 131072 #define DRBD_MAX_BUFFERS_DEF 2048 +#define DRBD_MAX_BUFFERS_SCALE '1' /* @4k PageSize -> 4kB - 512MB */ #define DRBD_UNPLUG_WATERMARK_MIN 1 #define DRBD_UNPLUG_WATERMARK_MAX 131072 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16) +#define DRBD_UNPLUG_WATERMARK_SCALE '1' /* 0 is disabled. * 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 -#define DRBD_KO_COUNT_DEF 0 +#define DRBD_KO_COUNT_DEF 7 +#define DRBD_KO_COUNT_SCALE '1' /* } */ /* syncer { */ /* FIXME allow rate to be zero? */ -#define DRBD_RATE_MIN 1 +#define DRBD_RESYNC_RATE_MIN 1 /* channel bonding 10 GbE, or other hardware */ -#define DRBD_RATE_MAX (4 << 20) -#define DRBD_RATE_DEF 250 /* kb/second */ +#define DRBD_RESYNC_RATE_MAX (4 << 20) +#define DRBD_RESYNC_RATE_DEF 250 +#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ /* less than 7 would hit performance unnecessarily. - * 3833 is the largest prime that still does fit - * into 64 sectors of activity log */ + * 919 slots context information per transaction, + * 32k activity log, 4k transaction size, + * one transaction in flight: + * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 3833 -#define DRBD_AL_EXTENTS_DEF 127 +#define DRBD_AL_EXTENTS_MAX 6433 +#define DRBD_AL_EXTENTS_DEF 1237 +#define DRBD_AL_EXTENTS_SCALE '1' -#define DRBD_AFTER_MIN -1 -#define DRBD_AFTER_MAX 255 -#define DRBD_AFTER_DEF -1 +#define DRBD_MINOR_NUMBER_MIN -1 +#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1) +#define DRBD_MINOR_NUMBER_DEF -1 +#define DRBD_MINOR_NUMBER_SCALE '1' /* } */ @@ -124,11 +148,12 @@ * the upper limit with 64bit kernel, enough ram and flexible meta data * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ -#define DRBD_DISK_SIZE_SECT_MIN 0 -#define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) -#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_MIN 0 +#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40)) +#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_SCALE 's' /* sectors */ -#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT @@ -136,38 +161,59 @@ #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR #define DRBD_ON_CONGESTION_DEF OC_BLOCK +#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 +#define DRBD_MAX_BIO_BVECS_SCALE '1' #define DRBD_C_PLAN_AHEAD_MIN 0 #define DRBD_C_PLAN_AHEAD_MAX 300 -#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */ +#define DRBD_C_PLAN_AHEAD_DEF 20 +#define DRBD_C_PLAN_AHEAD_SCALE '1' #define DRBD_C_DELAY_TARGET_MIN 1 #define DRBD_C_DELAY_TARGET_MAX 100 #define DRBD_C_DELAY_TARGET_DEF 10 +#define DRBD_C_DELAY_TARGET_SCALE '1' #define DRBD_C_FILL_TARGET_MIN 0 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ -#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ +#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ +#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */ -#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */ +#define DRBD_C_MAX_RATE_MIN 250 #define DRBD_C_MAX_RATE_MAX (4 << 20) #define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */ -#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */ +#define DRBD_C_MIN_RATE_MIN 0 #define DRBD_C_MIN_RATE_MAX (4 << 20) -#define DRBD_C_MIN_RATE_DEF 4096 +#define DRBD_C_MIN_RATE_DEF 250 +#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */ #define DRBD_CONG_FILL_MIN 0 #define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ #define DRBD_CONG_FILL_DEF 0 +#define DRBD_CONG_FILL_SCALE 's' /* sectors */ #define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN #define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX #define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE + +#define DRBD_PROTOCOL_DEF DRBD_PROT_C + +#define DRBD_DISK_BARRIER_DEF 0 +#define DRBD_DISK_FLUSHES_DEF 1 +#define DRBD_DISK_DRAIN_DEF 1 +#define DRBD_MD_FLUSHES_DEF 1 +#define DRBD_TCP_CORK_DEF 1 +#define DRBD_AL_UPDATES_DEF 1 + +#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 +#define DRBD_ALWAYS_ASBP_DEF 0 +#define DRBD_USE_RLE_DEF 1 -#undef RANGE #endif diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h deleted file mode 100644 index a8706f08ab36..000000000000 --- a/include/linux/drbd_nl.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - PAKET( name, - TYPE ( pn, pr, member ) - ... - ) - - You may never reissue one of the pn arguments -*/ - -#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64) -#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined" -#endif - -NL_PACKET(primary, 1, - NL_BIT( 1, T_MAY_IGNORE, primary_force) -) - -NL_PACKET(secondary, 2, ) - -NL_PACKET(disk_conf, 3, - NL_INT64( 2, T_MAY_IGNORE, disk_size) - NL_STRING( 3, T_MANDATORY, backing_dev, 128) - NL_STRING( 4, T_MANDATORY, meta_dev, 128) - NL_INTEGER( 5, T_MANDATORY, meta_dev_idx) - NL_INTEGER( 6, T_MAY_IGNORE, on_io_error) - NL_INTEGER( 7, T_MAY_IGNORE, fencing) - NL_BIT( 37, T_MAY_IGNORE, use_bmbv) - NL_BIT( 53, T_MAY_IGNORE, no_disk_flush) - NL_BIT( 54, T_MAY_IGNORE, no_md_flush) - /* 55 max_bio_size was available in 8.2.6rc2 */ - NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) - NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) - NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) - NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout) -) - -NL_PACKET(detach, 4, - NL_BIT( 88, T_MANDATORY, detach_force) -) - -NL_PACKET(net_conf, 5, - NL_STRING( 8, T_MANDATORY, my_addr, 128) - NL_STRING( 9, T_MANDATORY, peer_addr, 128) - NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX) - NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX) - NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX) - NL_INTEGER( 14, T_MAY_IGNORE, timeout) - NL_INTEGER( 15, T_MANDATORY, wire_protocol) - NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int) - NL_INTEGER( 17, T_MAY_IGNORE, ping_int) - NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size) - NL_INTEGER( 19, T_MAY_IGNORE, max_buffers) - NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark) - NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size) - NL_INTEGER( 22, T_MAY_IGNORE, ko_count) - NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p) - NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p) - NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p) - NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) - NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) - NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) - NL_INTEGER( 81, T_MAY_IGNORE, on_congestion) - NL_INTEGER( 82, T_MAY_IGNORE, cong_fill) - NL_INTEGER( 83, T_MAY_IGNORE, cong_extents) - /* 59 addr_family was available in GIT, never released */ - NL_BIT( 60, T_MANDATORY, mind_af) - NL_BIT( 27, T_MAY_IGNORE, want_lose) - NL_BIT( 28, T_MAY_IGNORE, two_primaries) - NL_BIT( 41, T_MAY_IGNORE, always_asbp) - NL_BIT( 61, T_MAY_IGNORE, no_cork) - NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) - NL_BIT( 70, T_MANDATORY, dry_run) -) - -NL_PACKET(disconnect, 6, - NL_BIT( 84, T_MAY_IGNORE, force) -) - -NL_PACKET(resize, 7, - NL_INT64( 29, T_MAY_IGNORE, resize_size) - NL_BIT( 68, T_MAY_IGNORE, resize_force) - NL_BIT( 69, T_MANDATORY, no_resync) -) - -NL_PACKET(syncer_conf, 8, - NL_INTEGER( 30, T_MAY_IGNORE, rate) - NL_INTEGER( 31, T_MAY_IGNORE, after) - NL_INTEGER( 32, T_MAY_IGNORE, al_extents) -/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume) - * NL_INTEGER( 72, T_MAY_IGNORE, dp_interval) - * NL_INTEGER( 73, T_MAY_IGNORE, throttle_th) - * NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th) - * feature will be reimplemented differently with 8.3.9 */ - NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX) - NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) - NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) - NL_BIT( 65, T_MAY_IGNORE, use_rle) - NL_INTEGER( 75, T_MAY_IGNORE, on_no_data) - NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead) - NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target) - NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target) - NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate) - NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate) -) - -NL_PACKET(invalidate, 9, ) -NL_PACKET(invalidate_peer, 10, ) -NL_PACKET(pause_sync, 11, ) -NL_PACKET(resume_sync, 12, ) -NL_PACKET(suspend_io, 13, ) -NL_PACKET(resume_io, 14, ) -NL_PACKET(outdate, 15, ) -NL_PACKET(get_config, 16, ) -NL_PACKET(get_state, 17, - NL_INTEGER( 33, T_MAY_IGNORE, state_i) -) - -NL_PACKET(get_uuids, 18, - NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64))) - NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags) -) - -NL_PACKET(get_timeout_flag, 19, - NL_BIT( 36, T_MAY_IGNORE, use_degraded) -) - -NL_PACKET(call_helper, 20, - NL_STRING( 38, T_MAY_IGNORE, helper, 32) -) - -/* Tag nr 42 already allocated in drbd-8.1 development. */ - -NL_PACKET(sync_progress, 23, - NL_INTEGER( 43, T_MAY_IGNORE, sync_progress) -) - -NL_PACKET(dump_ee, 24, - NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32) - NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX) - NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX) - NL_INT64( 48, T_MAY_IGNORE, ee_sector) - NL_INT64( 49, T_MAY_IGNORE, ee_block_id) - NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10) -) - -NL_PACKET(start_ov, 25, - NL_INT64( 66, T_MAY_IGNORE, start_sector) -) - -NL_PACKET(new_c_uuid, 26, - NL_BIT( 63, T_MANDATORY, clear_bm) -) - -#ifdef NL_RESPONSE -NL_RESPONSE(return_code_only, 27) -#endif - -#undef NL_PACKET -#undef NL_INTEGER -#undef NL_INT64 -#undef NL_BIT -#undef NL_STRING -#undef NL_RESPONSE diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h deleted file mode 100644 index 82de1f9e48b1..000000000000 --- a/include/linux/drbd_tag_magic.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef DRBD_TAG_MAGIC_H -#define DRBD_TAG_MAGIC_H - -#define TT_END 0 -#define TT_REMOVED 0xE000 - -/* declare packet_type enums */ -enum packet_types { -#define NL_PACKET(name, number, fields) P_ ## name = number, -#define NL_RESPONSE(name, number) P_ ## name = number, -#define NL_INTEGER(pn, pr, member) -#define NL_INT64(pn, pr, member) -#define NL_BIT(pn, pr, member) -#define NL_STRING(pn, pr, member, len) -#include <linux/drbd_nl.h> - P_nl_after_last_packet, -}; - -/* These struct are used to deduce the size of the tag lists: */ -#define NL_PACKET(name, number, fields) \ - struct name ## _tag_len_struct { fields }; -#define NL_INTEGER(pn, pr, member) \ - int member; int tag_and_len ## member; -#define NL_INT64(pn, pr, member) \ - __u64 member; int tag_and_len ## member; -#define NL_BIT(pn, pr, member) \ - unsigned char member:1; int tag_and_len ## member; -#define NL_STRING(pn, pr, member, len) \ - unsigned char member[len]; int member ## _len; \ - int tag_and_len ## member; -#include <linux/drbd_nl.h> - -/* declare tag-list-sizes */ -static const int tag_list_sizes[] = { -#define NL_PACKET(name, number, fields) 2 fields , -#define NL_INTEGER(pn, pr, member) + 4 + 4 -#define NL_INT64(pn, pr, member) + 4 + 8 -#define NL_BIT(pn, pr, member) + 4 + 1 -#define NL_STRING(pn, pr, member, len) + 4 + (len) -#include <linux/drbd_nl.h> -}; - -/* The two highest bits are used for the tag type */ -#define TT_MASK 0xC000 -#define TT_INTEGER 0x0000 -#define TT_INT64 0x4000 -#define TT_BIT 0x8000 -#define TT_STRING 0xC000 -/* The next bit indicates if processing of the tag is mandatory */ -#define T_MANDATORY 0x2000 -#define T_MAY_IGNORE 0x0000 -#define TN_MASK 0x1fff -/* The remaining 13 bits are used to enumerate the tags */ - -#define tag_type(T) ((T) & TT_MASK) -#define tag_number(T) ((T) & TN_MASK) - -/* declare tag enums */ -#define NL_PACKET(name, number, fields) fields -enum drbd_tags { -#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr , -#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr , -#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr , -#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr , -#include <linux/drbd_nl.h> -}; - -struct tag { - const char *name; - int type_n_flags; - int max_len; -}; - -/* declare tag names */ -#define NL_PACKET(name, number, fields) fields -static const struct tag tag_descriptions[] = { -#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) }, -#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) }, -#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) }, -#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) }, -#include <linux/drbd_nl.h> -}; - -#endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4f440b3e89fe..79b8bba19363 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -88,10 +88,14 @@ struct disk_stats { }; #define PARTITION_META_INFO_VOLNAMELTH 64 -#define PARTITION_META_INFO_UUIDLTH 16 +/* + * Enough for the string representation of any kind of UUID plus NULL. + * EFI UUID is 36 characters. MSDOS UUID is 11 characters. + */ +#define PARTITION_META_INFO_UUIDLTH 37 struct partition_meta_info { - u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */ + char uuid[PARTITION_META_INFO_UUIDLTH]; u8 volname[PARTITION_META_INFO_VOLNAMELTH]; }; diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h new file mode 100644 index 000000000000..023bc346b877 --- /dev/null +++ b/include/linux/genl_magic_func.h @@ -0,0 +1,422 @@ +#ifndef GENL_MAGIC_FUNC_H +#define GENL_MAGIC_FUNC_H + +#include <linux/genl_magic_struct.h> + +/* + * Magic: declare tla policy {{{1 + * Magic: declare nested policies + * {{{2 + */ +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + [tag_name] = { .type = NLA_NESTED }, + +static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ +{ s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \ + __put, __is_signed) \ + [attr_nr] = { .type = nla_type }, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ + __get, __put, __is_signed) \ + [attr_nr] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, + +#include GENL_MAGIC_INCLUDE_FILE + +#ifndef __KERNEL__ +#ifndef pr_info +#define pr_info(args...) fprintf(stderr, args); +#endif +#endif + +#ifdef GENL_MAGIC_DEBUG +static void dprint_field(const char *dir, int nla_type, + const char *name, void *valp) +{ + __u64 val = valp ? *(__u32 *)valp : 1; + switch (nla_type) { + case NLA_U8: val = (__u8)val; + case NLA_U16: val = (__u16)val; + case NLA_U32: val = (__u32)val; + pr_info("%s attr %s: %d 0x%08x\n", dir, + name, (int)val, (unsigned)val); + break; + case NLA_U64: + val = *(__u64*)valp; + pr_info("%s attr %s: %lld 0x%08llx\n", dir, + name, (long long)val, (unsigned long long)val); + break; + case NLA_FLAG: + if (val) + pr_info("%s attr %s: set\n", dir, name); + break; + } +} + +static void dprint_array(const char *dir, int nla_type, + const char *name, const char *val, unsigned len) +{ + switch (nla_type) { + case NLA_NUL_STRING: + if (len && val[len-1] == '\0') + len--; + pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val); + break; + default: + /* we can always show 4 byte, + * thats what nlattr are aligned to. */ + pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n", + dir, name, len, val[0], val[1], val[2], val[3]); + } +} + +#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b); + +/* Name is a member field name of the struct s. + * If s is NULL (only parsing, no copy requested in *_from_attrs()), + * nla is supposed to point to the attribute containing the information + * corresponding to that struct member. */ +#define DPRINT_FIELD(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_field(dir, nla_type, #name, &s->name); \ + else if (nla) \ + dprint_field(dir, nla_type, #name, \ + (nla_type == NLA_FLAG) ? NULL \ + : nla_data(nla)); \ + } while (0) + +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_array(dir, nla_type, #name, \ + s->name, s->name ## _len); \ + else if (nla) \ + dprint_array(dir, nla_type, #name, \ + nla_data(nla), nla_len(nla)); \ + } while (0) +#else +#define DPRINT_TLA(a, op, b) do {} while (0) +#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0) +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0) +#endif + +/* + * Magic: provide conversion functions {{{1 + * populate struct from attribute table: + * {{{2 + */ + +/* processing of generic netlink messages is serialized. + * use one static buffer for parsing of nested attributes */ +static struct nlattr *nested_attr_tb[128]; + +#ifndef BUILD_BUG_ON +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#endif + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +/* *_from_attrs functions are static, but potentially unused */ \ +static int __ ## s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info, bool exclude_invariants) \ +{ \ + const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ + struct nlattr *tla = info->attrs[tag_number]; \ + struct nlattr **ntb = nested_attr_tb; \ + struct nlattr *nla; \ + int err; \ + BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \ + if (!tla) \ + return -ENOMSG; \ + DPRINT_TLA(#s_name, "<=-", #tag_name); \ + err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + if (err) \ + return err; \ + \ + s_fields \ + return 0; \ +} __attribute__((unused)) \ +static int s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, false); \ +} __attribute__((unused)) \ +static int s_name ## _from_attrs_for_change(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, true); \ +} __attribute__((unused)) \ + +#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) \ + nla = ntb[attr_nr]; \ + if (nla) { \ + if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ + pr_info("<< must not change invariant attr: %s\n", #name); \ + return -EEXIST; \ + } \ + assignment; \ + } else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ + /* attribute missing from payload, */ \ + /* which was expected */ \ + } else if ((attr_flag) & DRBD_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla)) + +/* validate_nla() already checked nla_len <= maxlen appropriately. */ +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ + if (s) \ + s->name ## _len = \ + __get(s->name, nla, maxlen); \ + DPRINT_ARRAY("<<", nla_type, name, s, nla)) + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +/* + * Magic: define op number to op name mapping {{{1 + * {{{2 + */ +const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +{ + switch (cmd) { +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + case op_num: return #op_name; +#include GENL_MAGIC_INCLUDE_FILE + default: + return "unknown"; + } +} + +#ifdef __KERNEL__ +#include <linux/stringify.h> +/* + * Magic: define genl_ops {{{1 + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ +{ \ + handler \ + .cmd = op_name, \ + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ +}, + +#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +static struct genl_ops ZZZ_genl_ops[] __read_mostly = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +/* + * Define the genl_family, multicast groups, {{{1 + * and provide register/unregister functions. + * {{{2 + */ +#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +static struct genl_family ZZZ_genl_family __read_mostly = { + .id = GENL_ID_GENERATE, + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, +}; + +/* + * Magic: define multicast groups + * Magic: define multicast group registration helper + */ +#undef GENL_mc_group +#define GENL_mc_group(group) \ +static struct genl_multicast_group \ +CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \ + .name = #group, \ +}; \ +static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ + struct sk_buff *skb, gfp_t flags) \ +{ \ + unsigned int group_id = \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \ + if (!group_id) \ + return -EINVAL; \ + return genlmsg_multicast(skb, 0, group_id, flags); \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +{ + int err = genl_register_family_with_ops(&ZZZ_genl_family, + ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops)); + if (err) + return err; +#undef GENL_mc_group +#define GENL_mc_group(group) \ + err = genl_register_mc_group(&ZZZ_genl_family, \ + &CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \ + if (err) \ + goto fail; \ + else \ + pr_info("%s: mcg %s: %u\n", #group, \ + __stringify(GENL_MAGIC_FAMILY), \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id); + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_mc_group +#define GENL_mc_group(group) + return 0; +fail: + genl_unregister_family(&ZZZ_genl_family); + return err; +} + +void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +{ + genl_unregister_family(&ZZZ_genl_family); +} + +/* + * Magic: provide conversion functions {{{1 + * populate skb from struct. + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \ + const bool exclude_sensitive) \ +{ \ + struct nlattr *tla = nla_nest_start(skb, tag_number); \ + if (!tla) \ + goto nla_put_failure; \ + DPRINT_TLA(#s_name, "-=>", #tag_name); \ + s_fields \ + nla_nest_end(skb, tla); \ + return 0; \ + \ +nla_put_failure: \ + if (tla) \ + nla_nest_cancel(skb, tla); \ + return -EMSGSIZE; \ +} \ +static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 0); \ +} \ +static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 1); \ +} + + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ + DPRINT_FIELD(">>", nla_type, name, s, NULL); \ + if (__put(skb, attr_nr, s->name)) \ + goto nla_put_failure; \ + } + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ + DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ + if (__put(skb, attr_nr, min_t(int, maxlen, \ + s->name ## _len + (nla_type == NLA_NUL_STRING)),\ + s->name)) \ + goto nla_put_failure; \ + } + +#include GENL_MAGIC_INCLUDE_FILE + + +/* Functions for initializing structs to default values. */ + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) +#undef __u32_field_def +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __s32_field_def +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __flg_field_def +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __str_field_def +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + memset(x->name, 0, sizeof(x->name)); \ + x->name ## _len = 0; +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \ +static void set_ ## s_name ## _defaults(struct s_name *x) { \ +s_fields \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +#endif /* __KERNEL__ */ + +/* }}}1 */ +#endif /* GENL_MAGIC_FUNC_H */ +/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h new file mode 100644 index 000000000000..eecd19b37001 --- /dev/null +++ b/include/linux/genl_magic_struct.h @@ -0,0 +1,277 @@ +#ifndef GENL_MAGIC_STRUCT_H +#define GENL_MAGIC_STRUCT_H + +#ifndef GENL_MAGIC_FAMILY +# error "you need to define GENL_MAGIC_FAMILY before inclusion" +#endif + +#ifndef GENL_MAGIC_VERSION +# error "you need to define GENL_MAGIC_VERSION before inclusion" +#endif + +#ifndef GENL_MAGIC_INCLUDE_FILE +# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" +#endif + +#include <linux/genetlink.h> +#include <linux/types.h> + +#define CONCAT__(a,b) a ## b +#define CONCAT_(a,b) CONCAT__(a,b) + +extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); + +/* + * Extension of genl attribute validation policies {{{2 + */ + +/* + * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not + * know about. This flag can be set in nlattr->nla_type to indicate that this + * attribute must not be ignored. + * + * We check and remove this flag in drbd_nla_check_mandatory() before + * validating the attribute types and lengths via nla_parse_nested(). + */ +#define DRBD_GENLA_F_MANDATORY (1 << 14) + +/* + * Flags specific to drbd and not visible at the netlink layer, used in + * <struct>_from_attrs and <struct>_to_skb: + * + * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is + * invalid. + * + * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be + * included in unpriviledged get requests or broadcasts. + * + * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but + * cannot subsequently be changed. + */ +#define DRBD_F_REQUIRED (1 << 0) +#define DRBD_F_SENSITIVE (1 << 1) +#define DRBD_F_INVARIANT (1 << 2) + +#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY)) + +/* }}}1 + * MAGIC + * multi-include macro expansion magic starts here + */ + +/* MAGIC helpers {{{2 */ + +/* possible field types */ +#define __flg_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, char, \ + nla_get_u8, nla_put_u8, false) +#define __u8_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ + nla_get_u8, nla_put_u8, false) +#define __u16_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ + nla_get_u16, nla_put_u16, false) +#define __u32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ + nla_get_u32, nla_put_u32, false) +#define __s32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ + nla_get_u32, nla_put_u32, true) +#define __u64_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ + nla_get_u64, nla_put_u64, false) +#define __str_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ + nla_strlcpy, nla_put, false) +#define __bin_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ + nla_memcpy, nla_put, false) + +/* fields with default values */ +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + __flg_field(attr_nr, attr_flag, name) +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + __u32_field(attr_nr, attr_flag, name) +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + __s32_field(attr_nr, attr_flag, name) +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + __str_field(attr_nr, attr_flag, name, maxlen) + +#define GENL_op_init(args...) args +#define GENL_doit(handler) \ + .doit = handler, \ + .flags = GENL_ADMIN_PERM, +#define GENL_dumpit(handler) \ + .dumpit = handler, \ + .flags = GENL_ADMIN_PERM, + +/* }}}1 + * Magic: define the enum symbols for genl_ops + * Magic: define the enum symbols for top level attributes + * Magic: define the enum symbols for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + op_name = op_num, + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + op_name = op_num, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + tag_name = tag_number, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, \ + __get, __put, __is_signed) \ + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, \ + maxlen, __get, __put, __is_signed) \ + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: compile time assert unique numbers for operations + * Magic: -"- unique numbers for top level attributes + * Magic: -"- unique numbers for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) \ + case op_name: + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + case op_name: + +static inline void ct_assert_unique_operations(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + case tag_number: + +static inline void ct_assert_unique_top_level_attributes(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ +{ \ + switch (0) { \ + s_fields \ + ; \ + } \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ + case attr_nr: + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ + case attr_nr: + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: declare structs + * struct <name> { + * fields + * }; + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +struct s_name { s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ + type name; + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ + type name[maxlen]; \ + __u32 name ## _len; + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 */ +#endif /* GENL_MAGIC_STRUCT_H */ +/* vim: set foldmethod=marker nofoldenable : */ diff --git a/include/linux/idr.h b/include/linux/idr.h index 87259a44c251..de7e190f1af4 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id); void __init idr_init_cache(void); +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + #endif /* __IDR_H__ */ diff --git a/include/linux/loop.h b/include/linux/loop.h index 6492181bcb1d..460b60fa7adf 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -53,10 +53,13 @@ struct loop_device { spinlock_t lo_lock; struct bio_list lo_bio_list; + unsigned int lo_bio_count; int lo_state; struct mutex lo_ctl_mutex; struct task_struct *lo_thread; wait_queue_head_t lo_event; + /* wait queue for incoming requests */ + wait_queue_head_t lo_req_wait; struct request_queue *lo_queue; struct gendisk *lo_disk; diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index cafc7f99e124..4019013c6593 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -166,9 +166,11 @@ struct lc_element { /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ unsigned lc_number; - /* special label when on free list */ #define LC_FREE (~0U) + + /* for pending changes */ + unsigned lc_new_number; }; struct lru_cache { @@ -176,6 +178,7 @@ struct lru_cache { struct list_head lru; struct list_head free; struct list_head in_use; + struct list_head to_be_changed; /* the pre-created kmem cache to allocate the objects from */ struct kmem_cache *lc_cache; @@ -186,7 +189,7 @@ struct lru_cache { size_t element_off; /* number of elements (indices) */ - unsigned int nr_elements; + unsigned int nr_elements; /* Arbitrary limit on maximum tracked objects. Practical limit is much * lower due to allocation failures, probably. For typical use cases, * nr_elements should be a few thousand at most. @@ -194,18 +197,19 @@ struct lru_cache { * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) + /* allow to accumulate a few (index:label) changes, + * but no more than max_pending_changes */ + unsigned int max_pending_changes; + /* number of elements currently on to_be_changed list */ + unsigned int pending_changes; + /* statistics */ - unsigned used; /* number of lelements currently on in_use list */ - unsigned long hits, misses, starving, dirty, changed; + unsigned used; /* number of elements currently on in_use list */ + unsigned long hits, misses, starving, locked, changed; /* see below: flag-bits for lru_cache */ unsigned long flags; - /* when changing the label of an index element */ - unsigned int new_number; - - /* for paranoia when changing the label of an index element */ - struct lc_element *changing_element; void *lc_private; const char *name; @@ -221,10 +225,15 @@ enum { /* debugging aid, to catch concurrent access early. * user needs to guarantee exclusive access by proper locking! */ __LC_PARANOIA, - /* if we need to change the set, but currently there is a changing - * transaction pending, we are "dirty", and must deferr further - * changing requests */ + + /* annotate that the set is "dirty", possibly accumulating further + * changes, until a transaction is finally triggered */ __LC_DIRTY, + + /* Locked, no further changes allowed. + * Also used to serialize changing transactions. */ + __LC_LOCKED, + /* if we need to change the set, but currently there is no free nor * unused element available, we are "starving", and must not give out * further references, to guarantee that eventually some refcnt will @@ -236,9 +245,11 @@ enum { }; #define LC_PARANOIA (1<<__LC_PARANOIA) #define LC_DIRTY (1<<__LC_DIRTY) +#define LC_LOCKED (1<<__LC_LOCKED) #define LC_STARVING (1<<__LC_STARVING) extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); @@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); -extern void lc_changed(struct lru_cache *lc, struct lc_element *e); +extern void lc_committed(struct lru_cache *lc); struct seq_file; extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); @@ -258,32 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char void (*detail) (struct seq_file *, struct lc_element *)); /** - * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set * @lc: the lru cache to operate on * - * Note that the reference counts and order on the active and lru lists may - * still change. Returns true if we acquired the lock. + * Allows (expects) the set to be "dirty". Note that the reference counts and + * order on the active and lru lists may still change. Used to serialize + * changing transactions. Returns true if we aquired the lock. */ -static inline int lc_try_lock(struct lru_cache *lc) +static inline int lc_try_lock_for_transaction(struct lru_cache *lc) { - return !test_and_set_bit(__LC_DIRTY, &lc->flags); + return !test_and_set_bit(__LC_LOCKED, &lc->flags); } /** + * lc_try_lock - variant to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Note that the reference counts and order on the active and lru lists may + * still change. Only works on a "clean" set. Returns true if we aquired the + * lock, which means there are no pending changes, and any further attempt to + * change the set will not succeed until the next lc_unlock(). + */ +extern int lc_try_lock(struct lru_cache *lc); + +/** * lc_unlock - unlock @lc, allow lc_get() to change the set again * @lc: the lru cache to operate on */ static inline void lc_unlock(struct lru_cache *lc) { clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_LOCKED, &lc->flags); } -static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e = lc_find(lc, enr); - return e && e->refcnt; -} +extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); #define lc_entry(ptr, type, member) \ container_of(ptr, type, member) diff --git a/include/linux/wait.h b/include/linux/wait.h index 168dfe122dd3..7cb64d4b499d 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -550,6 +550,170 @@ do { \ __ret; \ }) + +#define __wait_event_lock_irq(wq, condition, lock, cmd) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock); \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_lock_irq_cmd - sleep until a condition gets true. The + * condition is checked under the lock. This + * is expected to be called with the lock + * taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before cmd + * and schedule() and reacquired afterwards. + * @cmd: a command which is invoked outside the critical section before + * sleep + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before invoking the cmd and going to sleep and is reacquired + * afterwards. + */ +#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, cmd); \ +} while (0) + +/** + * wait_event_lock_irq - sleep until a condition gets true. The + * condition is checked under the lock. This + * is expected to be called with the lock + * taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + */ +#define wait_event_lock_irq(wq, condition, lock) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, ); \ +} while (0) + + +#define __wait_event_interruptible_lock_irq(wq, condition, \ + lock, ret, cmd) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + ret = -ERESTARTSYS; \ + break; \ + } \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock); \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +/** + * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true. + * The condition is checked under the lock. This is expected to + * be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before cmd and + * schedule() and reacquired afterwards. + * @cmd: a command which is invoked outside the critical section before + * sleep + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before invoking the cmd and going to sleep and is reacquired + * afterwards. + * + * The macro will return -ERESTARTSYS if it was interrupted by a signal + * and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \ +({ \ + int __ret = 0; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq(wq, condition, \ + lock, __ret, cmd); \ + __ret; \ +}) + +/** + * wait_event_interruptible_lock_irq - sleep until a condition gets true. + * The condition is checked under the lock. This is expected + * to be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + * + * The macro will return -ERESTARTSYS if it was interrupted by a signal + * and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible_lock_irq(wq, condition, lock) \ +({ \ + int __ret = 0; \ + \ + if (!(condition)) \ + __wait_event_interruptible_lock_irq(wq, condition, \ + lock, __ret, ); \ + __ret; \ +}) + + /* * These are the old interfaces to sleep waiting for an event. * They are racy. DO NOT use them, use the wait_event* interfaces above. |