diff --git a/distrib/sets/lists/minix-comp/mi b/distrib/sets/lists/minix-comp/mi index 247ab29e5..f4eb5b464 100644 --- a/distrib/sets/lists/minix-comp/mi +++ b/distrib/sets/lists/minix-comp/mi @@ -1231,6 +1231,7 @@ ./usr/include/minix/procfs.h minix-comp ./usr/include/minix/profile.h minix-comp ./usr/include/minix/queryparam.h minix-comp +./usr/include/minix/rmib.h minix-comp ./usr/include/minix/rs.h minix-comp ./usr/include/minix/safecopies.h minix-comp ./usr/include/minix/sched.h minix-comp diff --git a/distrib/sets/lists/minix-tests/mi b/distrib/sets/lists/minix-tests/mi index 8e20fe782..7612d44f6 100644 --- a/distrib/sets/lists/minix-tests/mi +++ b/distrib/sets/lists/minix-tests/mi @@ -84,6 +84,9 @@ ./usr/tests/minix-posix/ddekit/ddekittest_driver minix-tests ./usr/tests/minix-posix/ddekit/system.conf minix-tests ./usr/tests/minix-posix/mod minix-tests pic +./usr/tests/minix-posix/rmibtest minix-tests +./usr/tests/minix-posix/rmibtest/rmibtest minix-tests +./usr/tests/minix-posix/rmibtest/rmibtest.conf minix-tests ./usr/tests/minix-posix/run minix-tests ./usr/tests/minix-posix/t10a minix-tests ./usr/tests/minix-posix/t11a minix-tests @@ -195,6 +198,7 @@ ./usr/tests/minix-posix/testkyua minix-tests ./usr/tests/minix-posix/testmfs minix-tests ./usr/tests/minix-posix/testrelpol minix-tests +./usr/tests/minix-posix/testrmib minix-tests ./usr/tests/minix-posix/testsh1 minix-tests ./usr/tests/minix-posix/testsh2 minix-tests ./usr/tests/minix-posix/testvm minix-tests diff --git a/etc/mtree/NetBSD.dist.base b/etc/mtree/NetBSD.dist.base index 4fac55a87..8c314dace 100644 --- a/etc/mtree/NetBSD.dist.base +++ b/etc/mtree/NetBSD.dist.base @@ -261,6 +261,7 @@ ./usr/tests/minix-posix ./usr/tests/minix-posix/blocktest ./usr/tests/minix-posix/ddekit +./usr/tests/minix-posix/rmibtest # this one is for term(1) /set type=dir uid=0 gid=5 mode=775 diff --git a/minix/include/minix/Makefile b/minix/include/minix/Makefile index 77627ba9f..dbc780436 100644 --- 
a/minix/include/minix/Makefile +++ b/minix/include/minix/Makefile @@ -16,7 +16,7 @@ INCS+= acpi.h audio_fw.h bitmap.h \ keymap.h log.h mmio.h mthread.h minlib.h \ netdriver.h optset.h padconf.h partition.h portio.h \ priv.h procfs.h profile.h queryparam.h \ - rs.h safecopies.h sched.h sef.h sffs.h \ + rmib.h rs.h safecopies.h sched.h sef.h sffs.h \ sound.h spin.h sys_config.h sysctl.h sysinfo.h \ syslib.h sysutil.h timers.h type.h \ u64.h usb.h usb_ch9.h vbox.h \ diff --git a/minix/include/minix/com.h b/minix/include/minix/com.h index 9fcb12552..f6a728630 100644 --- a/minix/include/minix/com.h +++ b/minix/include/minix/com.h @@ -607,9 +607,18 @@ /* Process event message from PM. */ #define PROC_EVENT (COMMON_RQ_BASE+3) +/* MIB information request for the root node of a registered subtree. */ +#define COMMON_MIB_INFO (COMMON_RQ_BASE+4) + +/* MIB sysctl request on a registered subtree. */ +#define COMMON_MIB_CALL (COMMON_RQ_BASE+5) + /* Reply to process event message to PM. */ #define PROC_EVENT_REPLY (COMMON_RS_BASE+0) +/* Reply to MIB information or sysctl request. 
*/ +#define COMMON_MIB_REPLY (COMMON_RS_BASE+1) + /*===========================================================================* * Messages for VM server * *===========================================================================*/ @@ -1013,8 +1022,10 @@ #define IS_MIB_CALL(type) (((type) & ~0xff) == MIB_BASE) #define MIB_SYSCTL (MIB_BASE + 0) /* sysctl(2) */ +#define MIB_REGISTER (MIB_BASE + 1) /* mount subtree */ +#define MIB_DEREGISTER (MIB_BASE + 2) /* unmount subtree */ -#define NR_MIB_CALLS 1 /* highest number from base plus one */ +#define NR_MIB_CALLS 3 /* highest number from base plus one */ /*===========================================================================* * Internal codes used by several services * diff --git a/minix/include/minix/drivers.h b/minix/include/minix/drivers.h index f975b0c22..da94fef14 100644 --- a/minix/include/minix/drivers.h +++ b/minix/include/minix/drivers.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -39,5 +40,6 @@ #include #include #include +#include #endif diff --git a/minix/include/minix/ipc.h b/minix/include/minix/ipc.h index b5051a785..721c3cc86 100644 --- a/minix/include/minix/ipc.h +++ b/minix/include/minix/ipc.h @@ -1322,6 +1322,24 @@ typedef struct { } mess_lsys_kern_vsafecopy; _ASSERT_MSG_SIZE(mess_lsys_kern_vsafecopy); +typedef struct { + uint32_t root_id; + uint32_t flags; + unsigned int csize; + unsigned int clen; + unsigned int miblen; + int mib[CTL_SHORTNAME]; + uint8_t padding[4]; +} mess_lsys_mib_register; +_ASSERT_MSG_SIZE(mess_lsys_mib_register); + +typedef struct { + uint32_t req_id; + ssize_t status; + uint8_t padding[48]; +} mess_lsys_mib_reply; +_ASSERT_MSG_SIZE(mess_lsys_mib_reply); + typedef struct { int devind; int port; @@ -1480,6 +1498,34 @@ typedef struct { } mess_mib_lc_sysctl; _ASSERT_MSG_SIZE(mess_mib_lc_sysctl); +typedef struct { + uint32_t req_id; + uint32_t root_id; + cp_grant_id_t name_grant; + unsigned int name_len; + cp_grant_id_t oldp_grant; + size_t 
oldp_len; + cp_grant_id_t newp_grant; + size_t newp_len; + endpoint_t user_endpt; + uint32_t flags; + uint32_t root_ver; + uint32_t tree_ver; + uint8_t padding[8]; +} mess_mib_lsys_call; +_ASSERT_MSG_SIZE(mess_mib_lsys_call); + +typedef struct { + uint32_t req_id; + uint32_t root_id; + cp_grant_id_t name_grant; + size_t name_size; + cp_grant_id_t desc_grant; + size_t desc_size; + uint8_t padding[32]; +} mess_mib_lsys_info; +_ASSERT_MSG_SIZE(mess_mib_lsys_info); + typedef struct { off_t offset; void *addr; @@ -2278,6 +2324,8 @@ typedef struct noxfer_message { mess_lsys_krn_sys_vdevio m_lsys_krn_sys_vdevio; mess_lsys_krn_sys_vumap m_lsys_krn_sys_vumap; mess_lsys_kern_vsafecopy m_lsys_kern_vsafecopy; + mess_lsys_mib_register m_lsys_mib_register; + mess_lsys_mib_reply m_lsys_mib_reply; mess_lsys_pci_busc_get_bar m_lsys_pci_busc_get_bar; mess_lsys_pm_getepinfo m_lsys_pm_getepinfo; mess_lsys_pm_getprocnr m_lsys_pm_getprocnr; @@ -2297,6 +2345,8 @@ typedef struct noxfer_message { mess_lsys_vm_update m_lsys_vm_update; mess_lsys_vm_vmremap m_lsys_vm_vmremap; mess_mib_lc_sysctl m_mib_lc_sysctl; + mess_mib_lsys_call m_mib_lsys_call; + mess_mib_lsys_info m_mib_lsys_info; mess_mmap m_mmap; mess_net_netdrv_dl_conf m_net_netdrv_dl_conf; mess_net_netdrv_dl_getstat_s m_net_netdrv_dl_getstat_s; diff --git a/minix/include/minix/rmib.h b/minix/include/minix/rmib.h new file mode 100644 index 000000000..0e055a404 --- /dev/null +++ b/minix/include/minix/rmib.h @@ -0,0 +1,152 @@ +#ifndef _MINIX_RMIB_H +#define _MINIX_RMIB_H + +/* + * This header file is for use by services that use the remote MIB (RMIB) + * functionality of libsys. RMIB allows services to mount and handle certain + * subtrees of the MIB service's sysctl tree. + */ + +#include + +/* + * This structure contains a number of less heavily used parameters for handler + * functions, mainly to provide extensibility while limiting argument clutter. 
+ */ +struct rmib_call { + endpoint_t call_endpt; /* endpoint of the user process */ + const int *call_name; /* remaining part of the name */ + unsigned int call_namelen; /* length of the remaining name part */ + unsigned int call_flags; /* RMIB_FLAG_ call flags */ + uint32_t call_rootver; /* version of all nodes in subtree */ + uint32_t call_treever; /* version of the entire MIB tree */ +}; + +/* + * Call flags. + * + * TODO: this is effectively a flag used on the wire. This should be turned + * into a proper definition shared with the MIB service. As long as we have + * only one flag anyway, this is not exactly urgent though. + */ +#define RMIB_FLAG_AUTH 0x1 /* user has superuser privileges */ + +struct rmib_node; +struct rmib_oldp; +struct rmib_newp; + +typedef ssize_t (*rmib_func_ptr)(struct rmib_call *, struct rmib_node *, + struct rmib_oldp *, struct rmib_newp *); + +/* + * The central structure for remote MIB nodes. This is essentially a somewhat + * cut-down version of the node structure used within the MIB service. See the + * source code of that service for several details that apply here as well. + * The 'rnode_' prefix makes it possible to include both this header file and + * the MIB service's internal header file at once--neat if useless. 
+ */ +struct rmib_node { + uint32_t rnode_flags; /* CTLTYPE_ type and CTLFLAG_ flags */ + size_t rnode_size; /* size of associated data */ + union ixfer_rnode_val_u { + bool rvu_bool; /* immediate boolean */ + int rvu_int; /* immediate integer */ + u_quad_t rvu_quad; /* immediate quad */ + uint32_t rvu_clen; /* number of actual children */ + } rnode_val_u; + union pxfer_rnode_ptr_u { + void *rpu_data; /* struct or string data pointer */ + struct rmib_node *rpu_cptr; /* child node array */ + } rnode_ptr_u; + rmib_func_ptr rnode_func; /* handler function */ + const char *rnode_name; /* node name string */ + const char *rnode_desc; /* node description (may be NULL) */ +}; +#define rnode_bool rnode_val_u.rvu_bool +#define rnode_int rnode_val_u.rvu_int +#define rnode_quad rnode_val_u.rvu_quad +#define rnode_clen rnode_val_u.rvu_clen +#define rnode_data rnode_ptr_u.rpu_data +#define rnode_cptr rnode_ptr_u.rpu_cptr + +/* + * Various macros to initialize nodes at compile time. The flag, size, and + * immediate-value parameters are parenthesized in the expansions, so that + * callers may pass compound expressions (such as a conditional) for them + * without the expression being regrouped by the surrounding operators. + */ +#define RMIB_NODE(f,t,n,d) { \ + .rnode_flags = CTLTYPE_NODE | CTLFLAG_READONLY | \ + CTLFLAG_PERMANENT | (f), \ + .rnode_size = __arraycount(t), \ + .rnode_cptr = t, \ + .rnode_name = n, \ + .rnode_desc = d \ +} +#define RMIB_FUNC(f,s,fp,n,d) { \ + .rnode_flags = CTLFLAG_PERMANENT | (f), \ + .rnode_size = (s), \ + .rnode_func = fp, \ + .rnode_name = n, \ + .rnode_desc = d \ +} +#define RMIB_BOOL(f,b,n,d) { \ + .rnode_flags = CTLTYPE_BOOL | CTLFLAG_PERMANENT | \ + CTLFLAG_IMMEDIATE | (f), \ + .rnode_size = sizeof(bool), \ + .rnode_bool = (b), \ + .rnode_name = n, \ + .rnode_desc = d \ +} +#define RMIB_INT(f,i,n,d) { \ + .rnode_flags = CTLTYPE_INT | CTLFLAG_PERMANENT | \ + CTLFLAG_IMMEDIATE | (f), \ + .rnode_size = sizeof(int), \ + .rnode_int = (i), \ + .rnode_name = n, \ + .rnode_desc = d \ +} +#define RMIB_QUAD(f,q,n,d) { \ + .rnode_flags = CTLTYPE_QUAD | CTLFLAG_PERMANENT | \ + CTLFLAG_IMMEDIATE | (f), \ + .rnode_size = sizeof(u_quad_t), \ + .rnode_quad = (q), \ + .rnode_name = n, \ + .rnode_desc = d \ +} +#define
_RMIB_DATA(f,s,p,n,d) { \ + .rnode_flags = CTLFLAG_PERMANENT | (f), \ + .rnode_size = (s), \ + .rnode_data = __UNCONST(p), \ + .rnode_name = n, \ + .rnode_desc = d \ +} +/* + * The following macros really require a pointer to the proper data type; weird + * casts may not trigger compiler warnings but do allow for memory corruption. + * The first three need to be passed a pointer to a bool, int, and u_quad_t, + * respectively. RMIB_STRING needs a pointer to a character array, so that + * sizeof(array) yields the proper size. Since RMIB_STRUCT may be given a + * pointer to either a structure or an array, it must also be given a size. + */ +#define RMIB_BOOLPTR(f,p,n,d) \ + _RMIB_DATA(CTLTYPE_BOOL | (f), sizeof(*(p)), p, n, d) +#define RMIB_INTPTR(f,p,n,d) \ + _RMIB_DATA(CTLTYPE_INT | (f), sizeof(*(p)), p, n, d) +#define RMIB_QUADPTR(f,p,n,d) \ + _RMIB_DATA(CTLTYPE_QUAD | (f), sizeof(*(p)), p, n, d) +#define RMIB_STRING(f,p,n,d) \ + _RMIB_DATA(CTLTYPE_STRING | (f), sizeof(p), p, n, d) +#define RMIB_STRUCT(f,s,p,n,d) \ + _RMIB_DATA(CTLTYPE_STRUCT | (f), (s), p, n, d) + +/* Shortcut flag macros. */ +#define RMIB_RO CTLFLAG_READONLY /* shortcut for read-only nodes */ +#define RMIB_RW CTLFLAG_READWRITE /* shortcut for read-write nodes */ + +/* Function prototypes. 
*/ +int rmib_register(const int * name, unsigned int namelen, struct rmib_node *); +int rmib_deregister(struct rmib_node *); +void rmib_reset(void); +void rmib_process(const message *, int); + +int rmib_inrange(struct rmib_oldp *, size_t); +size_t rmib_getoldlen(struct rmib_oldp *); +ssize_t rmib_copyout(struct rmib_oldp *, size_t, const void * __restrict, + size_t); +int rmib_copyin(struct rmib_newp * __restrict, void * __restrict, size_t); +ssize_t rmib_readwrite(struct rmib_call *, struct rmib_node *, + struct rmib_oldp *, struct rmib_newp *); + +#endif /* !_MINIX_RMIB_H */ diff --git a/minix/include/minix/sysctl.h b/minix/include/minix/sysctl.h index ebd1bfda9..38a817521 100644 --- a/minix/include/minix/sysctl.h +++ b/minix/include/minix/sysctl.h @@ -51,6 +51,7 @@ /* Identifiers for subnodes of MINIX_MIB. */ #define MIB_NODES 1 #define MIB_OBJECTS 2 +#define MIB_REMOTES 3 /* Identifiers for subnodes of MINIX_PROC. */ #define PROC_LIST 1 diff --git a/minix/kernel/system/do_safecopy.c b/minix/kernel/system/do_safecopy.c index 6002a9011..13433e69d 100644 --- a/minix/kernel/system/do_safecopy.c +++ b/minix/kernel/system/do_safecopy.c @@ -216,13 +216,13 @@ int verify_grant( *offset_result = g.cp_u.cp_direct.cp_start + offset_in; *e_granter = granter; } else if(g.cp_flags & CPF_MAGIC) { - /* Currently, it is hardcoded that only FS may do - * magic grants. + /* Currently, it is hardcoded that only VFS and MIB may do + * magic grants. TODO: this should be a system.conf flag. 
*/ - if(granter != VFS_PROC_NR) { + if(granter != VFS_PROC_NR && granter != MIB_PROC_NR) { printf( "verify_grant: magic grant verify failed: granter (%d) " - "is not FS (%d)\n", granter, VFS_PROC_NR); + "not allowed\n", granter); return EPERM; } diff --git a/minix/lib/libsys/Makefile b/minix/lib/libsys/Makefile index d19fbe140..935ebbc06 100644 --- a/minix/lib/libsys/Makefile +++ b/minix/lib/libsys/Makefile @@ -36,6 +36,7 @@ SRCS+= \ optset.c \ panic.c \ proceventmask.c \ + rmib.c \ safecopies.c \ sched_start.c \ sched_stop.c \ diff --git a/minix/lib/libsys/rmib.c b/minix/lib/libsys/rmib.c new file mode 100644 index 000000000..430135366 --- /dev/null +++ b/minix/lib/libsys/rmib.c @@ -0,0 +1,949 @@ +/* Service support for remote MIB subtrees - by D.C. van Moolenbroek */ +/* + * In effect, this is a lightweight version of the MIB service's main and tree + * code. Some parts of the code have even been copied almost as is, even + * though the copy here operates on slightly different data structures in order + * to keep the implementation more lightweight. For clarification on many + * aspects of the source code here, see the source code of the MIB service. + * + * There is no way for this module to get to know about MIB service deaths + * without possibly interfering with the main code of the service this module + * is a part of. As a result, re-registration of mount points after a MIB + * service restart is not automatic. Instead, the main service code could + * implement re-registration by first calling rmib_reset() and then making the + * appropriate rmib_register() calls again. TODO: it would be nicer if this + * module implemented re-registration, but that requires saving the MIB path + * for each of the registered subtrees. + */ + +#include +#include +#include + +/* Structures for outgoing and incoming data, deliberately distinctly named. 
*/ +struct rmib_oldp { + cp_grant_id_t oldp_grant; + size_t oldp_len; +}; + +struct rmib_newp { + cp_grant_id_t newp_grant; + size_t newp_len; +}; + +/* + * The maximum field size, in bytes, for which updates (i.e., writes) to the + * field do not require dynamic memory allocation. By policy, non-root users + * may not update fields exceeding this size at all. For strings, this size + * includes an extra byte for adding a null terminator if missing. As the name + * indicates, a buffer of this size is placed on the stack. + */ +#define RMIB_STACKBUF 257 + +/* + * The maximum number of subtrees that this service can mount. This value can + * be increased without any problems, but it is already quite high in practice. + */ +#define RMIB_MAX_SUBTREES 16 + +/* + * The array of subtree root nodes. Each root node's array index is the root + * identifier used in communication with the MIB service. + */ +static struct rmib_node *rnodes[RMIB_MAX_SUBTREES] = { NULL }; + +/* + * Return TRUE or FALSE indicating whether the given offset is within the range + * of data that is to be copied out. This call can be used to test whether + * certain bits of data need to be prepared for copying at all. + */ +int +rmib_inrange(struct rmib_oldp * oldp, size_t off) +{ + + if (oldp == NULL) + return FALSE; + + return (off < oldp->oldp_len); +} + +/* + * Return the total length of the requested data. This should not be used + * directly except in highly unusual cases, such as particular node requests + * where the request semantics blatantly violate overall sysctl(2) semantics. + */ +size_t +rmib_getoldlen(struct rmib_oldp * oldp) +{ + + if (oldp == NULL) + return 0; + + return oldp->oldp_len; +} + +/* + * Copy out (partial) data to the user. The copy is automatically limited to + * the range of data requested by the user. Return the requested length on + * success (for the caller's convenience) or an error code on failure. 
+ */ +ssize_t +rmib_copyout(struct rmib_oldp * __restrict oldp, size_t off, + const void * __restrict buf, size_t size) +{ + size_t len; + int r; + + len = size; + assert(len <= SSIZE_MAX); + + if (oldp == NULL || off >= oldp->oldp_len) + return size; /* nothing to do */ + + if (len > oldp->oldp_len - off) + len = oldp->oldp_len - off; + + if ((r = sys_safecopyto(MIB_PROC_NR, oldp->oldp_grant, off, + (vir_bytes)buf, len)) != OK) + return r; + + return size; +} + +/* + * Copy in data from the user. The given length must match exactly the length + * given by the user. Return OK or an error code. + */ +int +rmib_copyin(struct rmib_newp * __restrict newp, void * __restrict buf, + size_t len) +{ + + if (newp == NULL || len != newp->newp_len) + return EINVAL; + + if (len == 0) + return OK; + + return sys_safecopyfrom(MIB_PROC_NR, newp->newp_grant, 0, + (vir_bytes)buf, len); +} + +/* + * Copy out a node to userland, using the exchange format for nodes (namely, + * a sysctlnode structure). Return the size of the object that is (or, if the + * node falls outside the requested data range, would be) copied out on + * success, or a negative error code on failure. + */ +static ssize_t +rmib_copyout_node(struct rmib_call * call, struct rmib_oldp * oldp, + ssize_t off, unsigned int id, const struct rmib_node * rnode) +{ + struct sysctlnode scn; + int visible; + + if (!rmib_inrange(oldp, off)) + return sizeof(scn); /* nothing to do */ + + memset(&scn, 0, sizeof(scn)); + + /* + * The RMIB implementation does not overload flags, so it also need not + * hide any of them from the user. + */ + scn.sysctl_flags = SYSCTL_VERSION | rnode->rnode_flags; + scn.sysctl_num = id; + strlcpy(scn.sysctl_name, rnode->rnode_name, sizeof(scn.sysctl_name)); + scn.sysctl_ver = call->call_rootver; + scn.sysctl_size = rnode->rnode_size; + + /* Some information is only visible if the user can access the node. 
*/ + visible = (!(rnode->rnode_flags & CTLFLAG_PRIVATE) || + (call->call_flags & RMIB_FLAG_AUTH)); + + /* + * For immediate types, store the immediate value in the resulting + * structure, unless the caller is not authorized to obtain the value. + */ + if ((rnode->rnode_flags & CTLFLAG_IMMEDIATE) && visible) { + switch (SYSCTL_TYPE(rnode->rnode_flags)) { + case CTLTYPE_BOOL: + scn.sysctl_bdata = rnode->rnode_bool; + break; + case CTLTYPE_INT: + scn.sysctl_idata = rnode->rnode_int; + break; + case CTLTYPE_QUAD: + scn.sysctl_qdata = rnode->rnode_quad; + break; + } + } + + /* Special rules apply to parent nodes. */ + if (SYSCTL_TYPE(rnode->rnode_flags) == CTLTYPE_NODE) { + /* Report the node size the way NetBSD does, just in case. */ + scn.sysctl_size = sizeof(scn); + + /* + * For real parent nodes, report child information, but only if + * the node itself is accessible by the caller. For function- + * driven nodes, set a nonzero function address, for trace(1). + */ + if (rnode->rnode_func == NULL && visible) { + scn.sysctl_csize = rnode->rnode_size; + scn.sysctl_clen = rnode->rnode_clen; + } else if (rnode->rnode_func != NULL) + scn.sysctl_func = SYSCTL_NODE_FN; + } + + /* Copy out the resulting node. */ + return rmib_copyout(oldp, off, &scn, sizeof(scn)); +} + +/* + * Given a query on a non-leaf (parent) node, provide the user with an array of + * this node's children. + */ +static ssize_t +rmib_query(struct rmib_call * call, struct rmib_node * rparent, + struct rmib_oldp * oldp, struct rmib_newp * newp) +{ + struct sysctlnode scn; + struct rmib_node *rnode; + unsigned int id; + ssize_t r, off; + + /* If the user passed in version numbers, check them. */ + if (newp != NULL) { + if ((r = rmib_copyin(newp, &scn, sizeof(scn))) != OK) + return r; + + if (SYSCTL_VERS(scn.sysctl_flags) != SYSCTL_VERSION) + return EINVAL; + + /* + * If a node version number is given, it must match the version + * of the subtree or the version of the entire MIB tree. 
+ */ + if (scn.sysctl_ver != 0 && + scn.sysctl_ver != call->call_rootver && + scn.sysctl_ver != call->call_treever) + return EINVAL; + } + + /* Enumerate the child nodes of the given parent node. */ + off = 0; + + for (id = 0; id < rparent->rnode_size; id++) { + rnode = &rparent->rnode_cptr[id]; + + if (rnode->rnode_flags == 0) + continue; + + if ((r = rmib_copyout_node(call, oldp, off, id, rnode)) < 0) + return r; + off += r; + } + + return off; +} + +/* + * Copy out a node description to userland, using the exchange format for node + * descriptions (namely, a sysctldesc structure). Return the size of the + * object that is (or, if the description falls outside the requested data + * range, would be) copied out on success, or a negative error code on failure. + * The function may return 0 to indicate that nothing was copied out after all. + */ +static ssize_t +rmib_copyout_desc(struct rmib_call * call, struct rmib_oldp * oldp, + ssize_t off, unsigned int id, const struct rmib_node * rnode) +{ + struct sysctldesc scd; + size_t len, size; + ssize_t r; + + /* Descriptions of private nodes are considered private too. */ + if ((rnode->rnode_flags & CTLFLAG_PRIVATE) && + !(call->call_flags & RMIB_FLAG_AUTH)) + return 0; + + /* + * Unfortunately, we do not have a scratch buffer here. Instead, copy + * out the description structure and the actual description string + * separately. This is more costly, but remote subtrees are already + * not going to give the best performance ever. We do optimize for the + * case that there is no description, because that is relatively easy. + */ + /* The description length includes the null terminator. 
*/ + if (rnode->rnode_desc != NULL) + len = strlen(rnode->rnode_desc) + 1; + else + len = 1; + + memset(&scd, 0, sizeof(scd)); + scd.descr_num = id; + scd.descr_ver = call->call_rootver; + scd.descr_len = len; + + size = offsetof(struct sysctldesc, descr_str); + + if (len == 1) { + scd.descr_str[0] = '\0'; /* superfluous */ + size++; + } + + /* Copy out the structure, possibly including a null terminator. */ + if ((r = rmib_copyout(oldp, off, &scd, size)) < 0) + return r; + + if (len > 1) { + /* Copy out the description itself. */ + if ((r = rmib_copyout(oldp, off + size, rnode->rnode_desc, + len)) < 0) + return r; + + size += len; + } + + /* + * By aligning just the size, we may leave garbage between the entries + * copied out, which is fine because it is userland's own data. + */ + return roundup2(size, sizeof(int32_t)); +} + +/* + * Retrieve node descriptions in bulk, or retrieve a particular node's + * description. + */ +static ssize_t +rmib_describe(struct rmib_call * call, struct rmib_node * rparent, + struct rmib_oldp * oldp, struct rmib_newp * newp) +{ + struct sysctlnode scn; + struct rmib_node *rnode; + unsigned int id; + ssize_t r, off; + + if (newp != NULL) { + if ((r = rmib_copyin(newp, &scn, sizeof(scn))) != OK) + return r; + + if (SYSCTL_VERS(scn.sysctl_flags) != SYSCTL_VERSION) + return EINVAL; + + /* Locate the child node. */ + if ((unsigned int)scn.sysctl_num >= rparent->rnode_size) + return ENOENT; + rnode = &rparent->rnode_cptr[scn.sysctl_num]; + if (rnode->rnode_flags == 0) + return ENOENT; + + /* Descriptions of private nodes are considered private too. */ + if ((rnode->rnode_flags & CTLFLAG_PRIVATE) && + !(call->call_flags & RMIB_FLAG_AUTH)) + return EPERM; + + /* + * If a description pointer was given, this is a request to + * set the node's description. We do not allow this, nor would + * we be able to support it, since we cannot access the data. 
+ */ + if (scn.sysctl_desc != NULL) + return EPERM; + + /* + * Copy out the requested node's description. At this point we + * should be sure that this call does not return zero. + */ + return rmib_copyout_desc(call, oldp, 0, scn.sysctl_num, rnode); + } + + /* Describe the child nodes of the given parent node. */ + off = 0; + + for (id = 0; id < rparent->rnode_size; id++) { + rnode = &rparent->rnode_cptr[id]; + + if (rnode->rnode_flags == 0) + continue; + + if ((r = rmib_copyout_desc(call, oldp, off, id, rnode)) < 0) + return r; + off += r; + } + + return off; +} + +/* + * Return a pointer to the data associated with the given node, or NULL if the + * node has no associated data. Actual calls to this function should never + * result in NULL - as long as the proper rules are followed elsewhere. + */ +static void * +rmib_getptr(struct rmib_node * rnode) +{ + + switch (SYSCTL_TYPE(rnode->rnode_flags)) { + case CTLTYPE_BOOL: + if (rnode->rnode_flags & CTLFLAG_IMMEDIATE) + return &rnode->rnode_bool; + break; + case CTLTYPE_INT: + if (rnode->rnode_flags & CTLFLAG_IMMEDIATE) + return &rnode->rnode_int; + break; + case CTLTYPE_QUAD: + if (rnode->rnode_flags & CTLFLAG_IMMEDIATE) + return &rnode->rnode_quad; + break; + case CTLTYPE_STRING: + case CTLTYPE_STRUCT: + if (rnode->rnode_flags & CTLFLAG_IMMEDIATE) + return NULL; + break; + default: + return NULL; + } + + return rnode->rnode_data; +} + +/* + * Read current (old) data from a regular data node, if requested. Return the + * old data length. + */ +static ssize_t +rmib_read(struct rmib_node * rnode, struct rmib_oldp * oldp) +{ + void *ptr; + size_t oldlen; + int r; + + if ((ptr = rmib_getptr(rnode)) == NULL) + return EINVAL; + + if (SYSCTL_TYPE(rnode->rnode_flags) == CTLTYPE_STRING) + oldlen = strlen(rnode->rnode_data) + 1; + else + oldlen = rnode->rnode_size; + + if (oldlen > SSIZE_MAX) + return EINVAL; + + /* Copy out the current data, if requested at all. 
*/ + if (oldp != NULL && (r = rmib_copyout(oldp, 0, ptr, oldlen)) < 0) + return r; + + /* Return the current length in any case. */ + return (ssize_t)oldlen; +} + +/* + * Write new data into a regular data node, if requested. + */ +static int +rmib_write(struct rmib_call * call, struct rmib_node * rnode, + struct rmib_newp * newp) +{ + bool b[(sizeof(bool) == sizeof(char)) ? 1 : -1]; /* for sanitizing */ + char *src, *dst, buf[RMIB_STACKBUF]; + size_t newlen; + int r; + + if (newp == NULL) + return OK; /* nothing to do */ + + /* + * When setting a new value, we cannot risk doing an in-place update: + * the copy from userland may fail halfway through, in which case an + * in-place update could leave the node value in a corrupted state. + * Thus, we must first fetch any new data into a temporary buffer. + */ + newlen = newp->newp_len; + + if ((dst = rmib_getptr(rnode)) == NULL) + return EINVAL; + + switch (SYSCTL_TYPE(rnode->rnode_flags)) { + case CTLTYPE_BOOL: + case CTLTYPE_INT: + case CTLTYPE_QUAD: + case CTLTYPE_STRUCT: + /* Non-string types must have an exact size match. */ + if (newlen != rnode->rnode_size) + return EINVAL; + break; + case CTLTYPE_STRING: + /* + * Strings must not exceed their buffer size. There is a + * second check further below, because we allow userland to + * give us an unterminated string. In that case we terminate + * it ourselves, but then the null terminator must fit as well. + */ + if (newlen > rnode->rnode_size) + return EINVAL; + break; + default: + return EINVAL; + } + + /* + * If we cannot fit the data in the small stack buffer, then allocate a + * temporary buffer. We add one extra byte so that we can add a null + * terminator at the end of strings in case userland did not supply + * one. Either way, we must free the temporary buffer later! + */ + if (newlen + 1 > sizeof(buf)) { + /* + * For regular users, we do not want to perform dynamic memory + * allocation. 
Thus, for CTLFLAG_ANYWRITE nodes, only the + * superuser may set values exceeding the small buffer in size. + */ + if (!(call->call_flags & RMIB_FLAG_AUTH)) + return EPERM; + + /* Do not return ENOMEM on allocation failure. */ + if ((src = malloc(newlen + 1)) == NULL) + return EINVAL; + } else + src = buf; + + /* Copy in the data. Note that the given new length may be zero. */ + if ((r = rmib_copyin(newp, src, newlen)) == OK) { + /* Check and, if acceptable, store the new value. */ + switch (SYSCTL_TYPE(rnode->rnode_flags)) { + case CTLTYPE_BOOL: + /* Sanitize booleans. See the MIB code for details. */ + b[0] = (bool)src[0]; + memcpy(dst, &b[0], sizeof(b[0])); + break; + case CTLTYPE_INT: + case CTLTYPE_QUAD: + case CTLTYPE_STRUCT: + memcpy(dst, src, rnode->rnode_size); + break; + case CTLTYPE_STRING: + if (newlen == rnode->rnode_size && + src[newlen - 1] != '\0') { + /* Our null terminator does not fit! */ + r = EINVAL; + break; + } + src[newlen] = '\0'; + strlcpy(dst, src, rnode->rnode_size); + break; + default: + r = EINVAL; + } + } + + if (src != buf) + free(src); + + return r; +} + +/* + * Read and/or write the value of a regular data node. A regular data node is + * a leaf node. Typically, a leaf node has no associated function, in which + * case this function will be used instead. In addition, this function may be + * used from handler functions as part of their functionality. + */ +ssize_t +rmib_readwrite(struct rmib_call * call, struct rmib_node * rnode, + struct rmib_oldp * oldp, struct rmib_newp * newp) +{ + ssize_t len; + int r; + + /* Copy out old data, if requested. Always get the old data length. */ + if ((r = len = rmib_read(rnode, oldp)) < 0) + return r; + + /* Copy in new data, if requested. */ + if ((r = rmib_write(call, rnode, newp)) != OK) + return r; + + /* Return the old data length. */ + return len; +} + +/* + * Handle a sysctl(2) call from a user process, relayed by the MIB service to + * us. If the call succeeds, return the old length. 
The MIB service will + * perform a check against the given old length and return ENOMEM to the caller + * when applicable, so we do not have to do that here. If the call fails, + * return a negative error code. + */ +static ssize_t +rmib_call(const message * m_in) +{ + struct rmib_node *rnode, *rparent; + struct rmib_call call; + struct rmib_oldp oldp_data, *oldp; + struct rmib_newp newp_data, *newp; + unsigned int root_id, namelen; + int r, id, is_leaf, has_func, name[CTL_MAXNAME]; + + /* + * Look up the root of the subtree that is the subject of the call. If + * the call is for a subtree that is not registered, return ERESTART to + * indicate to the MIB service that it should deregister the subtree it + * thinks we have. This case may occur in practice if a deregistration + * request from us crosses a sysctl call request from the MIB service. + */ + root_id = m_in->m_mib_lsys_call.root_id; + if (root_id >= __arraycount(rnodes) || rnodes[root_id] == NULL) + return ERESTART; + rnode = rnodes[root_id]; + + /* + * Set up all data structures that we need to use while handling the + * call processing. Start by copying in the remainder of the MIB name. + */ + /* A zero name length is valid and should always yield EISDIR. */ + namelen = m_in->m_mib_lsys_call.name_len; + if (namelen > __arraycount(name)) + return EINVAL; + + if (namelen > 0) { + r = sys_safecopyfrom(m_in->m_source, + m_in->m_mib_lsys_call.name_grant, 0, (vir_bytes)name, + sizeof(name[0]) * namelen); + if (r != OK) + return r; + } + + oldp_data.oldp_grant = m_in->m_mib_lsys_call.oldp_grant; + oldp_data.oldp_len = m_in->m_mib_lsys_call.oldp_len; + oldp = (GRANT_VALID(oldp_data.oldp_grant)) ? &oldp_data : NULL; + + newp_data.newp_grant = m_in->m_mib_lsys_call.newp_grant; + newp_data.newp_len = m_in->m_mib_lsys_call.newp_len; + newp = (GRANT_VALID(newp_data.newp_grant)) ? 
&newp_data : NULL; + + call.call_endpt = m_in->m_mib_lsys_call.user_endpt; + call.call_name = name; + call.call_namelen = namelen; + call.call_flags = m_in->m_mib_lsys_call.flags; + call.call_rootver = m_in->m_mib_lsys_call.root_ver; + call.call_treever = m_in->m_mib_lsys_call.tree_ver; + + /* + * Dispatch the call. + */ + for (rparent = rnode; call.call_namelen > 0; rparent = rnode) { + id = call.call_name[0]; + call.call_name++; + call.call_namelen--; + + assert(SYSCTL_TYPE(rparent->rnode_flags) == CTLTYPE_NODE); + + /* Check for meta-identifiers. */ + if (id < 0) { + /* + * A meta-identifier must always be the last name + * component. + */ + if (call.call_namelen > 0) + return EINVAL; + + switch (id) { + case CTL_QUERY: + return rmib_query(&call, rparent, oldp, newp); + case CTL_DESCRIBE: + return rmib_describe(&call, rparent, oldp, + newp); + case CTL_CREATE: + case CTL_DESTROY: + /* We support fully static subtrees only. */ + return EPERM; + default: + return EOPNOTSUPP; + } + } + + /* Locate the child node. */ + if ((unsigned int)id >= rparent->rnode_size) + return ENOENT; + rnode = &rparent->rnode_cptr[id]; + if (rnode->rnode_flags == 0) + return ENOENT; + + /* Check if access is permitted at this level. */ + if ((rnode->rnode_flags & CTLFLAG_PRIVATE) && + !(call.call_flags & RMIB_FLAG_AUTH)) + return EPERM; + + /* + * Is this a leaf node, and/or is this node handled by a + * function? If either is true, resolution ends at this level. + */ + is_leaf = (SYSCTL_TYPE(rnode->rnode_flags) != CTLTYPE_NODE); + has_func = (rnode->rnode_func != NULL); + + /* + * The name may be longer only if the node is not a leaf. That + * also applies to leaves with functions, so check this first. + */ + if (is_leaf && call.call_namelen > 0) + return ENOTDIR; + + /* + * If resolution indeed ends here, and the user supplied new + * data, check if writing is allowed. 
+ */ + if ((is_leaf || has_func) && newp != NULL) { + if (!(rnode->rnode_flags & CTLFLAG_READWRITE)) + return EPERM; + + if (!(rnode->rnode_flags & CTLFLAG_ANYWRITE) && + !(call.call_flags & RMIB_FLAG_AUTH)) + return EPERM; + } + + /* If this node has a handler function, let it do the work. */ + if (has_func) + return rnode->rnode_func(&call, rnode, oldp, newp); + + /* For regular data leaf nodes, handle generic access. */ + if (is_leaf) + return rmib_readwrite(&call, rnode, oldp, newp); + + /* No function and not a leaf? Descend further. */ + } + + /* If we get here, the name refers to a node array. */ + return EISDIR; +} + +/* + * Initialize the given node and recursively all its node-type children, + * assigning the proper child length value to each of them. + */ +static void +rmib_init(struct rmib_node * rnode) +{ + struct rmib_node *rchild; + unsigned int id; + + rchild = rnode->rnode_cptr; + + for (id = 0; id < rnode->rnode_size; id++, rchild++) { + if (rchild->rnode_flags == 0) + continue; + + rnode->rnode_clen++; + + if (SYSCTL_TYPE(rchild->rnode_flags) == CTLTYPE_NODE) + rmib_init(rchild); /* recurse */ + } +} + +/* + * Register a MIB subtree. Initialize the subtree, add it to the local set, + * and send a registration request for it to the MIB service. + */ +int +rmib_register(const int * name, unsigned int namelen, struct rmib_node * rnode) +{ + message m; + unsigned int id, free_id; + int r; + + /* A few basic sanity checks. */ + if (namelen == 0 || namelen >= CTL_SHORTNAME) + return EINVAL; + if (SYSCTL_TYPE(rnode->rnode_flags) != CTLTYPE_NODE) + return EINVAL; + + /* Make sure this is a new subtree, and find a free slot for it. */ + for (id = free_id = 0; id < __arraycount(rnodes); id++) { + if (rnodes[id] == rnode) + return EEXIST; + else if (rnodes[id] == NULL && rnodes[free_id] != NULL) + free_id = id; + } + + if (rnodes[free_id] != NULL) + return ENOMEM; + + /* + * Initialize the entire subtree. 
This will also compute rnode_clen + * for the given rnode, so do this before sending the message. + */ + rmib_init(rnode); + + /* + * Request that the MIB service mount this subtree. This is a one-way + * request, so we never hear whether mounting succeeds. There is not + * that much we can do if it fails anyway though. + */ + memset(&m, 0, sizeof(m)); + + m.m_type = MIB_REGISTER; + m.m_lsys_mib_register.root_id = free_id; + m.m_lsys_mib_register.flags = SYSCTL_VERSION | rnode->rnode_flags; + m.m_lsys_mib_register.csize = rnode->rnode_size; + m.m_lsys_mib_register.clen = rnode->rnode_clen; + m.m_lsys_mib_register.miblen = namelen; + memcpy(m.m_lsys_mib_register.mib, name, sizeof(name[0]) * namelen); + + if ((r = asynsend3(MIB_PROC_NR, &m, AMF_NOREPLY)) == OK) + rnodes[free_id] = rnode; + + return r; +} + +/* + * Deregister a previously registered subtree, both internally and with the MIB + * service. Return OK if the deregistration procedure has been started, in + * which case the given subtree is guaranteed to no longer be accessed. Return + * a negative error code on failure. + */ +int +rmib_deregister(struct rmib_node * rnode) +{ + message m; + unsigned int id; + + for (id = 0; id < __arraycount(rnodes); id++) + if (rnodes[id] == rnode) + break; + + if (id == __arraycount(rnodes)) + return ENOENT; + + rnodes[id] = NULL; + + /* + * Request that the MIB service unmount the subtree. We completely + * ignore failure here, because the caller would not be able to do + * anything about it anyway. We may also still receive sysctl call + * requests for the node we just deregistered, but this is caught + * during request processing. Reuse of the rnodes[] slot could be a + * potential problem though. We could use sequence numbers in the root + * identifiers to resolve that problem if it ever occurs in reality. 
+ */ + memset(&m, 0, sizeof(m)); + + m.m_type = MIB_DEREGISTER; + m.m_lsys_mib_register.root_id = id; + + (void)asynsend3(MIB_PROC_NR, &m, AMF_NOREPLY); + + return OK; +} + +/* + * Reset all registrations, without involving MIB communication. This call + * must be issued only when the caller has determined that the MIB service has + * restarted, and is about to reregister its subtrees. + */ +void +rmib_reset(void) +{ + + memset(rnodes, 0, sizeof(rnodes)); +} + +/* + * Process a request from the MIB service for information about the root node + * of a subtree, specifically its name and description. + */ +static int +rmib_info(const message * m_in) +{ + struct rmib_node *rnode; + unsigned int id; + const char *ptr; + size_t size; + int r; + + id = m_in->m_mib_lsys_info.root_id; + if (id >= __arraycount(rnodes) || rnodes[id] == NULL) + return ENOENT; + rnode = rnodes[id]; + + /* The name must fit. If it does not, the service writer messed up. */ + size = strlen(rnode->rnode_name) + 1; + if (size > m_in->m_mib_lsys_info.name_size) + return ENAMETOOLONG; + + r = sys_safecopyto(m_in->m_source, m_in->m_mib_lsys_info.name_grant, 0, + (vir_bytes)rnode->rnode_name, size); + if (r != OK) + return r; + + /* If there is no (optional) description, copy out an empty string. */ + ptr = (rnode->rnode_desc != NULL) ? rnode->rnode_desc : ""; + size = strlen(ptr) + 1; + + if (size > m_in->m_mib_lsys_info.desc_size) + size = m_in->m_mib_lsys_info.desc_size; + + return sys_safecopyto(m_in->m_source, m_in->m_mib_lsys_info.desc_grant, + 0, (vir_bytes)ptr, size); +} + +/* + * Process a request from the MIB service. The given message should originate + * from the MIB service and have one of the COMMON_MIB_ requests as type. + */ +void +rmib_process(const message * m_in, int ipc_status) +{ + message m_out; + uint32_t req_id; + ssize_t r; + + /* Only the MIB service may issue these requests. */ + if (m_in->m_source != MIB_PROC_NR) + return; + + /* Process the actual request. 
*/ + switch (m_in->m_type) { + case COMMON_MIB_INFO: + req_id = m_in->m_mib_lsys_info.req_id; + + r = rmib_info(m_in); + + break; + + case COMMON_MIB_CALL: + req_id = m_in->m_mib_lsys_call.req_id; + + r = rmib_call(m_in); + + break; + + default: + /* + * HACK: assume that for all current and future requests, the + * request ID field is in the same place. We could create a + * m_mib_lsys_unknown pseudo message type for this, but, eh. + */ + req_id = m_in->m_mib_lsys_info.req_id; + + r = ENOSYS; + } + + /* Construct and send a reply message to the MIB service. */ + memset(&m_out, 0, sizeof(m_out)); + + m_out.m_type = COMMON_MIB_REPLY; + m_out.m_lsys_mib_reply.req_id = req_id; + m_out.m_lsys_mib_reply.status = r; + + if (IPC_STATUS_CALL(ipc_status) == SENDREC) + r = ipc_sendnb(m_in->m_source, &m_out); + else + r = asynsend3(m_in->m_source, &m_out, AMF_NOREPLY); + + if (r != OK) + printf("lsys:rmib: unable to send reply to %d: %d\n", + m_in->m_source, r); +} diff --git a/minix/servers/mib/Makefile b/minix/servers/mib/Makefile index cc5b04f5c..bc241a0e2 100644 --- a/minix/servers/mib/Makefile +++ b/minix/servers/mib/Makefile @@ -1,7 +1,7 @@ # Makefile for the Management Information Base (MIB) server PROG= mib -SRCS= main.c tree.c kern.c vm.c hw.c proc.c minix.c +SRCS= main.c tree.c remote.c kern.c vm.c hw.c proc.c minix.c CPPFLAGS+= -I${NETBSDSRCDIR}/minix diff --git a/minix/servers/mib/main.c b/minix/servers/mib/main.c index 8101c0e03..51d7492e9 100644 --- a/minix/servers/mib/main.c +++ b/minix/servers/mib/main.c @@ -18,6 +18,17 @@ * service needs superuser privileges because it may need to issue privileged * calls and obtain privileged information from other services. * + * While most of the sysctl tree is maintained locally, the MIB service also + * allows other services to register "remote" subtrees which are then handled + * entirely by those services. 
This feature, which works much like file system + * mounting, allows 1) sysctl handling code to stay local to its corresponding + * service, and 2) parts of the sysctl tree to adapt and expand dynamically as + * optional services are started and stopped. Compared to the MIB service's + * local handling, remotely handled subtrees are subject to several additional + * practical restrictions, however. In the current implementation, the MIB + * service makes blocking calls to remote services as needed; in the future, + * these interactions could be made (more) asynchronous. + * * The MIB service was created by David van Moolenbroek . */ @@ -25,14 +36,17 @@ /* * Most of these initially empty nodes are filled in by their corresponding - * modules' _init calls; see mib_init below. However, CTL_USER stays empty: - * the libc sysctl(3) wrapper code takes care of that subtree. It must have - * an entry here though, or sysctl(8) will not list it. CTL_VENDOR is also - * empty, but writable, so that it may be used by third parties. + * modules' _init calls; see mib_init below. However, some subtrees are not + * populated by the MIB service itself. CTL_NET is expected to be populated + * through registration of remote subtrees. The libc sysctl(3) wrapper code + * takes care of the CTL_USER subtree. It must have an entry here though, or + * sysctl(8) will not list it. CTL_VENDOR is also empty, but writable, so that + * it may be used by third parties. 
*/ static struct mib_node mib_table[] = { /* 1*/ [CTL_KERN] = MIB_ENODE(_P | _RO, "kern", "High kernel"), /* 2*/ [CTL_VM] = MIB_ENODE(_P | _RO, "vm", "Virtual memory"), +/* 4*/ [CTL_NET] = MIB_ENODE(_P | _RO, "net", "Networking"), /* 6*/ [CTL_HW] = MIB_ENODE(_P | _RO, "hw", "Generic CPU, I/O"), /* 8*/ [CTL_USER] = MIB_ENODE(_P | _RO, "user", "User-level"), /*11*/ [CTL_VENDOR] = MIB_ENODE(_P | _RW, "vendor", "Vendor specific"), @@ -45,7 +59,7 @@ static struct mib_node mib_table[] = { * node is writable by default, so that programs such as init(8) may create * their own top-level entries. */ -static struct mib_node mib_root = MIB_NODE(_RW, mib_table, "", ""); +struct mib_node mib_root = MIB_NODE(_RW, mib_table, "", ""); /* * Structures describing old and new data as provided by userland. The primary @@ -187,6 +201,56 @@ mib_copyin_aux(struct mib_newp * __restrict newp, vir_bytes addr, return sys_datacopy(newp->newp_endpt, addr, SELF, (vir_bytes)buf, len); } +/* + * Create a grant for a call's old data region, if not NULL, for the given + * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the + * length in lenp, and return OK. On error, return an error code that must not + * be ENOMEM. + */ +int +mib_relay_oldp(endpoint_t endpt, struct mib_oldp * __restrict oldp, + cp_grant_id_t * grantp, size_t * __restrict lenp) +{ + + if (oldp != NULL) { + *grantp = cpf_grant_magic(endpt, oldp->oldp_endpt, + oldp->oldp_addr, oldp->oldp_len, CPF_WRITE); + if (!GRANT_VALID(*grantp)) + return EINVAL; + *lenp = oldp->oldp_len; + } else { + *grantp = GRANT_INVALID; + *lenp = 0; + } + + return OK; +} + +/* + * Create a grant for a call's new data region, if not NULL, for the given + * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the + * length in lenp, and return OK. On error, return an error code that must not + * be ENOMEM. 
+ */ +int +mib_relay_newp(endpoint_t endpt, struct mib_newp * __restrict newp, + cp_grant_id_t * grantp, size_t * __restrict lenp) +{ + + if (newp != NULL) { + *grantp = cpf_grant_magic(endpt, newp->newp_endpt, + newp->newp_addr, newp->newp_len, CPF_READ); + if (!GRANT_VALID(*grantp)) + return EINVAL; + *lenp = newp->newp_len; + } else { + *grantp = GRANT_INVALID; + *lenp = 0; + } + + return OK; +} + /* * Check whether the user is allowed to perform privileged operations. The * function returns a nonzero value if this is the case, and zero otherwise. @@ -211,7 +275,8 @@ mib_authed(struct mib_call * call) * Implement the sysctl(2) system call. */ static int -mib_sysctl(message * __restrict m_in, message * __restrict m_out) +mib_sysctl(message * __restrict m_in, int ipc_status, + message * __restrict m_out) { vir_bytes oldaddr, newaddr; size_t oldlen, newlen; @@ -223,6 +288,10 @@ mib_sysctl(message * __restrict m_in, message * __restrict m_out) struct mib_call call; ssize_t r; + /* Only handle blocking calls. Ignore everything else. */ + if (IPC_STATUS_CALL(ipc_status) != SENDREC) + return EDONTREPLY; + endpt = m_in->m_source; oldaddr = m_in->m_lc_mib_sysctl.oldp; oldlen = m_in->m_lc_mib_sysctl.oldlen; @@ -281,7 +350,7 @@ mib_sysctl(message * __restrict m_in, message * __restrict m_out) call.call_flags = 0; call.call_reslen = 0; - r = mib_dispatch(&call, &mib_root, oldpp, newpp); + r = mib_dispatch(&call, oldpp, newpp); /* * From NetBSD: we copy out as much as we can from the old data, while @@ -332,7 +401,10 @@ mib_init(int type __unused, sef_init_info_t * info __unused) * Now that the static tree is complete, go through the entire tree, * initializing miscellaneous fields. */ - mib_tree_init(&mib_root); + mib_tree_init(); + + /* Prepare for requests to mount remote subtrees. 
*/ + mib_remote_init(); return OK; } @@ -385,19 +457,34 @@ main(void) switch (m_in.m_type) { case MIB_SYSCTL: - r = mib_sysctl(&m_in, &m_out); + r = mib_sysctl(&m_in, ipc_status, &m_out); + + break; + + case MIB_REGISTER: + r = mib_register(&m_in, ipc_status); + + break; + + case MIB_DEREGISTER: + r = mib_deregister(&m_in, ipc_status); break; default: - r = ENOSYS; + if (IPC_STATUS_CALL(ipc_status) == SENDREC) + r = ENOSYS; + else + r = EDONTREPLY; } - /* Send the reply. */ - m_out.m_type = r; + /* Send a reply, if applicable. */ + if (r != EDONTREPLY) { + m_out.m_type = r; - if ((r = ipc_sendnb(m_in.m_source, &m_out)) != OK) - printf("MIB: ipc_sendnb failed (%d)\n", r); + if ((r = ipc_sendnb(m_in.m_source, &m_out)) != OK) + printf("MIB: ipc_sendnb failed (%d)\n", r); + } } /* NOTREACHED */ diff --git a/minix/servers/mib/mib.h b/minix/servers/mib/mib.h index e9c598aef..413a109cc 100644 --- a/minix/servers/mib/mib.h +++ b/minix/servers/mib/mib.h @@ -22,6 +22,14 @@ */ #define MINIX_TEST_SUBTREE 1 /* include the minix.test subtree? */ +/* + * By default, mount request failures will be silently discarded, because the + * requests themselves are one-way. For service authors, a bit more output may + * be helpful. Set the following definition to "printf s" in order to + * include more information about mount requests and failures. + */ +#define MIB_DEBUG_MOUNT(s) /* printf s */ + struct mib_oldp; struct mib_newp; @@ -42,8 +50,8 @@ struct mib_call { #define MIB_FLAG_NOAUTH 0x02 /* user verified to be regular user */ /* - * We reassign new meaning to two NetBSD node flags, because we do not use the - * flags in the way NetBSD does: + * We reassign new meaning to three NetBSD node flags, because we do not use + * the flags in the way NetBSD does: * * - On NetBSD, CTLFLAG_ROOT is used to mark the root of the sysctl tree. 
The * entire root node is not exposed to userland, and thus, neither is this @@ -52,13 +60,18 @@ struct mib_call { * node, presumably to avoid having to duplicate entire subtrees. We can * simply have two nodes point to the same subtree instead, and thus, we do * not need to support this functionality at all. + * - On NetBSD, CTLFLAG_MMAP is defined for future support for memory-mapping + * node data with CTL_MMAP. It is not yet clear where or why this feature + * would be used in practice. For as long as NetBSD does not actually use + * this flag *for node-type nodes*, we can reuse it for our own purposes. * * The meaning of our replacement flags is explained further below. We ensure - * that neither of these flags are ever exposed to userland. As such, our own + * that none of these flags are ever exposed to userland. As such, our own * definitions can be changed as necessary without breaking anything. */ #define CTLFLAG_PARENT CTLFLAG_ROOT /* node is a real parent node */ #define CTLFLAG_VERIFY CTLFLAG_ALIAS /* node has verification function */ +#define CTLFLAG_REMOTE CTLFLAG_MMAP /* node is root of remote subtree */ /* * The following node structure definition aims to meet several goals at once: @@ -66,12 +79,14 @@ struct mib_call { * 1) it can be used for static and dynamic nodes; * 2) it can be used to point to both static and dynamic child arrays at once; * 3) it allows for embedded, pointed-to, and function-generated data; - * 4) its unions are compatible with magic instrumentation; - * 5) it is optimized for size, assuming many static and few dynamic nodes. + * 4) it allows both temporary and obscuring mount points for remote subtrees; + * 5) its unions are compatible with magic instrumentation; + * 6) it is optimized for size, assuming many static and few dynamic nodes. * - * All nodes have flags, a size, a version, a name, and optionally a - * description. 
The use of the rest of the fields depends on the type of the - * node, which is defined by part of the flags field. + * All nodes have flags, a size, a version, a parent (except the root node), a + * name, and optionally a description. The use of the rest of the fields + * depends on the type of the node, which is defined as part of the node's + * flags field. * * Data nodes, that is, nodes of type CTLTYPE_{BOOL,INT,QUAD,STRING,STRUCT}, * have associated data. For types CTLTYPE_{BOOL,INT,QUAD}, the node may have @@ -90,24 +105,61 @@ struct mib_call { * (size, immediate and/or pointer) data fields as it sees fit. * * Node-type nodes, of type CTLTYPE_NODE, behave differently. Such nodes may - * have either static and dynamic child nodes, or an associated function. Such - * a function handles all access to the entire subtree. If no function is set, - * the CTLFLAG_PARENT flag is set, to indicate that this node is the root of a - * real subtree; CTLFLAG_PARENT must not be set if the node has an associated - * function. For real node-type nodes (with CTLFLAG_PARENT set), node_size is - * the number (not size!) of the array of static child nodes, which is pointed - * to by node_scptr and indexed by child identifier. Within the static array, - * child nodes with zeroed flags fields are not in use. The node_dcptr field - * points to a linked list of dynamic child nodes. The node_csize field is set - * to the size of the static array plus the number of dynamic nodes; node_clen - * is set to the number of valid entries in the static array plus the number of - * dynamic nodes. If a function is set, none of these fields are used, and the - * node_size field is typically (but not necessarily) set to zero. + * have static and dynamic child nodes, or have an associated function, or be + * a mount point for a subtree handled by a remote process. 
The exact case is + * defined by the combination of the CTLFLAG_PARENT and CTLFLAG_REMOTE flags, + * yielding four possible cases: + * + * CTLFLAG_PARENT CTLFLAG_REMOTE Meaning + * not set not set The node has an associated function which + * handles all access to the entire subtree. + * set not set The node is the root of a real, local + * subtree with static and/or dynamic children. + * not set set The node is a temporarily created mount + * point for a remote tree. A remote service + * handles all access to the entire subtree. + * Unmounting the node also destroys the node. + * set set The node is a mount point that obscures a + * real, local subtree. A remote service + * handles all access to the entire subtree. + * Unmounting makes the original node visible. + * + * If the CTLFLAG_PARENT flag is set, the node is the root of a real subtree. + * For such nodes, node_size is the number (not size!) of the array of static + * child nodes, which is pointed to by node_scptr and indexed by child + * identifier. Within the static array, child nodes with zeroed flags fields + * are not in use. The node_dcptr field points to a linked list of dynamic + * child nodes. The node_csize field is set to the size of the static array + * plus the number of dynamic nodes; node_clen is set to the number of valid + * entries in the static array plus the number of dynamic nodes. + * + * If a function is set, and thus neither CTLFLAG_PARENT nor CTLFLAG_REMOTE is + * set, none of the aforementioned fields are used, and the node_size field is + * typically (but not necessarily) set to zero. + * + * A remote service can mount its own subtree into the central MIB tree. The + * MIB service will then relay any requests for that subtree to the remote + * service. Both the mountpoint and the root of the remote subtree must be of + * type CTLTYPE_NODE; thus, no individual leaf nodes may be mounted. 
The mount + * point may either be created temporarily for the purpose of mounting (e.g., + * net.inet), or it may override a preexisting node (e.g., kern.ipc). In the + * first case, the parent node must exist and be a node type (net). In the + * second case, the preexisting target node (the MIB service's kern.ipc) may + * not have an associated function and may only have static children. While + * being used as a mountpoint (i.e., have CTLFLAG_REMOTE set), the local node's + * node_csize and node_clen fields must not be used. Instead, the same space + * in the node structure is used to store information about the remote node: + * node_rid, node_eid, and the smaller node_rcsize and node_rclen which contain + * information about the root of the remote subtree. Remote nodes are also + * part of a linked list for administration purposes, using the node_next + * field. When a preexisting (CTLFLAG_PARENT) node is unmounted, its original + * node_csize and node_clen fields are recomputed. * * The structure uses unions for either only pointers or only non-pointers, to * simplify live update support. However, this does not mean the structure is - * not fully used: real node-type nodes use node_{flags,size,ver,csize,clen, - * scptr,dcptr,name,desc}, which together add up to the full structure size. + * not fully used: real node-type nodes use node_{flags,size,ver,parent,csize, + * clen,scptr,dcptr,name,desc}, which together add up to the full structure + * size. 
*/ struct mib_node; struct mib_dynode; @@ -117,47 +169,77 @@ typedef ssize_t (*mib_func_ptr)(struct mib_call *, struct mib_node *, typedef int (*mib_verify_ptr)(struct mib_call *, struct mib_node *, void *, size_t); +/* + * To save space for the maintenance of remote nodes, we split up one uint32_t + * field into three subfields: + * - node_eid ("endpoint ID"), which is an index into the table of endpoints; + * - node_rcsize ("child size"), the number of child slots of the remote root; + * - node_rclen ("child length"), the number of children of the remote root. + * These fields impose limits on the number of endpoints known in the MIB + * service, and the maximum size of the remote subtree root. + */ +#define MIB_EID_BITS 5 /* up to 32 services can set remote subtrees */ +#define MIB_RC_BITS 12 /* remote root may have up to 4096 children */ + +#if MIB_EID_BITS + 2 * MIB_RC_BITS > 32 +#error "Sum of remote ID and remote children bit fields exceeds uint32_t size" +#endif + struct mib_node { - uint32_t node_flags; /* CTLTYPE_ type and CTLFLAGS_ flags */ + uint32_t node_flags; /* CTLTYPE_ type and CTLFLAG_ flags */ size_t node_size; /* size of associated data (bytes) */ uint32_t node_ver; /* node version */ + struct mib_node *node_parent; /* pointer to parent node */ union ixfer_node_val_u { struct { uint32_t nvuc_csize; /* number of child slots */ uint32_t nvuc_clen; /* number of actual children */ } nvu_child; - int nvu_int; /* immediate integer */ + struct { + uint32_t nvur_eid:MIB_EID_BITS; /* endpoint index */ + uint32_t nvur_csize:MIB_RC_BITS;/* remote ch. 
slots */ + uint32_t nvur_clen:MIB_RC_BITS; /* remote children */ + uint32_t nvur_rid; /* opaque ID of remote root */ + } nvu_remote; bool nvu_bool; /* immediate boolean */ + int nvu_int; /* immediate integer */ u_quad_t nvu_quad; /* immediate quad */ } node_val_u; union pxfer_node_ptr_u { - void *npu_data; /* struct or string data pointer */ + void *npu_data; /* struct or string data pointer */ struct mib_node *npu_scptr; /* static child node array */ } node_ptr_u; union pxfer_node_aux_u { struct mib_dynode *nau_dcptr; /* dynamic child node list */ mib_func_ptr nau_func; /* handler function */ mib_verify_ptr nau_verify; /* verification function */ + struct mib_node *nau_next; /* next remote node in list */ } node_aux_u; const char *node_name; /* node name string */ const char *node_desc; /* node description (may be NULL) */ }; #define node_csize node_val_u.nvu_child.nvuc_csize #define node_clen node_val_u.nvu_child.nvuc_clen -#define node_int node_val_u.nvu_int +#define node_eid node_val_u.nvu_remote.nvur_eid +#define node_rcsize node_val_u.nvu_remote.nvur_csize +#define node_rclen node_val_u.nvu_remote.nvur_clen +#define node_rid node_val_u.nvu_remote.nvur_rid #define node_bool node_val_u.nvu_bool +#define node_int node_val_u.nvu_int #define node_quad node_val_u.nvu_quad #define node_data node_ptr_u.npu_data #define node_scptr node_ptr_u.npu_scptr #define node_dcptr node_aux_u.nau_dcptr #define node_func node_aux_u.nau_func #define node_verify node_aux_u.nau_verify +#define node_next node_aux_u.nau_next /* * This structure is used for dynamically allocated nodes, that is, nodes * created by userland at run time. It contains not only the fields below, but * also the full name and, for leaf nodes with non-immediate data, the actual - * data area. + * data area, or, for temporary mount points for remote subtrees, the node's + * description. 
*/ struct mib_dynode { struct mib_dynode *dynode_next; /* next in linked dynamic node list */ @@ -179,13 +261,6 @@ struct mib_dynode { .node_name = n, \ .node_desc = d \ } -#define MIB_INT(f,i,n,d) { \ - .node_flags = CTLTYPE_INT | CTLFLAG_IMMEDIATE | f, \ - .node_size = sizeof(int), \ - .node_int = i, \ - .node_name = n, \ - .node_desc = d \ -} #define MIB_BOOL(f,b,n,d) { \ .node_flags = CTLTYPE_BOOL | CTLFLAG_IMMEDIATE | f, \ .node_size = sizeof(bool), \ @@ -193,6 +268,13 @@ struct mib_dynode { .node_name = n, \ .node_desc = d \ } +#define MIB_INT(f,i,n,d) { \ + .node_flags = CTLTYPE_INT | CTLFLAG_IMMEDIATE | f, \ + .node_size = sizeof(int), \ + .node_int = i, \ + .node_name = n, \ + .node_desc = d \ +} #define MIB_QUAD(f,q,n,d) { \ .node_flags = CTLTYPE_QUAD | CTLFLAG_IMMEDIATE | f, \ .node_size = sizeof(u_quad_t), \ @@ -200,16 +282,18 @@ struct mib_dynode { .node_name = n, \ .node_desc = d \ } -#define MIB_DATA(f,s,n,d) { \ +#define _MIB_DATA(f,s,p,n,d) { \ .node_flags = f, \ - .node_size = sizeof(s), \ - .node_data = __UNCONST(s), \ + .node_size = s, \ + .node_data = __UNCONST(p), \ .node_name = n, \ .node_desc = d \ } -#define MIB_STRING(f,p,n,d) MIB_DATA(CTLTYPE_STRING | f, p, n, d) -#define MIB_STRUCT(f,p,n,d) MIB_DATA(CTLTYPE_STRUCT | f, p, n, d) -#define MIB_INTPTR(f,p,n,d) MIB_DATA(CTLTYPE_INT | f, p, n, d) +#define MIB_BOOLPTR(f,p,n,d) _MIB_DATA(CTLTYPE_BOOL | f, sizeof(*p), p, n, d) +#define MIB_INTPTR(f,p,n,d) _MIB_DATA(CTLTYPE_INT | f, sizeof(*p), p, n, d) +#define MIB_QUADTR(f,p,n,d) _MIB_DATA(CTLTYPE_QUAD | f, sizeof(*p), p, n, d) +#define MIB_STRING(f,p,n,d) _MIB_DATA(CTLTYPE_STRING | f, sizeof(p), p, n, d) +#define MIB_STRUCT(f,s,p,n,d) _MIB_DATA(CTLTYPE_STRUCT | f, s, p, n, d) #define MIB_FUNC(f,s,fp,n,d) { \ .node_flags = f, \ .node_size = s, \ @@ -258,16 +342,32 @@ size_t mib_getnewlen(struct mib_newp *); int mib_copyin(struct mib_newp * __restrict, void * __restrict, size_t); int mib_copyin_aux(struct mib_newp * __restrict, vir_bytes, void * 
__restrict, size_t); +int mib_relay_oldp(endpoint_t, struct mib_oldp * __restrict, cp_grant_id_t *, + size_t * __restrict); +int mib_relay_newp(endpoint_t, struct mib_newp * __restrict, cp_grant_id_t *, + size_t * __restrict); int mib_authed(struct mib_call *); +extern struct mib_node mib_root; /* tree.c */ ssize_t mib_readwrite(struct mib_call *, struct mib_node *, struct mib_oldp *, struct mib_newp *, mib_verify_ptr); -ssize_t mib_dispatch(struct mib_call *, struct mib_node *, struct mib_oldp *, - struct mib_newp *); -void mib_tree_init(struct mib_node *); -extern unsigned int nodes; -extern unsigned int objects; +ssize_t mib_dispatch(struct mib_call *, struct mib_oldp *, struct mib_newp *); +void mib_tree_init(void); +int mib_mount(const int *, unsigned int, unsigned int, uint32_t, uint32_t, + unsigned int, unsigned int, struct mib_node **); +void mib_unmount(struct mib_node *); +extern unsigned int mib_nodes; +extern unsigned int mib_objects; +extern unsigned int mib_remotes; + +/* remote.c */ +void mib_remote_init(void); +int mib_register(const message *, int); +int mib_deregister(const message *, int); +int mib_remote_info(unsigned int, uint32_t, char *, size_t, char *, size_t); +ssize_t mib_remote_call(struct mib_call *, struct mib_node *, + struct mib_oldp *, struct mib_newp *); /* proc.c */ ssize_t mib_kern_lwp(struct mib_call *, struct mib_node *, struct mib_oldp *, diff --git a/minix/servers/mib/minix.c b/minix/servers/mib/minix.c index 683d95063..08a1739ef 100644 --- a/minix/servers/mib/minix.c +++ b/minix/servers/mib/minix.c @@ -24,7 +24,8 @@ static struct mib_node mib_minix_test_table[] = { /* 2*/ [TEST_QUAD] = MIB_QUAD(_RW, 0, "quad", "Quad test field"), /* 3*/ [TEST_STRING] = MIB_STRING(_RW, test_string, "string", "String test field"), -/* 4*/ [TEST_STRUCT] = MIB_STRUCT(_RW, test_struct, "struct", +/* 4*/ [TEST_STRUCT] = MIB_STRUCT(_RW, sizeof(test_struct), + test_struct, "struct", "Structure test field"), /* 5*/ [TEST_PRIVATE] = MIB_INT(_RW | 
CTLFLAG_PRIVATE, -5375, "private", "Private test field"), @@ -45,11 +46,14 @@ static struct mib_node mib_minix_test_table[] = { static struct mib_node mib_minix_mib_table[] = { /* 1*/ [MIB_NODES] = MIB_INTPTR(_P | _RO | CTLFLAG_UNSIGNED, - &nodes, "nodes", + &mib_nodes, "nodes", "Number of nodes in the MIB tree"), /* 2*/ [MIB_OBJECTS] = MIB_INTPTR(_P | _RO | CTLFLAG_UNSIGNED, - &objects, "objects", "Number of " + &mib_objects, "objects", "Number of " "dynamically allocated MIB objects"), +/* 3*/ [MIB_REMOTES] = MIB_INTPTR(_P | _RO | CTLFLAG_UNSIGNED, + &mib_remotes, "remotes", + "Number of mounted remote MIB subtrees"), }; static struct mib_node mib_minix_proc_table[] = { @@ -63,7 +67,7 @@ static struct mib_node mib_minix_proc_table[] = { static struct mib_node mib_minix_table[] = { #if MINIX_TEST_SUBTREE -/* 0*/ [MINIX_TEST] = MIB_NODE(_RW | CTLFLAG_HIDDEN, +/* 0*/ [MINIX_TEST] = MIB_NODE(_P | _RW | CTLFLAG_HIDDEN, mib_minix_test_table, "test", "Test87 testing ground"), #endif /* MINIX_TEST_SUBTREE */ diff --git a/minix/servers/mib/remote.c b/minix/servers/mib/remote.c new file mode 100644 index 000000000..24d7fea7c --- /dev/null +++ b/minix/servers/mib/remote.c @@ -0,0 +1,477 @@ +/* MIB service - remote.c - remote service management and communication */ + +#include "mib.h" + +/* + * TODO: the main feature that is missing here is a more active way to + * determine that a particular service has died, so that its mount points can + * be removed proactively. Without this, there is a (small) risk that we end + * up talking to a recycled endpoint with a service that ignores our request, + * resulting in a deadlock of the MIB service. Right now, the problem is that + * there is no proper DS API to subscribe to generic service-down events. + * + * In the long term, communication to other services should be made + * asynchronous, so that the MIB service does not block if there are problems + * with the other service. 
The protocol should already support this, and some + * simplifications are the result of preparing for future asynchrony support + * (such as not dynamically querying the remote root node for its properties, + * which would be very hard to implement in a nonblocking way). However, + * actual support is missing. For now we assume that the remote service either + * answers the request, or crashes (causing the sendrec to abort), which is + * mostly good enough. + */ + +/* This is the maximum number of remote services that may register subtrees. */ +#define MIB_ENDPTS (1U << MIB_EID_BITS) + +/* This is the maximum service label size, including '\0'. */ +#define MIB_LABEL_MAX 16 + +/* Table of remote endpoints, indexed by mount point nodes' node_eid fields. */ +static struct { + endpoint_t endpt; /* remote endpoint or NONE */ + struct mib_node *nodes; /* head of list of mount point nodes */ + char label[MIB_LABEL_MAX]; /* label of the remote endpoint */ +} endpts[MIB_ENDPTS]; + +/* + * Initialize the table of remote endpoints. + */ +void +mib_remote_init(void) +{ + unsigned int i; + + for (i = 0; i < __arraycount(endpts); i++) { + endpts[i].endpt = NONE; + endpts[i].nodes = NULL; + } +} + +/* + * The remote endpoint with the given table index has been determined to have + * died. Clean up all its mount points. + */ +static void +mib_down(unsigned int eid) +{ + struct mib_node *node, *next_node; + + assert(endpts[eid].endpt != NONE); + assert(endpts[eid].nodes != NULL); + + /* Unmount each of the remote endpoint's mount points. */ + for (node = endpts[eid].nodes; node != NULL; node = next_node) { + /* The unmount call may deallocate the node object. */ + next_node = node->node_next; + + mib_unmount(node); + } + + /* Mark the entry itself as no longer in use. */ + endpts[eid].endpt = NONE; + endpts[eid].nodes = NULL; +} + +/* + * Obtain the label for the given endpoint. On success, return OK and store + * the label in the given buffer. 
If the label cannot be retrieved or does not + * fit in the given buffer, return a negative error code. + */ +static int +mib_get_label(endpoint_t endpt, char * label, size_t labelsize) +{ + char key[DS_MAX_KEYLEN]; + int r; + + /* TODO: init has a label, so this is not a proper is-service test! */ + if ((r = ds_retrieve_label_name(key, endpt)) != OK) { + printf("MIB: unable to obtain label for %d\n", endpt); + + return r; + } + + key[sizeof(key) - 1] = 0; + if (strlen(key) >= labelsize) { + /* This should really never happen. */ + printf("MIB: service %d label '%s' is too long\n", endpt, key); + + return ENAMETOOLONG; + } + + strlcpy(label, key, labelsize); + return OK; +} + +/* + * Register a remote subtree, mounting it in the local tree as requested. + */ +static void +mib_do_register(endpoint_t endpt, const char * label, uint32_t rid, + uint32_t flags, unsigned int csize, unsigned int clen, const int * mib, + unsigned int miblen) +{ + struct mib_node *node; + unsigned int eid; + int r, free_eid; + + /* + * See if we already have a remote endpoint for the service's label. + * If so, we can safely assume that the old endpoint has died and we + * have to unmount any previous entries. Also find a free entry for + * the remote endpoint if it is new. + */ + free_eid = -1; + for (eid = 0; eid < __arraycount(endpts); eid++) { + if (endpts[eid].endpt == endpt) + break; + else if (endpts[eid].endpt != NONE && + !strcmp(endpts[eid].label, label)) { + mib_down(eid); + + assert(endpts[eid].endpt == NONE); + assert(endpts[eid].nodes == NULL); + + break; + } else if (endpts[eid].endpt == NONE && free_eid < 0) + free_eid = eid; + } + + if (eid == __arraycount(endpts)) { + if (free_eid < 0) { + printf("MIB: remote endpoints table is full!\n"); + + return; + } + + eid = free_eid; + } + + /* + * Make sure that the caller does not introduce two mount points with + * the same ID. 
Right now we refuse such requests; instead, we could + * also choose to first deregister the old mount point with this ID. + */ + for (node = endpts[eid].nodes; node != NULL; node = node->node_next) { + if (node->node_rid == rid) + break; + } + + if (node != NULL) { + MIB_DEBUG_MOUNT(("MIB: service %d tried to reuse ID %"PRIu32 + "\n", endpt, rid)); + + return; + } + + /* + * If we did not already have an entry for this endpoint, add one now, + * because the mib_mount() call will expect it to be there. If the + * mount call fails, we may have to invalidate the entry again. + */ + if (endpts[eid].endpt == NONE) { + endpts[eid].endpt = endpt; + endpts[eid].nodes = NULL; + strlcpy(endpts[eid].label, label, sizeof(endpts[eid].label)); + } + + /* Attempt to mount the remote subtree in the tree. */ + r = mib_mount(mib, miblen, eid, rid, flags, csize, clen, &node); + + if (r != OK) { + /* If the entry has no other mount points, invalidate it. */ + if (endpts[eid].nodes == NULL) + endpts[eid].endpt = NONE; + + return; + } + + /* Add the new node to the list of mount points of the endpoint. */ + node->node_next = endpts[eid].nodes; + endpts[eid].nodes = node; +} + +/* + * Process a mount point registration request from another service. + */ +int +mib_register(const message * m_in, int ipc_status) +{ + char label[DS_MAX_KEYLEN]; + + /* + * Registration messages must be one-way, or they may cause a deadlock + * if crossed by a request coming from us. This case also effectively + * eliminates the possibility for userland to register nodes. The + * return value of ENOSYS effectively tells userland that this call + * number is not in use, which allows us to repurpose call numbers + * later. + */ + if (IPC_STATUS_CALL(ipc_status) == SENDREC) + return ENOSYS; + + MIB_DEBUG_MOUNT(("MIB: got register request from %d\n", + m_in->m_source)); + + /* Double-check if the caller is a service by obtaining its label. 
*/ + if (mib_get_label(m_in->m_source, label, sizeof(label)) != OK) + return EDONTREPLY; + + /* Perform one message-level bounds check here. */ + if (m_in->m_lsys_mib_register.miblen > + __arraycount(m_in->m_lsys_mib_register.mib)) + return EDONTREPLY; + + /* The rest of the work is handled by a message-agnostic function. */ + mib_do_register(m_in->m_source, label, + m_in->m_lsys_mib_register.root_id, m_in->m_lsys_mib_register.flags, + m_in->m_lsys_mib_register.csize, m_in->m_lsys_mib_register.clen, + m_in->m_lsys_mib_register.mib, m_in->m_lsys_mib_register.miblen); + + /* Never reply to this message. */ + return EDONTREPLY; +} + +/* + * Deregister a previously registered remote subtree, unmounting it from the + * local tree. + */ +static void +mib_do_deregister(endpoint_t endpt, uint32_t rid) +{ + struct mib_node *node, **nodep; + unsigned int eid; + + for (eid = 0; eid < __arraycount(endpts); eid++) { + if (endpts[eid].endpt == endpt) + break; + } + + if (eid == __arraycount(endpts)) { + MIB_DEBUG_MOUNT(("MIB: deregister request from unknown " + "endpoint %d\n", endpt)); + + return; + } + + for (nodep = &endpts[eid].nodes; *nodep != NULL; + nodep = &node->node_next) { + node = *nodep; + + if (node->node_rid == rid) + break; + } + + if (*nodep == NULL) { + MIB_DEBUG_MOUNT(("MIB: deregister request from %d for unknown " + "ID %"PRIu32"\n", endpt, rid)); + + return; + } + + /* + * The unmount function may or may not deallocate the node object, so + * remove it from the linked list first. If this leaves an empty + * linked list, also mark the remote endpoint entry itself as free. + */ + *nodep = node->node_next; + + if (endpts[eid].nodes == NULL) { + endpts[eid].endpt = NONE; + endpts[eid].nodes = NULL; + } + + /* Finally, unmount the remote subtree. */ + mib_unmount(node); +} + +/* + * Process a mount point deregistration request from another service. + */ +int +mib_deregister(const message * m_in, int ipc_status) +{ + + /* Same as for registration messages. 
*/ + if (IPC_STATUS_CALL(ipc_status) == SENDREC) + return ENOSYS; + + MIB_DEBUG_MOUNT(("MIB: got deregister request from %d\n", + m_in->m_source)); + + /* The rest of the work is handled by a message-agnostic function. */ + mib_do_deregister(m_in->m_source, m_in->m_lsys_mib_register.root_id); + + /* Never reply to this message. */ + return EDONTREPLY; +} + +/* + * Retrieve information about the root of a remote subtree, specifically its + * name and description. This is done only when there was no corresponding + * local node and one has to be created temporarily. On success, return OK + * with the name and description stored in the given buffers. Otherwise, + * return a negative error code. + */ +int +mib_remote_info(unsigned int eid, uint32_t rid, char * name, size_t namesize, + char * desc, size_t descsize) +{ + endpoint_t endpt; + cp_grant_id_t name_grant, desc_grant; + message m; + int r; + + if (eid >= __arraycount(endpts) || endpts[eid].endpt == NONE) + return EINVAL; + + endpt = endpts[eid].endpt; + + if ((name_grant = cpf_grant_direct(endpt, (vir_bytes)name, namesize, + CPF_WRITE)) == GRANT_INVALID) + return EINVAL; + + if ((desc_grant = cpf_grant_direct(endpt, (vir_bytes)desc, descsize, + CPF_WRITE)) == GRANT_INVALID) { + cpf_revoke(name_grant); + + return EINVAL; + } + + memset(&m, 0, sizeof(m)); + + m.m_type = COMMON_MIB_INFO; + m.m_mib_lsys_info.req_id = 0; /* reserved for future async support */ + m.m_mib_lsys_info.root_id = rid; + m.m_mib_lsys_info.name_grant = name_grant; + m.m_mib_lsys_info.name_size = namesize; + m.m_mib_lsys_info.desc_grant = desc_grant; + m.m_mib_lsys_info.desc_size = descsize; + + r = ipc_sendrec(endpt, &m); + + cpf_revoke(desc_grant); + cpf_revoke(name_grant); + + if (r != OK) + return r; + + if (m.m_type != COMMON_MIB_REPLY) + return EINVAL; + if (m.m_lsys_mib_reply.req_id != 0) + return EINVAL; + + return m.m_lsys_mib_reply.status; +} + +/* + * Relay a sysctl(2) call from a user process to a remote service, because the + * 
call reached a mount point into a remote subtree. Return the result code + * from the remote service. Alternatively, return ERESTART if it has been + * determined that the remote service is dead, in which case its mount points + * will have been removed (possibly including the entire given node), and the + * caller should continue the call on the underlying local subtree if there is + * any. Note that the remote service may also return ERESTART to indicate that + * the remote subtree does not exist, either because it is being deregistered + * or because the remote service was restarted with loss of state. + */ +ssize_t +mib_remote_call(struct mib_call * call, struct mib_node * node, + struct mib_oldp * oldp, struct mib_newp * newp) +{ + cp_grant_id_t name_grant, oldp_grant, newp_grant; + size_t oldp_len, newp_len; + endpoint_t endpt; + message m; + int r; + + endpt = endpts[node->node_eid].endpt; + assert(endpt != NONE); + + /* + * Allocate grants. Since ENOMEM has a special meaning for sysctl(2), + * never return that code even if it is the most appropriate one. + * The remainder of the name may be empty; the callee should check. + */ + name_grant = cpf_grant_direct(endpt, (vir_bytes)call->call_name, + call->call_namelen * sizeof(call->call_name[0]), CPF_READ); + if (!GRANT_VALID(name_grant)) + return EINVAL; + + if ((r = mib_relay_oldp(endpt, oldp, &oldp_grant, &oldp_len)) != OK) { + cpf_revoke(name_grant); + + return r; + } + + if ((r = mib_relay_newp(endpt, newp, &newp_grant, &newp_len)) != OK) { + if (GRANT_VALID(oldp_grant)) + cpf_revoke(oldp_grant); + cpf_revoke(name_grant); + + return r; + } + + /* + * Construct the request message. We have not optimized this flow for + * performance. In particular, we never embed even short names in the + * message, and we supply a flag indicating whether the caller is root + * regardless of whether the callee is interested in this. This is + * more convenient for the callee, but also more costly. 
+ */ + memset(&m, 0, sizeof(m)); + + m.m_type = COMMON_MIB_CALL; + m.m_mib_lsys_call.req_id = 0; /* reserved for future async support */ + m.m_mib_lsys_call.root_id = node->node_rid; + m.m_mib_lsys_call.name_grant = name_grant; + m.m_mib_lsys_call.name_len = call->call_namelen; + m.m_mib_lsys_call.oldp_grant = oldp_grant; + m.m_mib_lsys_call.oldp_len = oldp_len; + m.m_mib_lsys_call.newp_grant = newp_grant; + m.m_mib_lsys_call.newp_len = newp_len; + m.m_mib_lsys_call.user_endpt = call->call_endpt; + m.m_mib_lsys_call.flags = !!mib_authed(call); /* TODO: define flags */ + m.m_mib_lsys_call.root_ver = node->node_ver; + m.m_mib_lsys_call.tree_ver = mib_root.node_ver; + + /* Issue a synchronous call to the remote service. */ + r = ipc_sendrec(endpt, &m); + + /* Then first clean up. */ + if (GRANT_VALID(newp_grant)) + cpf_revoke(newp_grant); + if (GRANT_VALID(oldp_grant)) + cpf_revoke(oldp_grant); + cpf_revoke(name_grant); + + /* + * Treat any IPC-level error as an indication that there is a problem + * with the remote service. Declare it dead, remove all its mount + * points, and return ERESTART to indicate to the caller that it should + * (carefully) try to continue the request on a local subtree instead. + * Again: mib_down() may actually deallocate the given 'node' object. + */ + if (r != OK) { + mib_down(node->node_eid); + + return ERESTART; + } + + if (m.m_type != COMMON_MIB_REPLY) + return EINVAL; + if (m.m_lsys_mib_reply.req_id != 0) + return EINVAL; + + /* + * If a deregister message from the service crosses our call, we'll get + * the response before we get the deregister request. In that case, + * the remote service should return ERESTART to indicate that the mount + * point does not exist as far as it is concerned, so that we can try + * the local version of the tree instead.
+ */ + if (m.m_lsys_mib_reply.status == ERESTART) + mib_do_deregister(endpt, node->node_rid); + + return m.m_lsys_mib_reply.status; +} diff --git a/minix/servers/mib/tree.c b/minix/servers/mib/tree.c index 2b583a626..9eba191b8 100644 --- a/minix/servers/mib/tree.c +++ b/minix/servers/mib/tree.c @@ -22,8 +22,9 @@ #define SCRATCH_SIZE MAX(PAGE_SIZE, sizeof(struct sysctldesc) + MAXDESCLEN) static char scratch[SCRATCH_SIZE] __aligned(sizeof(int32_t)); -unsigned int nodes; /* how many nodes are there in the tree? */ -unsigned int objects; /* how many allocated memory objects are there? */ +unsigned int mib_nodes; /* how many nodes are there in the tree? */ +unsigned int mib_objects; /* how many memory objects are allocated? */ +unsigned int mib_remotes; /* how many remote subtrees are there? */ /* * Find a node through its parent node and identifier. Return the node if it @@ -99,12 +100,12 @@ mib_copyout_node(struct mib_call * call, struct mib_oldp * oldp, size_t off, memset(&scn, 0, sizeof(scn)); /* - * We use CTLFLAG_PARENT and CTLFLAG_VERIFY internally only. NetBSD - * uses the values of these flags for different purposes. Either way, - * do not expose them to userland. + * We use CTLFLAG_PARENT, CTLFLAG_VERIFY, and CTLFLAG_REMOTE internally + * only. NetBSD uses the values of these flags for different purposes. + * Either way, do not expose them to userland. */ - scn.sysctl_flags = SYSCTL_VERSION | - (node->node_flags & ~(CTLFLAG_PARENT | CTLFLAG_VERIFY)); + scn.sysctl_flags = SYSCTL_VERSION | (node->node_flags & + ~(CTLFLAG_PARENT | CTLFLAG_VERIFY | CTLFLAG_REMOTE)); scn.sysctl_num = id; strlcpy(scn.sysctl_name, node->node_name, sizeof(scn.sysctl_name)); scn.sysctl_ver = node->node_ver; @@ -135,13 +136,16 @@ mib_copyout_node(struct mib_call * call, struct mib_oldp * oldp, size_t off, /* Report the node size the way NetBSD does, just in case. */ scn.sysctl_size = sizeof(scn); - /* If this is a real parent node, report child information. 
*/ - if ((node->node_flags & CTLFLAG_PARENT) && visible) { - scn.sysctl_csize = node->node_csize; - scn.sysctl_clen = node->node_clen; - } - /* + * If this is a remote node, use the values we have of the root + * of the remote subtree. If we did not have these values, we + * would have to call into the remote service here, which for + * reliability purposes is a bad idea. + * + * If this is a real parent node, report child information. In + * both these cases, expose child information only if the node + * itself is accessible by the caller. + * * If this is a function-driven node, indicate this by setting * a nonzero function address. This allows trace(1) to * determine that it should not attempt to descend into this @@ -150,7 +154,17 @@ mib_copyout_node(struct mib_call * call, struct mib_oldp * oldp, size_t off, * expected in these parts of the tree. Do not return the real * function pointer, as this would leak anti-ASR information. */ - if (!(node->node_flags & CTLFLAG_PARENT)) + if (node->node_flags & CTLFLAG_REMOTE) { + if (visible) { + scn.sysctl_csize = node->node_rcsize; + scn.sysctl_clen = node->node_rclen; + } + } else if (node->node_flags & CTLFLAG_PARENT) { + if (visible) { + scn.sysctl_csize = node->node_csize; + scn.sysctl_clen = node->node_clen; + } + } else scn.sysctl_func = SYSCTL_NODE_FN; } @@ -164,7 +178,7 @@ mib_copyout_node(struct mib_call * call, struct mib_oldp * oldp, size_t off, */ static ssize_t mib_query(struct mib_call * call, struct mib_node * parent, - struct mib_oldp * oldp, struct mib_newp * newp, struct mib_node * root) + struct mib_oldp * oldp, struct mib_newp * newp) { struct sysctlnode scn; struct mib_node *node; @@ -184,7 +198,8 @@ mib_query(struct mib_call * call, struct mib_node * parent, * If a node version number is given, it must match the version * of the parent or the root. 
*/ - if (scn.sysctl_ver != 0 && scn.sysctl_ver != root->node_ver && + if (scn.sysctl_ver != 0 && + scn.sysctl_ver != mib_root.node_ver && scn.sysctl_ver != parent->node_ver) return EINVAL; } @@ -223,6 +238,33 @@ mib_query(struct mib_call * call, struct mib_node * parent, return off; } +/* + * Check whether the given name buffer contains a valid node name string. If + * the name is nonempty, properly terminated, and contains only acceptable + * characters, return the length of the string excluding null terminator. + * Otherwise, return zero to indicate failure. + */ +static size_t +mib_check_name(const char * name, size_t namesize) +{ + size_t namelen; + char c; + + /* Names must be nonempty, null terminated, C symbol style strings. */ + for (namelen = 0; namelen < namesize; namelen++) { + if ((c = name[namelen]) == '\0') + break; + /* A-Z, a-z, 0-9, _ only, and no digit as first character. */ + if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || + c == '_' || (c >= '0' && c <= '9' && namelen > 0))) + return 0; + } + if (namelen == 0 || namelen == namesize) + return 0; + + return namelen; +} + /* * Scan a parent node's children, as part of new node creation. Search for * either a free node identifier (if given_id < 0) or collisions with the node @@ -379,30 +421,63 @@ mib_copyin_str(struct mib_newp * __restrict newp, vir_bytes addr, /* * Increase the version of the root node, and copy this new version to all - * nodes on the path to a node, as well as (optionally) that node itself. + * nodes on the path to the given node, including that node itself. */ static void -mib_upgrade(struct mib_node ** stack, int depth, struct mib_node * node) +mib_upgrade(struct mib_node * node) { uint32_t ver; - /* - * The bottom of the stack is always the root node, which determines - * the version of the entire tree. Do not use version number 0, as a - * zero version number indicates no interest in versions elsewhere. 
- */ - assert(depth > 0); + assert(node != NULL); - ver = stack[0]->node_ver + 1; + /* + * The root node determines the version of the entire tree. Do not use + * version number 0, as a zero version number indicates no interest in + * versions elsewhere. + */ + + ver = mib_root.node_ver + 1; if (ver == 0) ver = 1; /* Copy the new version to all the nodes on the path. */ - while (depth-- > 0) - stack[depth]->node_ver = ver; + do { + node->node_ver = ver; - if (node != NULL) - node->node_ver = stack[0]->node_ver; + node = node->node_parent; + } while (node != NULL); +} + +/* + * Add a new dynamically allocated node into the tree, inserting it into the + * linked-list position of the parent tree as given by 'prevp'. Also update + * versions and counters accordingly. This function never fails. + */ +static void +mib_add(struct mib_dynode * dynode, struct mib_dynode ** prevp) +{ + struct mib_node *parent; + + parent = dynode->dynode_node.node_parent; + assert(parent != NULL); + + /* Link the dynamic node into the list, in the right place. */ + assert(prevp != NULL); + dynode->dynode_next = *prevp; + *prevp = dynode; + + /* The parent node now has one more child. */ + parent->node_csize++; + parent->node_clen++; + + /* There is now one more node in the tree. */ + mib_nodes++; + + /* + * Bump the version of all nodes on the path to the new node, including + * the node itself. 
+ */ + mib_upgrade(&dynode->dynode_node); } /* @@ -410,8 +485,7 @@ mib_upgrade(struct mib_node ** stack, int depth, struct mib_node * node) */ static ssize_t mib_create(struct mib_call * call, struct mib_node * parent, - struct mib_oldp * oldp, struct mib_newp * newp, - struct mib_node ** stack, int depth) + struct mib_oldp * oldp, struct mib_newp * newp) { struct mib_dynode *dynode, **prevp; struct mib_node *node; @@ -426,6 +500,13 @@ mib_create(struct mib_call * call, struct mib_node * parent, if (!mib_authed(call)) return EPERM; + /* + * The parent must not be a remote node, but this is already implied by + * the fact that we got here at all. + */ + assert(SYSCTL_TYPE(parent->node_flags) == CTLTYPE_NODE); + assert(!(parent->node_flags & CTLFLAG_REMOTE)); + /* The parent node must not be marked as read-only. */ if (!(parent->node_flags & CTLFLAG_READWRITE)) return EPERM; @@ -456,13 +537,11 @@ mib_create(struct mib_call * call, struct mib_node * parent, return EINVAL; /* - * If a node version number is given, it must match the version of the - * parent or the root (which is always the bottom of the node stack). - * The given version number is *not* used for the node being created. + * If a node version number is given, it must match the version of + * either the parent or the root node. The given version number is + * *not* used for the node being created. */ - assert(depth > 0); - - if (scn.sysctl_ver != 0 && scn.sysctl_ver != stack[0]->node_ver && + if (scn.sysctl_ver != 0 && scn.sysctl_ver != mib_root.node_ver && scn.sysctl_ver != parent->node_ver) return EINVAL; @@ -554,16 +633,10 @@ mib_create(struct mib_call * call, struct mib_node * parent, if (scn.sysctl_func != NULL || scn.sysctl_parent != NULL) return EINVAL; - /* Names must be nonempty, null terminated, C symbol style strings. 
*/ - for (namelen = 0; namelen < sizeof(scn.sysctl_name); namelen++) { - if ((c = scn.sysctl_name[namelen]) == '\0') - break; - /* A-Z, a-z, 0-9, _ only, and no digit as first character. */ - if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || - c == '_' || (c >= '0' && c <= '9' && namelen > 0))) - return EINVAL; - } - if (namelen == 0 || namelen == sizeof(scn.sysctl_name)) + /* Verify that the given name is valid, and get its string length. */ + namelen = mib_check_name(scn.sysctl_name, sizeof(scn.sysctl_name)); + + if (namelen == 0) return EINVAL; /* @@ -609,7 +682,7 @@ mib_create(struct mib_call * call, struct mib_node * parent, if ((dynode = malloc(size)) == NULL) return EINVAL; /* do not return ENOMEM */ - objects++; + mib_objects++; /* From here on, we have to free "dynode" before returning an error. */ r = OK; @@ -623,6 +696,7 @@ mib_create(struct mib_call * call, struct mib_node * parent, if (SYSCTL_TYPE(scn.sysctl_flags) == CTLTYPE_NODE) node->node_flags |= CTLFLAG_PARENT; node->node_size = scn.sysctl_size; + node->node_parent = parent; node->node_name = dynode->dynode_name; /* Initialize the node value. */ @@ -682,29 +756,16 @@ mib_create(struct mib_call * call, struct mib_node * parent, /* Deal with earlier failures now. */ if (r != OK) { free(dynode); - objects--; + mib_objects--; return r; } - /* At this point, actual creation can no longer fail. */ - - /* Link the dynamic node into the list, in the right place. */ - assert(prevp != NULL); - dynode->dynode_next = *prevp; - *prevp = dynode; - - /* The parent node now has one more child. */ - parent->node_csize++; - parent->node_clen++; - - nodes++; - /* - * Bump the version of all nodes on the path to the new node, including - * the node itself. + * At this point, actual creation can no longer fail. Add the node + * into the tree, and update versions and counters. */ - mib_upgrade(stack, depth, node); + mib_add(dynode, prevp); /* * Copy out the newly created node as resulting ("old") data. 
Do not @@ -713,15 +774,72 @@ mib_create(struct mib_call * call, struct mib_node * parent, return mib_copyout_node(call, oldp, 0, id, node); } +/* + * Remove the given node from the tree. If 'prevp' is NULL, the node is a + * static node which should be zeroed out. If 'prevp' is not NULL, the node is + * a dynamic node which should be freed; 'prevp' will then point to the pointer + * to its dynode container. Also update versions and counters as appropriate. + * This function never fails. + */ +static void +mib_remove(struct mib_node * node, struct mib_dynode ** prevp) +{ + struct mib_dynode *dynode; + struct mib_node *parent; + + parent = node->node_parent; + assert(parent != NULL); + + /* If the description was allocated, free it. */ + if (node->node_flags & CTLFLAG_OWNDESC) { + free(__UNCONST(node->node_desc)); + mib_objects--; + } + + /* + * Static nodes only use static memory, and dynamic nodes have the data + * area embedded in the dynode object. In neither case is data memory + * allocated separately, and thus, it need never be freed separately. + * Therefore we *must not* check CTLFLAG_OWNDATA here. + */ + + assert(parent->node_csize > 0); + assert(parent->node_clen > 0); + + /* + * Dynamic nodes must be freed. Freeing the dynode object also frees + * the node name and any associated data. Static nodes are zeroed out, + * and the static memory they referenced will become inaccessible. + */ + if (prevp != NULL) { + dynode = *prevp; + *prevp = dynode->dynode_next; + + assert(node == &dynode->dynode_node); + + free(dynode); + mib_objects--; + + parent->node_csize--; + } else + memset(node, 0, sizeof(*node)); + + parent->node_clen--; + + mib_nodes--; + + /* Bump the version of all nodes on the path to the destroyed node. */ + mib_upgrade(parent); +} + /* * Destroy a node. 
*/ static ssize_t mib_destroy(struct mib_call * call, struct mib_node * parent, - struct mib_oldp * oldp, struct mib_newp * newp, - struct mib_node ** stack, int depth) + struct mib_oldp * oldp, struct mib_newp * newp) { - struct mib_dynode *dynode, **prevp; + struct mib_dynode **prevp; struct mib_node *node; struct sysctlnode scn; ssize_t r; @@ -754,6 +872,10 @@ mib_destroy(struct mib_call * call, struct mib_node * parent, /* For node-type nodes, extra rules apply. */ if (SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE) { + /* The node must not be a mount point. */ + if (node->node_flags & CTLFLAG_REMOTE) + return EBUSY; + /* The node must not have an associated function. */ if (!(node->node_flags & CTLFLAG_PARENT)) return EPERM; @@ -783,44 +905,12 @@ mib_destroy(struct mib_call * call, struct mib_node * parent, */ r = mib_copyout_node(call, oldp, 0, scn.sysctl_num, node); - /* If the description was allocated, free it. */ - if (node->node_flags & CTLFLAG_OWNDESC) { - free(__UNCONST(node->node_desc)); - objects--; - } - /* - * Static nodes only use static memory, and dynamic nodes have the data - * area embedded in the dynode object. In neither case is data memory - * allocated separately, and thus, it need never be freed separately. - * Therefore we *must not* check CTLFLAG_OWNDATA here. + * Remove the node from the tree. The procedure depends on whether the + * node is static (prevp == NULL) or dynamic (prevp != NULL). Also + * update versions and counters. */ - - assert(parent->node_csize > 0); - assert(parent->node_clen > 0); - - /* - * Dynamic nodes must be freed. Freeing the dynode object also frees - * the node name and any associated data. Static nodes are zeroed out, - * and the static memory they referenced will become inaccessible. 
- */ - if (prevp != NULL) { - dynode = *prevp; - *prevp = dynode->dynode_next; - - free(dynode); - objects--; - - parent->node_csize--; - } else - memset(node, 0, sizeof(*node)); - - parent->node_clen--; - - nodes--; - - /* Bump the version of all nodes on the path to the destroyed node. */ - mib_upgrade(stack, depth, NULL); + mib_remove(node, prevp); return r; } @@ -914,6 +1004,15 @@ mib_describe(struct mib_call * call, struct mib_node * parent, if (!mib_authed(call)) return EPERM; + /* + * The node must not be a mount point. Arguably this + * check is not necessary, since we use the description + * of the preexisting underlying node anyway. + */ + if (SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE && + (node->node_flags & CTLFLAG_REMOTE)) + return EBUSY; + /* The node must not already have a description. */ if (node->node_desc != NULL) return EPERM; @@ -946,7 +1045,7 @@ mib_describe(struct mib_call * call, struct mib_node * parent, return EINVAL; /* do not return ENOMEM */ } - objects++; + mib_objects++; /* The description must now be freed with the node. */ node->node_flags |= CTLFLAG_OWNDESC; @@ -1086,6 +1185,14 @@ mib_write(struct mib_call * call, struct mib_node * node, return EINVAL; switch (SYSCTL_TYPE(node->node_flags)) { + case CTLTYPE_BOOL: + case CTLTYPE_INT: + case CTLTYPE_QUAD: + case CTLTYPE_STRUCT: + /* Non-string types must have an exact size match. */ + if (newlen != node->node_size) + return EINVAL; + break; case CTLTYPE_STRING: /* * Strings must not exceed their buffer size. There is a @@ -1096,20 +1203,12 @@ mib_write(struct mib_call * call, struct mib_node * node, if (newlen > node->node_size) return EINVAL; break; - case CTLTYPE_BOOL: - case CTLTYPE_INT: - case CTLTYPE_QUAD: - case CTLTYPE_STRUCT: - /* Non-string types must have an exact size match. 
*/ - if (newlen != node->node_size) - return EINVAL; - break; default: return EINVAL; } /* - * If we cannot fit the data in the small stack buffer, then allocate a + * If we cannot fit the data in the scratch buffer, then allocate a * temporary buffer. We add one extra byte so that we can add a null * terminator at the end of strings in case userland did not supply * one. Either way, we must free the temporary buffer later! @@ -1138,7 +1237,7 @@ mib_write(struct mib_call * call, struct mib_node * node, return EINVAL; } - objects++; + mib_objects++; } else src = scratch; @@ -1194,7 +1293,7 @@ mib_write(struct mib_call * call, struct mib_node * node, if (src != scratch) { free(src); - objects--; + mib_objects--; } return r; @@ -1231,12 +1330,12 @@ mib_readwrite(struct mib_call * call, struct mib_node * node, * old data length on success, or a negative error code on failure. */ ssize_t -mib_dispatch(struct mib_call * call, struct mib_node * root, - struct mib_oldp * oldp, struct mib_newp * newp) +mib_dispatch(struct mib_call * call, struct mib_oldp * oldp, + struct mib_newp * newp) { - struct mib_node *stack[CTL_MAXNAME]; struct mib_node *parent, *node; - int id, depth, is_leaf, has_verify, has_func; + ssize_t r; + int id, is_leaf, can_restart, has_verify, has_func; assert(call->call_namelen <= CTL_MAXNAME); @@ -1244,15 +1343,7 @@ mib_dispatch(struct mib_call * call, struct mib_node * root, * Resolve the name by descending into the node tree, level by level, * starting at the MIB root. */ - depth = 0; - - for (parent = root; call->call_namelen > 0; parent = node) { - /* - * For node creation and destruction, build a node stack, to - * allow for up-propagation of new node version numbers. 
- */ - stack[depth++] = parent; - + for (parent = &mib_root; call->call_namelen > 0; parent = node) { id = call->call_name[0]; call->call_name++; call->call_namelen--; @@ -1276,14 +1367,11 @@ mib_dispatch(struct mib_call * call, struct mib_node * root, switch (id) { case CTL_QUERY: - return mib_query(call, parent, oldp, newp, - root); + return mib_query(call, parent, oldp, newp); case CTL_CREATE: - return mib_create(call, parent, oldp, newp, - stack, depth); + return mib_create(call, parent, oldp, newp); case CTL_DESTROY: - return mib_destroy(call, parent, oldp, newp, - stack, depth); + return mib_destroy(call, parent, oldp, newp); case CTL_DESCRIBE: return mib_describe(call, parent, oldp, newp); case CTL_CREATESYM: @@ -1301,6 +1389,32 @@ mib_dispatch(struct mib_call * call, struct mib_node * root, if ((node->node_flags & CTLFLAG_PRIVATE) && !mib_authed(call)) return EPERM; + /* + * Start by checking if the node is a remote node. If so, let + * a remote service handle the remainder of this request. + * However, as part of attempting the remote call, we may + * discover that the remote service has died or that it is + * unmounting the subtree. If the node was not a temporary + * mountpoint, we should (and do) continue with the request + * locally - if it was, it will already be deallocated and we + * must be very careful not to access 'node' again! + */ + is_leaf = (SYSCTL_TYPE(node->node_flags) != CTLTYPE_NODE); + + if (!is_leaf && (node->node_flags & CTLFLAG_REMOTE)) { + /* Determine this before 'node' may disappear.. */ + can_restart = (node->node_flags & CTLFLAG_PARENT); + + r = mib_remote_call(call, node, oldp, newp); + + if (r != ERESTART || !can_restart) + return (r != ERESTART) ? r : ENOENT; + + /* Service died, subtree is unmounted, keep going. */ + assert(SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE); + assert(!(node->node_flags & CTLFLAG_REMOTE)); + } + /* * Is this a leaf node, and/or is this node handled by a * function? 
If either is true, resolution ends at this level. @@ -1308,7 +1422,6 @@ mib_dispatch(struct mib_call * call, struct mib_node * root, * different ways to determine whether there is a function * depending on whether the node is a leaf or not. */ - is_leaf = (SYSCTL_TYPE(node->node_flags) != CTLTYPE_NODE); if (is_leaf) { has_verify = (node->node_flags & CTLFLAG_VERIFY); has_func = (!has_verify && node->node_func != NULL); @@ -1385,11 +1498,12 @@ mib_tree_recurse(struct mib_node * parent) if (node->node_flags == 0) continue; - nodes++; + mib_nodes++; parent->node_clen++; node->node_ver = parent->node_ver; + node->node_parent = parent; /* Recursively apply this function to all node children. */ if (SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE && @@ -1403,16 +1517,326 @@ mib_tree_recurse(struct mib_node * parent) * that could not be assigned at compile time. */ void -mib_tree_init(struct mib_node * root) +mib_tree_init(void) { /* Initialize some variables. */ - nodes = 1; /* the root node itself */ - objects = 0; + mib_nodes = 1; /* the root node itself */ + mib_objects = 0; - /* The entire tree starts with the same, nonzero node version. */ - root->node_ver = 1; + /* + * The entire tree starts with the same, nonzero node version. + * The root node is the only node without a parent. + */ + mib_root.node_ver = 1; + mib_root.node_parent = NULL; /* Recursively initialize the static tree. */ - mib_tree_recurse(root); + mib_tree_recurse(&mib_root); +} + +/* + * Process a subtree mount request from a remote service. Return OK on + * success, with a pointer to the resulting static-node structure stored in + * 'nodep'. Return a negative error code on failure. 
+ */ +int +mib_mount(const int * mib, unsigned int miblen, unsigned int eid, uint32_t rid, + uint32_t flags, unsigned int csize, unsigned int clen, + struct mib_node ** nodep) +{ + struct mib_dynode *dynode, **prevp; + struct mib_node *parent, *node; + char name[SYSCTL_NAMELEN], *desc; + size_t size, namelen, desclen; + unsigned int n; + int r, id; + + /* + * Perform initial verification of the given parameters. Even stricter + * checks may be performed later. + */ + /* + * By policy, we forbid mounting top-level nodes. This is in effect + * also the only security-like restriction: a service should not be + * able to just take over, say, the entire "kern" subtree. There is + * currently little in the way of a service taking over an important + * set of second-level nodes, though. + * + * TODO: allow mounting of predefined mount points only, for example by + * having an internal node flag that permits mounting the subtree or + * any node in it. As an even better alternative, allow this to be + * controlled through a policy specification; unfortunately, this would + * also add a substantial amount of infrastructure. + */ + if (miblen < 2) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, path too short\n")); + + return EPERM; + } + + /* + * The flags field is highly restricted right now. Only a few flags + * may be given at all, and then when using an existing node as mount + * point, the flag must exactly match the existing node's flags. 
+ */ + if (SYSCTL_VERS(flags) != SYSCTL_VERSION || + SYSCTL_TYPE(flags) != CTLTYPE_NODE || + (SYSCTL_FLAGS(flags) & ~(CTLFLAG_READONLY | CTLFLAG_READWRITE | + CTLFLAG_PERMANENT | CTLFLAG_HIDDEN)) != 0) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, invalid flags %"PRIx32 + "\n", flags)); + + return EINVAL; + } + + if (csize > (1U << MIB_RC_BITS) || clen > csize) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, invalid child size or " + "length (%u, %u)\n", csize, clen)); + + return EINVAL; + } + + /* + * Look up the parent node of the mount point. This parent node must + * exist - we don't want to create more than one temporary node in any + * case. All the nodes leading up to and including the parent node + * must be real, local, non-private, node-type nodes. The path may not + * be private, because that would allow an unprivileged service to + * intercept writes to privileged nodes--currently a total nonissue in + * practice, but still. Note that the service may itself restrict + * access to nodes in its own mounted subtree in any way it wishes. + */ + parent = &mib_root; + + for (n = 0; n < miblen - 1; n++) { + /* Meta-identifiers are obviously not allowed in the path. */ + if ((id = mib[n]) < 0) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, meta-ID in " + "path\n")); + + return EINVAL; + } + + /* Locate the child node. */ + if ((node = mib_find(parent, id, NULL /*prevp*/)) == NULL) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, path not " + "found\n")); + + return ENOENT; + } + + /* Make sure it is a regular node-type node. */ + if (SYSCTL_TYPE(node->node_flags) != CTLTYPE_NODE || + !(node->node_flags & CTLFLAG_PARENT) || + (node->node_flags & (CTLFLAG_REMOTE | CTLFLAG_PRIVATE))) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, unacceptable " + "node on path\n")); + + return EPERM; + } + + parent = node; + } + + /* Now see if the mount point itself exists. 
*/ + if ((id = mib[miblen - 1]) < 0) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, meta-ID in path\n")); + + return EINVAL; + } + + /* + * If the target node exists and passes all tests, it will simply be + * converted to a mount point. If the target node does not exist, we + * have to allocate a temporary node as mount point. + */ + if ((node = mib_find(parent, id, NULL /*prevp*/)) != NULL) { + /* + * We are about to mount on an existing node. As stated above, + * the node flags must match the given flags exactly. + */ + if (SYSCTL_TYPE(node->node_flags) != CTLTYPE_NODE || + SYSCTL_FLAGS(node->node_flags) != + (SYSCTL_FLAGS(flags) | CTLFLAG_PARENT)) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, target node " + "mismatch (%"PRIx32", %"PRIx32")\n", + node->node_flags, flags)); + + return EPERM; + } + + /* + * If the node has dynamically added children, we will not be + * able to restore the node to its old state when unmounting. + */ + if (node->node_size != node->node_csize) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, node has " + "dynamic children\n")); + + return EBUSY; + } + + mib_upgrade(node); + } else { + /* + * We are going to create a temporary mount point. Much of the + * procedure that follows is a rather selective extract from + * mib_create(). Start with a check for the impossible. + */ + if (parent->node_csize == INT_MAX) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, parent node " + "full\n")); + + return EINVAL; + } + + /* + * In order to create the new node, we also need the node's + * name and description; those did not fit in the request + * message. Ask the caller to copy these strings to us. + */ + name[0] = '\0'; + scratch[0] = '\0'; + + if ((r = mib_remote_info(eid, rid, name, sizeof(name), scratch, + MAXDESCLEN)) != OK) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, node info " + "request yielded %d\n", r)); + + return r; + } + + /* Make sure the name is valid. 
*/ + if ((namelen = mib_check_name(name, sizeof(name))) == 0) { + printf("MIB: mounting failed, bad name\n"); + + return EINVAL; + } + + /* Just forcefully terminate the description. */ + scratch[MAXDESCLEN - 1] = '\0'; + desclen = strlen(scratch); + + /* + * We know the identifier is not in use yet; make sure that the + * name is not, either. As a side effect, find out where the + * new node should be inserted upon success. + */ + if (mib_scan(parent, id, name, &id /*unused*/, &prevp, + &node /*unused*/) != OK) { + MIB_DEBUG_MOUNT(("MIB: mounting failed, name " + "conflict\n")); + + return EEXIST; + } + + /* + * Allocate a dynamic node. Unlike for user-created dynamic + * nodes, temporary mount points also include the description + * in the dynode object. + */ + size = sizeof(*dynode) + namelen + desclen + 1; + + if ((dynode = malloc(size)) == NULL) { + printf("MIB: out of memory!\n"); + + return ENOMEM; + } + mib_objects++; + + /* Initialize the dynamic node. */ + memset(dynode, 0, sizeof(*dynode)); + dynode->dynode_id = id; + strlcpy(dynode->dynode_name, name, namelen + 1); + desc = &dynode->dynode_name[namelen + 1]; + strlcpy(desc, scratch, desclen + 1); + + node = &dynode->dynode_node; + node->node_flags = flags & ~SYSCTL_VERS_MASK; + node->node_size = 0; + node->node_parent = parent; + node->node_name = dynode->dynode_name; + node->node_desc = desc; + + /* + * Add the new dynamic node into the tree, and adjust versions + * and counters. + */ + mib_add(dynode, prevp); + } + + /* Success! Perform the actual mount, and return the target node. */ + node->node_flags |= CTLFLAG_REMOTE; + node->node_eid = eid; + node->node_rcsize = csize; + node->node_rclen = clen; + node->node_rid = rid; + + mib_remotes++; + + *nodep = node; + return OK; +} + +/* + * Unmount the remote subtree identified by the given node. Release the mount + * point by reversing the action performed while mounting. 
Also bump the + * version numbers on the path, so that userland knows that it is to expect a + * change of contents in the subtree. This function always succeeds, and may + * deallocate the given node. + */ +void +mib_unmount(struct mib_node * node) +{ + struct mib_dynode **prevp; + struct mib_node *child; + int id; + + assert(SYSCTL_TYPE(node->node_flags) == CTLTYPE_NODE); + assert(node->node_flags & CTLFLAG_REMOTE); + + /* + * Given that the node has the CTLFLAG_REMOTE flag set, we can now tell + * whether the remote subtree obscured a preexisting node or we created + * a temporary mount point, by checking its CTLFLAG_PARENT flag. + */ + if (node->node_flags & CTLFLAG_PARENT) { + /* + * Return the node to its former pre-mount state. Restore the + * original node_clen field by recomputing it. + */ + node->node_flags &= ~CTLFLAG_REMOTE; + node->node_csize = node->node_size; + node->node_clen = 0; + + for (id = 0; IS_STATIC_ID(node, id); id++) { + child = &node->node_scptr[id]; + + if (child->node_flags != 0) + node->node_clen++; + } + + node->node_dcptr = NULL; + + /* Increase version numbers on the path to the node. */ + mib_upgrade(node); + } else { + /* + * We know that we dynamically allocated this node; find its + * parent's pointer to it. + */ + for (prevp = &node->node_parent->node_dcptr; *prevp != NULL; + prevp = &(*prevp)->dynode_next) { + if (&(*prevp)->dynode_node == node) + break; + } + assert(*prevp != NULL); + + /* Free the node, and adjust counts and versions. 
*/ + mib_remove(node, prevp); + } + + assert(mib_remotes > 0); + mib_remotes--; } diff --git a/minix/tests/Makefile b/minix/tests/Makefile index c4de4425d..3d62445dc 100644 --- a/minix/tests/Makefile +++ b/minix/tests/Makefile @@ -20,6 +20,7 @@ LDADD+= -lm SUBDIR+= blocktest SUBDIR+= ddekit +SUBDIR+= rmibtest # Some have special flags compiling CPPFLAGS.test56.c += -D_MINIX_SYSTEM=1 @@ -77,7 +78,7 @@ PROGS+= t10a t11a t11b t40a t40b t40c t40d t40e t40f t40g t60a t60b \ t67a t67b t68a t68b tvnd t84_h_spawn t84_h_spawnattr SCRIPTS+= run check-install testinterp.sh testsh1.sh testsh2.sh testmfs.sh \ - testisofs.sh testvnd.sh testkyua.sh testrelpol.sh + testisofs.sh testvnd.sh testkyua.sh testrelpol.sh testrmib.sh # test57loop.S is not linked into the .bcl file. # This way, we can link it in when linking the final binary diff --git a/minix/tests/rmibtest/Makefile b/minix/tests/rmibtest/Makefile new file mode 100644 index 000000000..54abf55c7 --- /dev/null +++ b/minix/tests/rmibtest/Makefile @@ -0,0 +1,14 @@ +# Makefile for the Remote MIB test service (rmibtest) +PROG= rmibtest +SRCS= rmibtest.c +FILES= rmibtest.conf + +DPADD+= ${LIBSYS} +LDADD+= -lsys + +MAN= + +BINDIR?= /usr/tests/minix-posix/rmibtest +FILESDIR?= /usr/tests/minix-posix/rmibtest + +.include diff --git a/minix/tests/rmibtest/rmibtest.c b/minix/tests/rmibtest/rmibtest.c new file mode 100644 index 000000000..eec1c3a4b --- /dev/null +++ b/minix/tests/rmibtest/rmibtest.c @@ -0,0 +1,267 @@ +/* Remote MIB (RMIB) test service - by D.C. van Moolenbroek */ +/* + * This test is a good start, but not an exhaustive coverage test for all + * possible failure cases. The reason for that is mainly that there are + * various scenarios that we cannot generate without implementing our own local + * bogus RMIB code. Adding that is something for later - TODO. + */ +#include +#include +#include + +static int running; + +/* The following is a copy of the minix.test subtree in the MIB service. 
*/ +static char test_string[16], test_struct[12]; + +static struct rmib_node minix_test_secret_table[] = { +/* 0*/ [SECRET_VALUE] = RMIB_INT(RMIB_RO, 12345, "value", + "The combination to my luggage"), +}; + +static struct rmib_node minix_test_table[] = { +/* 0*/ [TEST_INT] = RMIB_INT(RMIB_RO | CTLFLAG_HEX, 0x01020304, + "int", "Value test field"), +/* 1*/ [TEST_BOOL] = RMIB_BOOL(RMIB_RW, 0, "bool", + "Boolean test field"), +/* 2*/ [TEST_QUAD] = RMIB_QUAD(RMIB_RW, 0, "quad", + "Quad test field"), +/* 3*/ [TEST_STRING] = RMIB_STRING(RMIB_RW, test_string, "string", + "String test field"), +/* 4*/ [TEST_STRUCT] = RMIB_STRUCT(RMIB_RW, sizeof(test_struct), + test_struct, "struct", + "Structure test field"), +/* 5*/ [TEST_PRIVATE] = RMIB_INT(RMIB_RW | CTLFLAG_PRIVATE, -5375, + "private", "Private test field"), +/* 6*/ [TEST_ANYWRITE] = RMIB_INT(RMIB_RW | CTLFLAG_ANYWRITE, 0, + "anywrite", "AnyWrite test field"), +/* 7*/ [TEST_DYNAMIC] = RMIB_INT(RMIB_RO, 0, "deleteme", + "This node will be destroyed"), +/* 8*/ [TEST_SECRET] = RMIB_NODE(RMIB_RO | CTLFLAG_PRIVATE, + minix_test_secret_table, "secret", + "Private subtree"), +/* 9*/ [TEST_PERM] = RMIB_INT(RMIB_RO, 1, "permanent", NULL), +/*10*/ [TEST_DESTROY1] = RMIB_INT(RMIB_RO, 123, "destroy1", NULL), +/*11*/ [TEST_DESTROY2] = RMIB_INT(RMIB_RO, 456, "destroy2", + "This node will be destroyed"), +}; + +static struct rmib_node minix_test = RMIB_NODE(RMIB_RW | CTLFLAG_HIDDEN, + minix_test_table, "test", "Test87 testing ground"); +/* Here ends the copy of the minix.test subtree in the MIB service. */ + +static struct rmib_node test_table[] = { +}; + +static struct rmib_node test_rnode = RMIB_NODE(RMIB_RO, test_table, "test", + "Test node"); + +static int value = 5375123; + +static ssize_t test_func(struct rmib_call *, struct rmib_node *, + struct rmib_oldp *, struct rmib_newp *); + +/* No defined constants because userland will access these by name anyway. 
*/ +static struct rmib_node minix_rtest_table[] = { + [1] = RMIB_INTPTR(RMIB_RW, &value, "int", + "Test description"), + [2] = RMIB_FUNC(CTLTYPE_INT | RMIB_RW, sizeof(int), + test_func, "func", "Test function"), +}; + +static struct rmib_node minix_rtest = RMIB_NODE(RMIB_RO, minix_rtest_table, + "rtest", "Remote test subtree"); + +/* + * Test function that deflects reads and writes to its sibling node. Not a + * super useful thing to do, but a decent test of functionality regardless. + */ +static ssize_t +test_func(struct rmib_call * call, struct rmib_node * node, + struct rmib_oldp * oldp, struct rmib_newp * newp) +{ + + return rmib_readwrite(call, &minix_rtest_table[1], oldp, newp); +} + +/* + * Attempt to perform registrations that should be rejected locally, and thus + * result in failure immediately. Unfortunately, we cannot verify that the MIB + * service also verifies these aspects remotely, at least without talking to it + * directly. + */ +static void +test_local_failures(void) +{ + int r, mib[CTL_SHORTNAME + 1]; + + memset(mib, 0, sizeof(mib)); + + /* Test an empty path. */ + if ((r = rmib_register(mib, 0, &test_rnode)) != EINVAL) + panic("registering remote MIB subtree yielded: %d", r); + + /* Test a path that is too long. */ + if ((r = rmib_register(mib, CTL_SHORTNAME + 1, &test_rnode)) != EINVAL) + panic("registering remote MIB subtree yielded: %d", r); + + /* Test a mount point that is not a node-type (parent) node. */ + mib[0] = CTL_MINIX; + mib[1] = MINIX_TEST; + mib[2] = TEST_INT; + if ((r = rmib_register(mib, 3, &minix_test_table[TEST_INT])) != EINVAL) + panic("registering remote MIB subtree yielded: %d", r); +} + +/* + * Perform a number of registrations that will not be accepted by the MIB + * service. We will never know, but the userland test script can verify the + * difference by comparing the number of remotes before and after. 
+ */ +static void +test_remote_failures(void) +{ + int r, mib[CTL_SHORTNAME]; + + /* Test an existing one-node path. */ + mib[0] = CTL_KERN; + if ((r = rmib_register(mib, 1, &test_rnode)) != OK) + panic("unable to register remote MIB subtree: %d", r); + rmib_reset(); + + /* Test a path in which a non-final component does not exist. */ + mib[1] = CREATE_BASE - 1; /* probably as safe as it gets.. */ + mib[2] = 0; + if ((r = rmib_register(mib, 3, &test_rnode)) != OK) + panic("unable to register remote MIB subtree: %d", r); + rmib_reset(); + + /* Test a path in which a non-final component is not a parent node. */ + mib[1] = KERN_OSTYPE; + if ((r = rmib_register(mib, 3, &test_rnode)) != OK) + panic("unable to register remote MIB subtree: %d", r); + rmib_reset(); + + /* Test a path in which a non-final component is a meta-identifier. */ + mib[1] = CTL_QUERY; + if ((r = rmib_register(mib, 3, &test_rnode)) != OK) + panic("unable to register remote MIB subtree: %d", r); + rmib_reset(); + + /* Test a path in which the final component is a meta-identifier. */ + if ((r = rmib_register(mib, 2, &test_rnode)) != OK) + panic("unable to register remote MIB subtree: %d", r); + rmib_reset(); + + /* Test a path in which the final component identifies a non-parent. */ + mib[1] = KERN_OSTYPE; + if ((r = rmib_register(mib, 2, &test_rnode)) != OK) + panic("unable to register remote MIB subtree: %d", r); + rmib_reset(); + + /* Test a path with unacceptable flags for the final component. */ + mib[0] = CTL_MINIX; + mib[1] = MINIX_TEST; + mib[2] = TEST_SECRET; + if ((r = rmib_register(mib, 3, &test_rnode)) != OK) + panic("unable to register remote MIB subtree: %d", r); + rmib_reset(); + + /* Test a path of which the name, but not the ID, already exists. 
*/ + mib[1] = CREATE_BASE - 1; + if ((r = rmib_register(mib, 2, &test_rnode)) != OK) + panic("unable to register remote MIB subtree: %d", r); + /* + * Do NOT call rmib_reset() anymore now: we want to let the MIB service + * get the name from us. + */ +} + +static int +init(int type __unused, sef_init_info_t * info __unused) +{ + const int new_mib[] = { CTL_MINIX, CREATE_BASE - 2 }; + const int shadow_mib[] = { CTL_MINIX, MINIX_TEST }; + int r; + + test_local_failures(); + + test_remote_failures(); + + /* + * We must now register our new test tree before shadowing minix.test, + * because if any of the previous requests actually did succeed, the + * next registration will be rejected (ID 0 already in use) and no + * difference would be detected because of "successful" shadowing. + */ + r = rmib_register(new_mib, __arraycount(new_mib), &minix_rtest); + if (r != OK) + panic("unable to register remote MIB subtree: %d", r); + + r = rmib_register(shadow_mib, __arraycount(shadow_mib), &minix_test); + if (r != OK) + panic("unable to register remote MIB subtree: %d", r); + + running = TRUE; + + return OK; +} + +static void +cleanup(void) +{ + int r; + + if ((r = rmib_deregister(&minix_rtest)) != OK) + panic("unable to deregister: %d", r); + if ((r = rmib_deregister(&minix_test)) != OK) + panic("unable to deregister: %d", r); + + /* + * TODO: the fact that the MIB service can currently not detect the + * death of other services is creating somewhat of a problem here: if + * we deregister shortly before exiting, the asynchronous deregister + * requests may not be delivered before we actually exit (and take our + * asynsend table with us), and leave around the remote subtrees until + * a user process tries accessing them. We work around this here by + * delaying the exit by half a second - shorter than RS's timeout, but + * long enough to allow deregistration. 
+ */ + sys_setalarm(sys_hz() / 2, 0); + + running = FALSE; +} + +static void +got_signal(int sig) +{ + + if (sig == SIGTERM && running) + cleanup(); +} + +int +main(void) +{ + message m; + int r, ipc_status; + + sef_setcb_init_fresh(init); + sef_setcb_signal_handler(got_signal); + + sef_startup(); + + for (;;) { + r = sef_receive_status(ANY, &m, &ipc_status); + + if (r != OK) + panic("sef_receive_status failed: %d", r); + + if (m.m_source == CLOCK && is_ipc_notify(ipc_status)) + break; /* the intended exit path; see above */ + if (m.m_source == MIB_PROC_NR) + rmib_process(&m, ipc_status); + } + + return EXIT_SUCCESS; +} diff --git a/minix/tests/rmibtest/rmibtest.conf b/minix/tests/rmibtest/rmibtest.conf new file mode 100644 index 000000000..b76e99921 --- /dev/null +++ b/minix/tests/rmibtest/rmibtest.conf @@ -0,0 +1,2 @@ +service rmibtest { +}; diff --git a/minix/tests/run b/minix/tests/run index f70172123..2f79fea42 100755 --- a/minix/tests/run +++ b/minix/tests/run @@ -24,13 +24,13 @@ export USENETWORK # set to "yes" for test48+82 to use the network setuids="test11 test33 test43 test44 test46 test56 test60 test61 test65 \ test69 test73 test74 test78 test83 test85 test87 test88 test89" # Scripts that require to be run as root -rootscripts="testisofs testvnd testrelpol" +rootscripts="testisofs testvnd testrmib testrelpol" alltests="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \ 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \ 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 \ 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 \ - 81 82 83 84 85 86 87 88 89 sh1 sh2 interp mfs isofs vnd" + 81 82 83 84 85 86 87 88 89 sh1 sh2 interp mfs isofs vnd rmib" tests_no=`expr 0` # If root, make sure the setuid tests have the correct permissions diff --git a/minix/tests/test87.c b/minix/tests/test87.c index 7a0b774a1..99c621d0e 100644 --- a/minix/tests/test87.c +++ b/minix/tests/test87.c @@ -625,7 +625,7 @@ sub87b(void) */ if 
(scn[0].sysctl_num != TEST_INT) e(0); if (SYSCTL_TYPE(scn[0].sysctl_flags) != CTLTYPE_INT) e(0); - if (SYSCTL_FLAGS(scn[0].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[0].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READONLY | CTLFLAG_IMMEDIATE | CTLFLAG_HEX)) e(0); if (SYSCTL_VERS(scn[0].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[0].sysctl_name, "int")) e(0); @@ -638,7 +638,7 @@ sub87b(void) break; if (i == count) e(0); if (SYSCTL_TYPE(scn[i].sysctl_flags) != CTLTYPE_INT) e(0); - if (SYSCTL_FLAGS(scn[i].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[i].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READWRITE | CTLFLAG_PRIVATE | CTLFLAG_IMMEDIATE)) e(0); if (SYSCTL_VERS(scn[i].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[i].sysctl_name, "private")) e(0); @@ -650,7 +650,7 @@ sub87b(void) break; if (i == count) e(0); if (SYSCTL_TYPE(scn[i].sysctl_flags) != CTLTYPE_NODE) e(0); - if (SYSCTL_FLAGS(scn[i].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[i].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READONLY | CTLFLAG_PRIVATE)) e(0); if (SYSCTL_VERS(scn[i].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[i].sysctl_name, "secret")) e(0); @@ -747,11 +747,13 @@ test87b(void) * order for at least the static nodes. We do not make assumptions * about whether dynamic nodes are merged in or (as is the case as of * writing) returned after the static nodes. At this point there - * should be no dynamic nodes here yet anyway. + * should be no dynamic nodes here yet anyway. We mostly ignore + * CTLFLAG_PERMANENT in order to facilitate running this test on a + * remotely mounted subtree. 
*/ if (scn[0].sysctl_num != TEST_INT) e(0); if (SYSCTL_TYPE(scn[0].sysctl_flags) != CTLTYPE_INT) e(0); - if (SYSCTL_FLAGS(scn[0].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[0].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READONLY | CTLFLAG_IMMEDIATE | CTLFLAG_HEX)) e(0); if (SYSCTL_VERS(scn[0].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[0].sysctl_name, "int")) e(0); @@ -761,7 +763,7 @@ test87b(void) if (scn[1].sysctl_num != TEST_BOOL) e(0); if (SYSCTL_TYPE(scn[1].sysctl_flags) != CTLTYPE_BOOL) e(0); - if (SYSCTL_FLAGS(scn[1].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[1].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE)) e(0); if (SYSCTL_VERS(scn[1].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[1].sysctl_name, "bool")) e(0); @@ -771,7 +773,7 @@ test87b(void) if (scn[2].sysctl_num != TEST_QUAD) e(0); if (SYSCTL_TYPE(scn[2].sysctl_flags) != CTLTYPE_QUAD) e(0); - if (SYSCTL_FLAGS(scn[2].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[2].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE)) e(0); if (SYSCTL_VERS(scn[2].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[2].sysctl_name, "quad")) e(0); @@ -781,7 +783,8 @@ test87b(void) if (scn[3].sysctl_num != TEST_STRING) e(0); if (SYSCTL_TYPE(scn[3].sysctl_flags) != CTLTYPE_STRING) e(0); - if (SYSCTL_FLAGS(scn[3].sysctl_flags) != CTLFLAG_READWRITE) e(0); + if ((SYSCTL_FLAGS(scn[3].sysctl_flags) & ~CTLFLAG_PERMANENT) != + CTLFLAG_READWRITE) e(0); if (SYSCTL_VERS(scn[3].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[3].sysctl_name, "string")) e(0); if (scn[3].sysctl_ver == 0) e(0); @@ -789,7 +792,8 @@ test87b(void) if (scn[4].sysctl_num != TEST_STRUCT) e(0); if (SYSCTL_TYPE(scn[4].sysctl_flags) != CTLTYPE_STRUCT) e(0); - if (SYSCTL_FLAGS(scn[4].sysctl_flags) != CTLFLAG_READWRITE) e(0); + if ((SYSCTL_FLAGS(scn[4].sysctl_flags) & ~CTLFLAG_PERMANENT) != + CTLFLAG_READWRITE) e(0); if (SYSCTL_VERS(scn[4].sysctl_flags) != SYSCTL_VERSION) e(0); if 
(strcmp(scn[4].sysctl_name, "struct")) e(0); if (scn[4].sysctl_ver == 0) e(0); @@ -797,7 +801,7 @@ test87b(void) if (scn[5].sysctl_num != TEST_PRIVATE) e(0); if (SYSCTL_TYPE(scn[5].sysctl_flags) != CTLTYPE_INT) e(0); - if (SYSCTL_FLAGS(scn[5].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[5].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READWRITE | CTLFLAG_PRIVATE | CTLFLAG_IMMEDIATE)) e(0); if (SYSCTL_VERS(scn[5].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[5].sysctl_name, "private")) e(0); @@ -807,7 +811,7 @@ test87b(void) if (scn[6].sysctl_num != TEST_ANYWRITE) e(0); if (SYSCTL_TYPE(scn[6].sysctl_flags) != CTLTYPE_INT) e(0); - if (SYSCTL_FLAGS(scn[6].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[6].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READWRITE | CTLFLAG_ANYWRITE | CTLFLAG_IMMEDIATE)) e(0); if (SYSCTL_VERS(scn[6].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[6].sysctl_name, "anywrite")) e(0); @@ -818,7 +822,7 @@ test87b(void) if (scn[i].sysctl_num != TEST_SECRET) e(0); if (SYSCTL_TYPE(scn[i].sysctl_flags) != CTLTYPE_NODE) e(0); - if (SYSCTL_FLAGS(scn[i].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[i].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READONLY | CTLFLAG_PRIVATE)) e(0); if (SYSCTL_VERS(scn[i].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[i].sysctl_name, "secret")) e(0); @@ -895,7 +899,7 @@ test87b(void) if (scn[0].sysctl_num != SECRET_VALUE) e(0); if (SYSCTL_TYPE(scn[0].sysctl_flags) != CTLTYPE_INT) e(0); - if (SYSCTL_FLAGS(scn[0].sysctl_flags) != + if ((SYSCTL_FLAGS(scn[0].sysctl_flags) & ~CTLFLAG_PERMANENT) != (CTLFLAG_READONLY | CTLFLAG_IMMEDIATE)) e(0); if (SYSCTL_VERS(scn[0].sysctl_flags) != SYSCTL_VERSION) e(0); if (strcmp(scn[0].sysctl_name, "value")) e(0); diff --git a/minix/tests/testrmib.sh b/minix/tests/testrmib.sh new file mode 100755 index 000000000..3bd8c06f4 --- /dev/null +++ b/minix/tests/testrmib.sh @@ -0,0 +1,74 @@ +#!/bin/sh + +# Shell script used to test the Remote MIB (RMIB) functionality. 
+ +# We test a couple of things here, using the rmibtest service and test87: +# - some cases where remote MIB subtree registration should fail; +# - a new mount point (minix.rtest) with a small tree behind it, on which we +# test some basic reads and writes on an integer pointer and a function; +# - shadowing of an existing subtree (minix.test) with a similar-looking +# subtree, which we then subject to a subset of test87; +# - resource accounting, making sure everything is the same before and after. + +bomb() { + echo $* + service down rmibtest 2>/dev/null + exit 1 +} + +PATH=/bin:/usr/bin:/sbin:/usr/sbin +export PATH + +echo -n "Test RMIB " + +cd rmibtest + +sysctl -q minix.rtest && bomb "there should not be a minix.rtest" + +old_nodes=`sysctl -n minix.mib.nodes 2>/dev/null` || bomb "no MIB stats?" +old_objects=`sysctl -n minix.mib.objects 2>/dev/null` || bomb "no MIB stats?" +old_remotes=`sysctl -n minix.mib.remotes 2>/dev/null` || bomb "no MIB stats?" + +service up `pwd`/rmibtest -label rmibtest -config rmibtest.conf || \ + bomb "unable to start test service" + +cd .. 
+ +sleep 1 + +new_remotes=`sysctl -n minix.mib.remotes 2>/dev/null` || \ + bomb "unable to get mount stats" +[ $(($old_remotes + 2)) -eq $new_remotes ] || bomb "mounting subtree failed" + +# Test the temporary minix.rtest subtree with its two mirroring nodes +sysctl -q minix.rtest || bomb "there should be a minix.rtest" + +[ $(sysctl -n minix.rtest.int) -eq 5375123 ] || bomb "unexpected int value" +[ $(sysctl -n minix.rtest.func) -eq 5375123 ] || bomb "unexpected func value" +sysctl -wq minix.rtest.int=456 || bomb "unable to set int value" +[ $(sysctl -n minix.rtest.int) -eq 456 ] || bomb "unexpected int value" +[ $(sysctl -n minix.rtest.func) -eq 456 ] || bomb "unexpected func value" +sysctl -wq minix.rtest.func=7895375 || bomb "unable to set func value" +[ $(sysctl -n minix.rtest.int) -eq 7895375 ] || bomb "unexpected int value" +[ $(sysctl -n minix.rtest.func) -eq 7895375 ] || bomb "unexpected func value" + +# Test the minix.test shadowing subtree using a subset of the regular MIB test +./test87 19 >/dev/null || bomb "test87 reported failure" + +service down rmibtest + +sleep 1 + +# Is everything back to the old situation? +new_nodes=`sysctl -n minix.mib.nodes 2>/dev/null` || bomb "no MIB stats?" +new_objects=`sysctl -n minix.mib.objects 2>/dev/null` || bomb "no MIB stats?" +new_remotes=`sysctl -n minix.mib.remotes 2>/dev/null` || bomb "no MIB stats?" + +[ $old_nodes -eq $new_nodes ] || bomb "stats not equal after unmount" +[ $old_objects -eq $new_objects ] || bomb "stats not equal after unmount" +[ $old_remotes -eq $new_remotes ] || bomb "stats not equal after unmount" + +sysctl -q minix.rtest && bomb "there should not be a minix.rtest anymore" + +echo "ok" +exit 0