1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
6 * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
8 * Copyright (C) 2007 Oracle. All rights reserved.
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation, version 2.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
20 #include <linux/module.h>
22 #include <linux/miscdevice.h>
23 #include <linux/mutex.h>
24 #include <linux/reboot.h>
25 #include <asm/uaccess.h>
27 #include "stackglue.h"
31 * The control protocol starts with a handshake. Until the handshake
32 * is complete, the control device will fail all write(2)s.
34 * The handshake is simple. First, the client reads until EOF. Each line
35 * of output is a supported protocol tag. All protocol tags are a single
36 * character followed by a two hex digit version number. Currently the
37 * only tag supported is T01, for "Text-based version 0x01". Next, the
38 * client writes the version they would like to use, including the newline.
39 * Thus, the protocol tag is 'T01\n'. If the version tag written is
40 * unknown, -EINVAL is returned. Once the negotiation is complete, the
41 * client can start sending messages.
43 * The T01 protocol has three messages. First is the "SETN" message.
44 * It has the following syntax:
46 * SETN<space><8-char-hex-nodenum><newline>
48 * This is 14 characters.
50 * The "SETN" message must be the first message following the protocol.
51 * It tells ocfs2_control the local node number.
53 * Next comes the "SETV" message. It has the following syntax:
55 * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
57 * This is 11 characters.
59 * The "SETV" message sets the filesystem locking protocol version as
60 * negotiated by the client. The client negotiates based on the maximum
61 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
62 * number from the "SETV" message must match
63 * user_stack.sp_proto->lp_max_version.pv_major, and the minor number
64 * must be less than or equal to ...->lp_max_version.pv_minor.
66 * Once this information has been set, mounts will be allowed. From this
67 * point on, the "DOWN" message can be sent for node down notification.
68 * It has the following syntax:
70 * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
74 * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
76 * This is 47 characters.
80 * Whether or not the client has done the handshake.
81 * For now, we have just one protocol version.
/* The single protocol tag offered during the handshake, and its length in bytes. */
83 #define OCFS2_CONTROL_PROTO "T01\n"
84 #define OCFS2_CONTROL_PROTO_LEN 4
86 /* Handshake states */
/* Per-open state values; ocfs2_control_write() dispatches on them. */
87 #define OCFS2_CONTROL_HANDSHAKE_INVALID (0)
88 #define OCFS2_CONTROL_HANDSHAKE_READ (1)
89 #define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2)
90 #define OCFS2_CONTROL_HANDSHAKE_VALID (3)
/*
 * T01 message layout: 4-byte op tags, the fixed total length of each
 * message, and the widths of the hex/uuid fields carried inside them.
 */
93 #define OCFS2_CONTROL_MESSAGE_OP_LEN 4
94 #define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN"
95 #define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14
96 #define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV"
97 #define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11
98 #define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN"
99 #define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47
100 #define OCFS2_TEXT_UUID_LEN 32
101 #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2
102 #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8
105 * ocfs2_live_connection is refcounted because the filesystem and
106 * miscdevice sides can detach in different order. Let's just be safe.
/*
 * One entry on ocfs2_live_connection_list, pointing at the cluster
 * connection it stands for.
 * NOTE(review): the comment preceding this struct calls it refcounted,
 * but no reference-count member is visible here - confirm against the
 * full source.
 */
108 struct ocfs2_live_connection {
109 struct list_head oc_list;
110 struct ocfs2_cluster_connection *oc_conn;
/*
 * Per-open state of the control device, kept in file->private_data.
 * NOTE(review): code in this file also uses p->op_state and
 * p->op_this_node; their declarations are not shown in this listing.
 */
113 struct ocfs2_control_private {
/* Links this open onto ocfs2_control_private_list. */
114 struct list_head op_list;
/* Locking protocol version supplied by the SETV message. */
117 struct ocfs2_protocol_version op_proto;
120 /* SETN<space><8-char-hex-nodenum><newline> */
/*
 * Byte-for-byte overlay of a SETN message.
 * NOTE(review): the parser also reads 'space' and 'newline' members,
 * which are not shown in this listing.
 */
121 struct ocfs2_control_message_setn {
122 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
124 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
128 /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
/*
 * Byte-for-byte overlay of a SETV message.
 * NOTE(review): the parser also reads 'space1', 'space2' and 'newline'
 * members, which are not shown in this listing.
 */
129 struct ocfs2_control_message_setv {
130 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
132 char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
134 char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
138 /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
/*
 * Byte-for-byte overlay of a DOWN message.
 * NOTE(review): the parser also reads 'space1', 'space2' and 'newline'
 * members, which are not shown in this listing.
 */
139 struct ocfs2_control_message_down {
140 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
142 char uuid[OCFS2_TEXT_UUID_LEN];
144 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
/*
 * Receive buffer for one write(2). Every message type begins with the
 * same op tag, so 'tag' lets the dispatcher inspect it before choosing
 * which typed member to hand to the per-message handler.
 */
148 union ocfs2_control_message {
149 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
150 struct ocfs2_control_message_setn u_setn;
151 struct ocfs2_control_message_setv u_setv;
152 struct ocfs2_control_message_down u_down;
/*
 * File-scope state. In the paths visible in this file, the two lists,
 * the node number and the running protocol are modified with
 * ocfs2_control_lock held.
 */
/* Incremented when an open finishes its handshake, decremented when such an open is released. */
155 static atomic_t ocfs2_control_opened;
/* Negative means "not set"; release of the last valid open resets it to -1. */
156 static int ocfs2_control_this_node = -1;
/* Protocol version installed by the most recent successful handshake. */
157 static struct ocfs2_protocol_version running_proto;
159 static LIST_HEAD(ocfs2_live_connection_list);
160 static LIST_HEAD(ocfs2_control_private_list);
161 static DEFINE_MUTEX(ocfs2_control_lock);
/*
 * Set the handshake state of this open of the control device. Callers
 * pass one of the OCFS2_CONTROL_HANDSHAKE_* values.
 * NOTE(review): the second parameter and the actual store are on lines
 * missing from this listing; presumably it writes p->op_state.
 */
163 static inline void ocfs2_control_set_handshake_state(struct file *file,
166 struct ocfs2_control_private *p = file->private_data;
/*
 * Return the handshake state of this open of the control device;
 * callers compare the result against OCFS2_CONTROL_HANDSHAKE_* values.
 * NOTE(review): the return statement is missing from this listing;
 * presumably it returns p->op_state.
 */
170 static inline int ocfs2_control_get_handshake_state(struct file *file)
172 struct ocfs2_control_private *p = file->private_data;
/*
 * Search ocfs2_live_connection_list for a connection whose cluster name
 * is exactly @name (same length and same bytes).
 *
 * The caller must hold ocfs2_control_lock; this is enforced with BUG_ON.
 * NOTE(review): the return statements are missing from this listing;
 * presumably the match is returned, or NULL when nothing matches.
 */
176 static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
178 size_t len = strlen(name);
179 struct ocfs2_live_connection *c;
181 BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
183 list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
/* Compare lengths first so that a mere prefix of cc_name cannot match. */
184 if ((c->oc_conn->cc_namelen == len) &&
185 !strncmp(c->oc_conn->cc_name, name, len))
193 * ocfs2_live_connection structures are created underneath the ocfs2
194 * mount path. Since the VFS prevents multiple calls to
195 * fill_super(), we can't get dupes here.
/*
 * Allocate a zeroed ocfs2_live_connection for @conn and register it.
 *
 * With ocfs2_control_lock held, the new entry is added to
 * ocfs2_live_connection_list only if ocfs2_control_opened is non-zero,
 * i.e. some control-device open has completed its handshake; otherwise
 * the "daemon is not present" message is emitted.
 * NOTE(review): allocation-failure handling, the store through @c_ret
 * and the return value are on lines missing from this listing.
 */
197 static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
198 struct ocfs2_live_connection **c_ret)
201 struct ocfs2_live_connection *c;
203 c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
207 mutex_lock(&ocfs2_control_lock);
210 if (atomic_read(&ocfs2_control_opened))
211 list_add(&c->oc_list, &ocfs2_live_connection_list);
214 "ocfs2: Userspace control daemon is not present\n");
218 mutex_unlock(&ocfs2_control_lock);
229 * This function disconnects the cluster connection from ocfs2_control.
230 * Afterwards, userspace can't affect the cluster connection.
/*
 * Unlink @c from the live connection list under ocfs2_control_lock.
 * NOTE(review): lines are missing from this listing inside and after
 * the locked region; any further teardown of @c is not visible here.
 */
232 static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
234 mutex_lock(&ocfs2_control_lock);
235 list_del_init(&c->oc_list);
237 mutex_unlock(&ocfs2_control_lock);
/*
 * Copy exactly one message from user space.
 *
 * @target:     kernel buffer to fill
 * @target_len: number of bytes the caller expects
 * @buf:        user buffer passed to write(2)
 * @count:      byte count passed to write(2)
 *
 * The write is rejected unless @count equals @target_len and does not
 * exceed the size of union ocfs2_control_message; a failing
 * copy_from_user() is rejected as well.
 * NOTE(review): the actual return values are on lines missing from this
 * listing.
 */
242 static int ocfs2_control_cfu(void *target, size_t target_len,
243 const char __user *buf, size_t count)
245 /* The T01 expects write(2) calls to have exactly one command */
246 if ((count != target_len) ||
247 (count > sizeof(union ocfs2_control_message)))
250 if (copy_from_user(target, buf, target_len))
/*
 * Handshake step: read the protocol tag written by the client and
 * compare it with OCFS2_CONTROL_PROTO. On a match the open moves to
 * OCFS2_CONTROL_HANDSHAKE_PROTOCOL.
 * NOTE(review): the error returns and the success return value are on
 * lines missing from this listing.
 */
256 static ssize_t ocfs2_control_validate_protocol(struct file *file,
257 const char __user *buf,
/* Exactly PROTO_LEN bytes, not NUL-terminated; only compared with strncmp(). */
261 char kbuf[OCFS2_CONTROL_PROTO_LEN];
263 ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
268 if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
271 ocfs2_control_set_handshake_state(file,
272 OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
/*
 * Deliver a node-down notification: look up the live connection named
 * @uuid and invoke its recovery handler with the node number and the
 * connection's recovery data, all under ocfs2_control_lock.
 * NOTE(review): the second parameter and (presumably) a NULL check on
 * the lookup result are on lines missing from this listing.
 */
277 static void ocfs2_control_send_down(const char *uuid,
280 struct ocfs2_live_connection *c;
282 mutex_lock(&ocfs2_control_lock);
284 c = ocfs2_connection_find(uuid);
286 BUG_ON(c->oc_conn == NULL);
287 c->oc_conn->cc_recovery_handler(nodenum,
288 c->oc_conn->cc_recovery_data);
291 mutex_unlock(&ocfs2_control_lock);
295 * Called whenever configuration elements are sent to /dev/ocfs2_control.
296 * If all configuration elements are present, try to set the global
297 * values. If there is a problem, return an error. Skip any missing
298 * elements, and only bump ocfs2_control_opened when we have all elements
299 * and are successful.
/*
 * Try to promote this open's node number and protocol version to the
 * global values. Must be called while the open is in
 * OCFS2_CONTROL_HANDSHAKE_PROTOCOL (enforced with BUG_ON).
 * NOTE(review): the bodies of the conditional branches, the local
 * declarations and the return statements are on lines missing from
 * this listing.
 */
301 static int ocfs2_control_install_private(struct file *file)
305 struct ocfs2_control_private *p = file->private_data;
307 BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
309 mutex_lock(&ocfs2_control_lock);
/* A negative op_this_node means SETN has not been received yet. */
311 if (p->op_this_node < 0) {
/* A global node number that is already set must agree with this open's. */
313 } else if ((ocfs2_control_this_node >= 0) &&
314 (ocfs2_control_this_node != p->op_this_node)) {
/* A zero major means SETV has not been received yet. */
319 if (!p->op_proto.pv_major) {
/* While connections are live, the running version must match exactly. */
321 } else if (!list_empty(&ocfs2_live_connection_list) &&
322 ((running_proto.pv_major != p->op_proto.pv_major) ||
323 (running_proto.pv_minor != p->op_proto.pv_minor))) {
329 ocfs2_control_this_node = p->op_this_node;
330 running_proto.pv_major = p->op_proto.pv_major;
331 running_proto.pv_minor = p->op_proto.pv_minor;
335 mutex_unlock(&ocfs2_control_lock);
338 /* We set the global values successfully */
339 atomic_inc(&ocfs2_control_opened);
340 ocfs2_control_set_handshake_state(file,
341 OCFS2_CONTROL_HANDSHAKE_VALID);
/*
 * Handle a SETN message: validate state, tag and separators, parse the
 * hexadecimal node number, store it in the per-open data and try to
 * install the configuration.
 * NOTE(review): the local declarations, the error returns and the
 * end-pointer check after parsing are on lines missing from this
 * listing.
 */
347 static int ocfs2_control_do_setnode_msg(struct file *file,
348 struct ocfs2_control_message_setn *msg)
352 struct ocfs2_control_private *p = file->private_data;
/* SETN is only accepted between protocol negotiation and a valid handshake. */
354 if (ocfs2_control_get_handshake_state(file) !=
355 OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
358 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
359 OCFS2_CONTROL_MESSAGE_OP_LEN))
362 if ((msg->space != ' ') || (msg->newline != '\n'))
/* Presumably NUL-terminates nodestr for simple_strtol() - member order not shown here. */
364 msg->space = msg->newline = '\0';
366 nodenum = simple_strtol(msg->nodestr, &ptr, 16);
/* The node number must fit in a non-negative int. */
370 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
371 (nodenum > INT_MAX) || (nodenum < 0))
373 p->op_this_node = nodenum;
375 return ocfs2_control_install_private(file);
/*
 * Handle a SETV message: validate state, tag and separators, parse the
 * hexadecimal major and minor, check them against the maximum version
 * of the user stack, store them in the per-open data and try to install
 * the configuration.
 * NOTE(review): the local declarations, the error returns and the
 * end-pointer checks after parsing are on lines missing from this
 * listing.
 */
378 static int ocfs2_control_do_setversion_msg(struct file *file,
379 struct ocfs2_control_message_setv *msg)
383 struct ocfs2_control_private *p = file->private_data;
384 struct ocfs2_protocol_version *max =
385 &user_stack.sp_proto->lp_max_version;
/* SETV is only accepted between protocol negotiation and a valid handshake. */
387 if (ocfs2_control_get_handshake_state(file) !=
388 OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
391 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
392 OCFS2_CONTROL_MESSAGE_OP_LEN))
395 if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
396 (msg->newline != '\n'))
/* Presumably NUL-terminates the two hex fields for simple_strtol() - member order not shown here. */
398 msg->space1 = msg->space2 = msg->newline = '\0';
400 major = simple_strtol(msg->major, &ptr, 16);
403 minor = simple_strtol(msg->minor, &ptr, 16);
408 * The major must be between 1 and 255, inclusive. The minor
409 * must be between 0 and 255, inclusive. The version passed in
410 * must be within the maximum version supported by the filesystem.
412 if ((major == LONG_MIN) || (major == LONG_MAX) ||
413 (major > (u8)-1) || (major < 1))
415 if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
416 (minor > (u8)-1) || (minor < 0))
/* Major must equal the supported major; minor may not exceed the supported minor. */
418 if ((major != max->pv_major) ||
419 (minor > max->pv_minor))
422 p->op_proto.pv_major = major;
423 p->op_proto.pv_minor = minor;
425 return ocfs2_control_install_private(file);
/*
 * Handle a DOWN message: only accepted once the handshake is VALID.
 * Validates tag and separators, parses the hexadecimal node number and
 * forwards the uuid and node number to ocfs2_control_send_down().
 * NOTE(review): the local declarations, the error returns, the
 * end-pointer check and the final return are on lines missing from
 * this listing.
 */
428 static int ocfs2_control_do_down_msg(struct file *file,
429 struct ocfs2_control_message_down *msg)
434 if (ocfs2_control_get_handshake_state(file) !=
435 OCFS2_CONTROL_HANDSHAKE_VALID)
438 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
439 OCFS2_CONTROL_MESSAGE_OP_LEN))
442 if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
443 (msg->newline != '\n'))
/* Presumably NUL-terminates uuid and nodestr in place - member order not shown here. */
445 msg->space1 = msg->space2 = msg->newline = '\0';
447 nodenum = simple_strtol(msg->nodestr, &p, 16);
/* The node number must fit in a non-negative int. */
451 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
452 (nodenum > INT_MAX) || (nodenum < 0))
455 ocfs2_control_send_down(msg->uuid, nodenum);
/*
 * Receive one T01 message from user space and dispatch it by total
 * length and op tag to the SETN, SETV or DOWN handler.
 *
 * Returns the handler's (or copy helper's) error if non-zero, otherwise
 * @count.
 * NOTE(review): the third parameter, the local declarations and the
 * fall-through branch for unrecognised messages are on lines missing
 * from this listing.
 */
460 static ssize_t ocfs2_control_message(struct file *file,
461 const char __user *buf,
465 union ocfs2_control_message msg;
467 /* Try to catch padding issues */
468 WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
469 (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));
471 memset(&msg, 0, sizeof(union ocfs2_control_message));
/* target_len is count itself, so only the "larger than the union" check can fail here. */
472 ret = ocfs2_control_cfu(&msg, count, buf, count);
476 if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
477 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
478 OCFS2_CONTROL_MESSAGE_OP_LEN))
479 ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
480 else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
481 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
482 OCFS2_CONTROL_MESSAGE_OP_LEN))
483 ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
484 else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
485 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
486 OCFS2_CONTROL_MESSAGE_OP_LEN))
487 ret = ocfs2_control_do_down_msg(file, &msg.u_down);
492 return ret ? ret : count;
/*
 * write() for the control device. Dispatches on the handshake state of
 * this open: after the protocol list has been read (READ) the write is
 * treated as the protocol selection; in PROTOCOL and VALID it is
 * treated as a T01 message.
 * NOTE(review): the remaining parameters, the handling of INVALID, the
 * default case and the return are on lines missing from this listing.
 */
495 static ssize_t ocfs2_control_write(struct file *file,
496 const char __user *buf,
502 switch (ocfs2_control_get_handshake_state(file)) {
503 case OCFS2_CONTROL_HANDSHAKE_INVALID:
507 case OCFS2_CONTROL_HANDSHAKE_READ:
508 ret = ocfs2_control_validate_protocol(file, buf,
512 case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
513 case OCFS2_CONTROL_HANDSHAKE_VALID:
514 ret = ocfs2_control_message(file, buf, count);
527 * This is a naive version. If we ever have a new protocol, we'll expand
528 * it. Probably using seq_file.
/*
 * read() for the control device: hands the supported protocol list
 * (OCFS2_CONTROL_PROTO) to the client, honouring the file offset. Once
 * the offset has reached the end of the list the open moves to
 * OCFS2_CONTROL_HANDSHAKE_READ, which is what allows the protocol
 * selection write.
 * NOTE(review): the remaining parameters, the offset update and the
 * return statements are on lines missing from this listing.
 */
530 static ssize_t ocfs2_control_read(struct file *file,
/* const: the protocol list is a string literal and is only ever read from. */
535 const char *proto_string = OCFS2_CONTROL_PROTO;
/* Nothing left to hand out once the offset is at or past the end. */
538 if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
541 to_write = OCFS2_CONTROL_PROTO_LEN - *ppos;
542 if (to_write > count)
544 if (copy_to_user(buf, proto_string + *ppos, to_write))
549 /* Have we read the whole protocol list? */
550 if (*ppos >= OCFS2_CONTROL_PROTO_LEN)
551 ocfs2_control_set_handshake_state(file,
552 OCFS2_CONTROL_HANDSHAKE_READ);
/*
 * release() for the control device.
 *
 * Under ocfs2_control_lock: if this open had completed its handshake it
 * gives up its count in ocfs2_control_opened. When that was the last
 * valid open, losing it while connections are still live is reported as
 * requiring an emergency restart; otherwise the global node number and
 * running protocol version are reset. In every case the per-open data
 * is unlinked from its list and detached from the file.
 *
 * Fix: the original cleared running_proto.pv_major twice and never
 * reset pv_minor, contradicting the "resets the locking protocol
 * version" comment; the second store now clears pv_minor.
 * NOTE(review): the braces, the early-exit jump, the print/restart
 * calls, the freeing of the per-open data and the return are on lines
 * missing from this listing.
 */
557 static int ocfs2_control_release(struct inode *inode, struct file *file)
559 struct ocfs2_control_private *p = file->private_data;
561 mutex_lock(&ocfs2_control_lock);
/* Only opens that reached VALID were counted in ocfs2_control_opened. */
563 if (ocfs2_control_get_handshake_state(file) !=
564 OCFS2_CONTROL_HANDSHAKE_VALID)
567 if (atomic_dec_and_test(&ocfs2_control_opened)) {
568 if (!list_empty(&ocfs2_live_connection_list)) {
569 /* XXX: Do bad things! */
571 "ocfs2: Unexpected release of ocfs2_control!\n"
572 " Loss of cluster connection requires "
573 "an emergency restart!\n");
577 * Last valid close clears the node number and resets
578 * the locking protocol version
580 ocfs2_control_this_node = -1;
581 running_proto.pv_major = 0;
/* Clear the minor as well (was a duplicated pv_major store). */
582 running_proto.pv_minor = 0;
586 list_del_init(&p->op_list);
587 file->private_data = NULL;
589 mutex_unlock(&ocfs2_control_lock);
/*
 * open() for the control device: allocate zeroed per-open data, mark
 * its node number as unset (-1), attach it to the file and link it onto
 * ocfs2_control_private_list under ocfs2_control_lock.
 * NOTE(review): allocation-failure handling and the return value are on
 * lines missing from this listing.
 */
596 static int ocfs2_control_open(struct inode *inode, struct file *file)
598 struct ocfs2_control_private *p;
600 p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
603 p->op_this_node = -1;
605 mutex_lock(&ocfs2_control_lock);
606 file->private_data = p;
607 list_add(&p->op_list, &ocfs2_control_private_list);
608 mutex_unlock(&ocfs2_control_lock);
/* File operations of the control device: open, release, read and write handlers defined above. */
613 static const struct file_operations ocfs2_control_fops = {
614 .open = ocfs2_control_open,
615 .release = ocfs2_control_release,
616 .read = ocfs2_control_read,
617 .write = ocfs2_control_write,
618 .owner = THIS_MODULE,
/*
 * The "ocfs2_control" misc device, with a dynamically assigned minor.
 * NOTE(review): this object has external linkage but is only referenced
 * from this file in the visible code - consider making it static after
 * checking for users elsewhere.
 */
621 struct miscdevice ocfs2_control_device = {
622 .minor = MISC_DYNAMIC_MINOR,
623 .name = "ocfs2_control",
624 .fops = &ocfs2_control_fops,
/*
 * Reset the count of valid opens and register the control misc device.
 * NOTE(review): the failure check, most of the error message and the
 * return are on lines missing from this listing.
 */
627 static int ocfs2_control_init(void)
631 atomic_set(&ocfs2_control_opened, 0);
633 rc = misc_register(&ocfs2_control_device);
636 "ocfs2: Unable to register ocfs2_control device "
/*
 * Deregister the control misc device.
 * NOTE(review): the failure check and most of the error message are on
 * lines missing from this listing.
 */
643 static void ocfs2_control_exit(void)
647 rc = misc_deregister(&ocfs2_control_device);
650 "ocfs2: Unable to deregister ocfs2_control device "
/* Module entry point: returns the result of ocfs2_control_init(). */
655 static int __init user_stack_init(void)
657 return ocfs2_control_init();
/* Module exit point: calls ocfs2_control_exit(). */
660 static void __exit user_stack_exit(void)
662 ocfs2_control_exit();
/* Module metadata and registration of the init/exit entry points. */
665 MODULE_AUTHOR("Oracle");
666 MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
667 MODULE_LICENSE("GPL");
668 module_init(user_stack_init);
669 module_exit(user_stack_exit);