]> CyberLeo.Net >> Repos - FreeBSD/releng/8.2.git/blob - sbin/hastd/control.c
MFC r213265:
[FreeBSD/releng/8.2.git] / sbin / hastd / control.c
1 /*-
2  * Copyright (c) 2009-2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Pawel Jakub Dawidek under sponsorship from
6  * the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/types.h>
34 #include <sys/wait.h>
35
36 #include <assert.h>
37 #include <errno.h>
38 #include <pthread.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <unistd.h>
43
44 #include "hast.h"
45 #include "hastd.h"
46 #include "hast_proto.h"
47 #include "hooks.h"
48 #include "nv.h"
49 #include "pjdlog.h"
50 #include "proto.h"
51 #include "subr.h"
52
53 #include "control.h"
54
55 void
56 child_cleanup(struct hast_resource *res)
57 {
58
59         proto_close(res->hr_ctrl);
60         res->hr_ctrl = NULL;
61         if (res->hr_event != NULL) {
62                 proto_close(res->hr_event);
63                 res->hr_event = NULL;
64         }
65         res->hr_workerpid = 0;
66 }
67
68 static void
69 control_set_role_common(struct hastd_config *cfg, struct nv *nvout,
70     uint8_t role, struct hast_resource *res, const char *name, unsigned int no)
71 {
72         int oldrole;
73
74         /* Name is always needed. */
75         if (name != NULL)
76                 nv_add_string(nvout, name, "resource%u", no);
77
78         if (res == NULL) {
79                 assert(cfg != NULL);
80                 assert(name != NULL);
81
82                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
83                         if (strcmp(res->hr_name, name) == 0)
84                                 break;
85                 }
86                 if (res == NULL) {
87                         nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
88                         return;
89                 }
90         }
91         assert(res != NULL);
92
93         /* Send previous role back. */
94         nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
95
96         /* Nothing changed, return here. */
97         if (role == res->hr_role)
98                 return;
99
100         pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
101         pjdlog_info("Role changed to %s.", role2str(role));
102
103         /* Change role to the new one. */
104         oldrole = res->hr_role;
105         res->hr_role = role;
106         pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
107
108         /*
109          * If previous role was primary or secondary we have to kill process
110          * doing that work.
111          */
112         if (res->hr_workerpid != 0) {
113                 if (kill(res->hr_workerpid, SIGTERM) < 0) {
114                         pjdlog_errno(LOG_WARNING,
115                             "Unable to kill worker process %u",
116                             (unsigned int)res->hr_workerpid);
117                 } else if (waitpid(res->hr_workerpid, NULL, 0) !=
118                     res->hr_workerpid) {
119                         pjdlog_errno(LOG_WARNING,
120                             "Error while waiting for worker process %u",
121                             (unsigned int)res->hr_workerpid);
122                 } else {
123                         pjdlog_debug(1, "Worker process %u stopped.",
124                             (unsigned int)res->hr_workerpid);
125                 }
126                 child_cleanup(res);
127         }
128
129         /* Start worker process if we are changing to primary. */
130         if (role == HAST_ROLE_PRIMARY)
131                 hastd_primary(res);
132         pjdlog_prefix_set("%s", "");
133         hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole),
134             role2str(res->hr_role), NULL);
135 }
136
/*
 * Change the role of the given resource directly, without a control
 * request behind it: no configuration to search, no response to fill in
 * and no resource name to look up or echo back.
 */
void
control_set_role(struct hast_resource *res, uint8_t role)
{
        struct hastd_config *nocfg = NULL;
        struct nv *noreply = NULL;
        const char *noname = NULL;

        control_set_role_common(nocfg, noreply, role, res, noname, 0);
}
143
144 static void
145 control_status_worker(struct hast_resource *res, struct nv *nvout,
146     unsigned int no)
147 {
148         struct nv *cnvin, *cnvout;
149         const char *str;
150         int error;
151
152         cnvin = cnvout = NULL;
153         error = 0;
154
155         /*
156          * Prepare and send command to worker process.
157          */
158         cnvout = nv_alloc();
159         nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd");
160         error = nv_error(cnvout);
161         if (error != 0) {
162                 /* LOG */
163                 goto end;
164         }
165         if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) {
166                 error = errno;
167                 /* LOG */
168                 goto end;
169         }
170
171         /*
172          * Receive response.
173          */
174         if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) {
175                 error = errno;
176                 /* LOG */
177                 goto end;
178         }
179
180         error = nv_get_int64(cnvin, "error");
181         if (error != 0)
182                 goto end;
183
184         if ((str = nv_get_string(cnvin, "status")) == NULL) {
185                 error = ENOENT;
186                 /* LOG */
187                 goto end;
188         }
189         nv_add_string(nvout, str, "status%u", no);
190         nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no);
191         nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"),
192             "extentsize%u", no);
193         nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"),
194             "keepdirty%u", no);
195 end:
196         if (cnvin != NULL)
197                 nv_free(cnvin);
198         if (cnvout != NULL)
199                 nv_free(cnvout);
200         if (error != 0)
201                 nv_add_int16(nvout, error, "error");
202 }
203
204 static void
205 control_status(struct hastd_config *cfg, struct nv *nvout,
206     struct hast_resource *res, const char *name, unsigned int no)
207 {
208
209         assert(cfg != NULL);
210         assert(nvout != NULL);
211         assert(name != NULL);
212
213         /* Name is always needed. */
214         nv_add_string(nvout, name, "resource%u", no);
215
216         if (res == NULL) {
217                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
218                         if (strcmp(res->hr_name, name) == 0)
219                                 break;
220                 }
221                 if (res == NULL) {
222                         nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
223                         return;
224                 }
225         }
226         assert(res != NULL);
227         nv_add_string(nvout, res->hr_provname, "provname%u", no);
228         nv_add_string(nvout, res->hr_localpath, "localpath%u", no);
229         nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no);
230         switch (res->hr_replication) {
231         case HAST_REPLICATION_FULLSYNC:
232                 nv_add_string(nvout, "fullsync", "replication%u", no);
233                 break;
234         case HAST_REPLICATION_MEMSYNC:
235                 nv_add_string(nvout, "memsync", "replication%u", no);
236                 break;
237         case HAST_REPLICATION_ASYNC:
238                 nv_add_string(nvout, "async", "replication%u", no);
239                 break;
240         default:
241                 nv_add_string(nvout, "unknown", "replication%u", no);
242                 break;
243         }
244         nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
245
246         switch (res->hr_role) {
247         case HAST_ROLE_PRIMARY:
248                 assert(res->hr_workerpid != 0);
249                 /* FALLTHROUGH */
250         case HAST_ROLE_SECONDARY:
251                 if (res->hr_workerpid != 0)
252                         break;
253                 /* FALLTHROUGH */
254         default:
255                 return;
256         }
257
258         /*
259          * If we are here, it means that we have a worker process, which we
260          * want to ask some questions.
261          */
262         control_status_worker(res, nvout, no);
263 }
264
265 void
266 control_handle(struct hastd_config *cfg)
267 {
268         struct proto_conn *conn;
269         struct nv *nvin, *nvout;
270         unsigned int ii;
271         const char *str;
272         uint8_t cmd, role;
273         int error;
274
275         if (proto_accept(cfg->hc_controlconn, &conn) < 0) {
276                 pjdlog_errno(LOG_ERR, "Unable to accept control connection");
277                 return;
278         }
279
280         nvin = nvout = NULL;
281         role = HAST_ROLE_UNDEF;
282
283         if (hast_proto_recv_hdr(conn, &nvin) < 0) {
284                 pjdlog_errno(LOG_ERR, "Unable to receive control header");
285                 nvin = NULL;
286                 goto close;
287         }
288
289         /* Obtain command code. 0 means that nv_get_uint8() failed. */
290         cmd = nv_get_uint8(nvin, "cmd");
291         if (cmd == 0) {
292                 pjdlog_error("Control header is missing 'cmd' field.");
293                 error = EHAST_INVALID;
294                 goto close;
295         }
296
297         /* Allocate outgoing nv structure. */
298         nvout = nv_alloc();
299         if (nvout == NULL) {
300                 pjdlog_error("Unable to allocate header for control response.");
301                 error = EHAST_NOMEMORY;
302                 goto close;
303         }
304
305         error = 0;
306
307         str = nv_get_string(nvin, "resource0");
308         if (str == NULL) {
309                 pjdlog_error("Control header is missing 'resource0' field.");
310                 error = EHAST_INVALID;
311                 goto fail;
312         }
313         if (cmd == HASTCTL_SET_ROLE) {
314                 role = nv_get_uint8(nvin, "role");
315                 switch (role) {
316                 case HAST_ROLE_INIT:    /* Is that valid to set, hmm? */
317                 case HAST_ROLE_PRIMARY:
318                 case HAST_ROLE_SECONDARY:
319                         break;
320                 default:
321                         pjdlog_error("Invalid role received (%hhu).", role);
322                         error = EHAST_INVALID;
323                         goto fail;
324                 }
325         }
326         if (strcmp(str, "all") == 0) {
327                 struct hast_resource *res;
328
329                 /* All configured resources. */
330
331                 ii = 0;
332                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
333                         switch (cmd) {
334                         case HASTCTL_SET_ROLE:
335                                 control_set_role_common(cfg, nvout, role, res,
336                                     res->hr_name, ii++);
337                                 break;
338                         case HASTCTL_STATUS:
339                                 control_status(cfg, nvout, res, res->hr_name,
340                                     ii++);
341                                 break;
342                         default:
343                                 pjdlog_error("Invalid command received (%hhu).",
344                                     cmd);
345                                 error = EHAST_UNIMPLEMENTED;
346                                 goto fail;
347                         }
348                 }
349         } else {
350                 /* Only selected resources. */
351
352                 for (ii = 0; ; ii++) {
353                         str = nv_get_string(nvin, "resource%u", ii);
354                         if (str == NULL)
355                                 break;
356                         switch (cmd) {
357                         case HASTCTL_SET_ROLE:
358                                 control_set_role_common(cfg, nvout, role, NULL,
359                                     str, ii);
360                                 break;
361                         case HASTCTL_STATUS:
362                                 control_status(cfg, nvout, NULL, str, ii);
363                                 break;
364                         default:
365                                 pjdlog_error("Invalid command received (%hhu).",
366                                     cmd);
367                                 error = EHAST_UNIMPLEMENTED;
368                                 goto fail;
369                         }
370                 }
371         }
372         if (nv_error(nvout) != 0)
373                 goto close;
374 fail:
375         if (error != 0)
376                 nv_add_int16(nvout, error, "error");
377
378         if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0)
379                 pjdlog_errno(LOG_ERR, "Unable to send control response");
380 close:
381         if (nvin != NULL)
382                 nv_free(nvin);
383         if (nvout != NULL)
384                 nv_free(nvout);
385         proto_close(conn);
386 }
387
388 /*
389  * Thread handles control requests from the parent.
390  */
391 void *
392 ctrl_thread(void *arg)
393 {
394         struct hast_resource *res = arg;
395         struct nv *nvin, *nvout;
396         uint8_t cmd;
397
398         for (;;) {
399                 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) {
400                         if (sigexit_received)
401                                 pthread_exit(NULL);
402                         pjdlog_errno(LOG_ERR,
403                             "Unable to receive control message");
404                         kill(getpid(), SIGTERM);
405                         pthread_exit(NULL);
406                 }
407                 cmd = nv_get_uint8(nvin, "cmd");
408                 if (cmd == 0) {
409                         pjdlog_error("Control message is missing 'cmd' field.");
410                         nv_free(nvin);
411                         continue;
412                 }
413                 nv_free(nvin);
414                 nvout = nv_alloc();
415                 switch (cmd) {
416                 case HASTCTL_STATUS:
417                         if (res->hr_remotein != NULL &&
418                             res->hr_remoteout != NULL) {
419                                 nv_add_string(nvout, "complete", "status");
420                         } else {
421                                 nv_add_string(nvout, "degraded", "status");
422                         }
423                         nv_add_uint32(nvout, (uint32_t)res->hr_extentsize,
424                             "extentsize");
425                         if (res->hr_role == HAST_ROLE_PRIMARY) {
426                                 nv_add_uint32(nvout,
427                                     (uint32_t)res->hr_keepdirty, "keepdirty");
428                                 nv_add_uint64(nvout,
429                                     (uint64_t)(activemap_ndirty(res->hr_amp) *
430                                     res->hr_extentsize), "dirty");
431                         } else {
432                                 nv_add_uint32(nvout, (uint32_t)0, "keepdirty");
433                                 nv_add_uint64(nvout, (uint64_t)0, "dirty");
434                         }
435                         break;
436                 default:
437                         nv_add_int16(nvout, EINVAL, "error");
438                         break;
439                 }
440                 if (nv_error(nvout) != 0) {
441                         pjdlog_error("Unable to create answer on control message.");
442                         nv_free(nvout);
443                         continue;
444                 }
445                 if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) {
446                         pjdlog_errno(LOG_ERR,
447                             "Unable to send reply to control message");
448                 }
449                 nv_free(nvout);
450         }
451         /* NOTREACHED */
452         return (NULL);
453 }