]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - sbin/hastd/control.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / sbin / hastd / control.c
1 /*-
2  * Copyright (c) 2009-2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Pawel Jakub Dawidek under sponsorship from
6  * the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <signal.h>
36
37 #include <assert.h>
38 #include <errno.h>
39 #include <pthread.h>
40 #include <stdio.h>
41 #include <string.h>
42
43 #include "hast.h"
44 #include "hastd.h"
45 #include "hast_proto.h"
46 #include "nv.h"
47 #include "pjdlog.h"
48 #include "proto.h"
49 #include "subr.h"
50
51 #include "control.h"
52
53 static void
54 control_set_role(struct hastd_config *cfg, struct nv *nvout, uint8_t role,
55     struct hast_resource *res, const char *name, unsigned int no)
56 {
57
58         assert(cfg != NULL);
59         assert(nvout != NULL);
60         assert(name != NULL);
61
62         /* Name is always needed. */
63         nv_add_string(nvout, name, "resource%u", no);
64
65         if (res == NULL) {
66                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
67                         if (strcmp(res->hr_name, name) == 0)
68                                 break;
69                 }
70                 if (res == NULL) {
71                         nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
72                         return;
73                 }
74         }
75         assert(res != NULL);
76
77         /* Send previous role back. */
78         nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
79
80         /* Nothing changed, return here. */
81         if (role == res->hr_role)
82                 return;
83
84         pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
85         pjdlog_info("Role changed to %s.", role2str(role));
86
87         /* Change role to the new one. */
88         res->hr_role = role;
89         pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
90
91         /*
92          * If previous role was primary or secondary we have to kill process
93          * doing that work.
94          */
95         if (res->hr_workerpid != 0) {
96                 if (kill(res->hr_workerpid, SIGTERM) < 0) {
97                         pjdlog_errno(LOG_WARNING,
98                             "Unable to kill worker process %u",
99                             (unsigned int)res->hr_workerpid);
100                 } else if (waitpid(res->hr_workerpid, NULL, 0) !=
101                     res->hr_workerpid) {
102                         pjdlog_errno(LOG_WARNING,
103                             "Error while waiting for worker process %u",
104                             (unsigned int)res->hr_workerpid);
105                 } else {
106                         pjdlog_debug(1, "Worker process %u stopped.",
107                             (unsigned int)res->hr_workerpid);
108                 }
109                 res->hr_workerpid = 0;
110         }
111
112         /* Start worker process if we are changing to primary. */
113         if (role == HAST_ROLE_PRIMARY)
114                 hastd_primary(res);
115         pjdlog_prefix_set("%s", "");
116 }
117
118 static void
119 control_status_worker(struct hast_resource *res, struct nv *nvout,
120     unsigned int no)
121 {
122         struct nv *cnvin, *cnvout;
123         const char *str;
124         int error;
125
126         cnvin = cnvout = NULL;
127         error = 0;
128
129         /*
130          * Prepare and send command to worker process.
131          */
132         cnvout = nv_alloc();
133         nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd");
134         error = nv_error(cnvout);
135         if (error != 0) {
136                 /* LOG */
137                 goto end;
138         }
139         if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) {
140                 error = errno;
141                 /* LOG */
142                 goto end;
143         }
144
145         /*
146          * Receive response.
147          */
148         if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) {
149                 error = errno;
150                 /* LOG */
151                 goto end;
152         }
153
154         error = nv_get_int64(cnvin, "error");
155         if (error != 0)
156                 goto end;
157
158         if ((str = nv_get_string(cnvin, "status")) == NULL) {
159                 error = ENOENT;
160                 /* LOG */
161                 goto end;
162         }
163         nv_add_string(nvout, str, "status%u", no);
164         nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no);
165         nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"),
166             "extentsize%u", no);
167         nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"),
168             "keepdirty%u", no);
169 end:
170         if (cnvin != NULL)
171                 nv_free(cnvin);
172         if (cnvout != NULL)
173                 nv_free(cnvout);
174         if (error != 0)
175                 nv_add_int16(nvout, error, "error");
176 }
177
178 static void
179 control_status(struct hastd_config *cfg, struct nv *nvout,
180     struct hast_resource *res, const char *name, unsigned int no)
181 {
182
183         assert(cfg != NULL);
184         assert(nvout != NULL);
185         assert(name != NULL);
186
187         /* Name is always needed. */
188         nv_add_string(nvout, name, "resource%u", no);
189
190         if (res == NULL) {
191                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
192                         if (strcmp(res->hr_name, name) == 0)
193                                 break;
194                 }
195                 if (res == NULL) {
196                         nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
197                         return;
198                 }
199         }
200         assert(res != NULL);
201         nv_add_string(nvout, res->hr_provname, "provname%u", no);
202         nv_add_string(nvout, res->hr_localpath, "localpath%u", no);
203         nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no);
204         switch (res->hr_replication) {
205         case HAST_REPLICATION_FULLSYNC:
206                 nv_add_string(nvout, "fullsync", "replication%u", no);
207                 break;
208         case HAST_REPLICATION_MEMSYNC:
209                 nv_add_string(nvout, "memsync", "replication%u", no);
210                 break;
211         case HAST_REPLICATION_ASYNC:
212                 nv_add_string(nvout, "async", "replication%u", no);
213                 break;
214         default:
215                 nv_add_string(nvout, "unknown", "replication%u", no);
216                 break;
217         }
218         nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
219
220         switch (res->hr_role) {
221         case HAST_ROLE_PRIMARY:
222                 assert(res->hr_workerpid != 0);
223                 /* FALLTHROUGH */
224         case HAST_ROLE_SECONDARY:
225                 if (res->hr_workerpid != 0)
226                         break;
227                 /* FALLTHROUGH */
228         default:
229                 return;
230         }
231
232         /*
233          * If we are here, it means that we have a worker process, which we
234          * want to ask some questions.
235          */
236         control_status_worker(res, nvout, no);
237 }
238
239 void
240 control_handle(struct hastd_config *cfg)
241 {
242         struct proto_conn *conn;
243         struct nv *nvin, *nvout;
244         unsigned int ii;
245         const char *str;
246         uint8_t cmd, role;
247         int error;
248
249         if (proto_accept(cfg->hc_controlconn, &conn) < 0) {
250                 pjdlog_errno(LOG_ERR, "Unable to accept control connection");
251                 return;
252         }
253
254         nvin = nvout = NULL;
255         role = HAST_ROLE_UNDEF;
256
257         if (hast_proto_recv_hdr(conn, &nvin) < 0) {
258                 pjdlog_errno(LOG_ERR, "Unable to receive control header");
259                 nvin = NULL;
260                 goto close;
261         }
262
263         /* Obtain command code. 0 means that nv_get_uint8() failed. */
264         cmd = nv_get_uint8(nvin, "cmd");
265         if (cmd == 0) {
266                 pjdlog_error("Control header is missing 'cmd' field.");
267                 error = EHAST_INVALID;
268                 goto close;
269         }
270
271         /* Allocate outgoing nv structure. */
272         nvout = nv_alloc();
273         if (nvout == NULL) {
274                 pjdlog_error("Unable to allocate header for control response.");
275                 error = EHAST_NOMEMORY;
276                 goto close;
277         }
278
279         error = 0;
280
281         str = nv_get_string(nvin, "resource0");
282         if (str == NULL) {
283                 pjdlog_error("Control header is missing 'resource0' field.");
284                 error = EHAST_INVALID;
285                 goto fail;
286         }
287         if (cmd == HASTCTL_SET_ROLE) {
288                 role = nv_get_uint8(nvin, "role");
289                 switch (role) {
290                 case HAST_ROLE_INIT:    /* Is that valid to set, hmm? */
291                 case HAST_ROLE_PRIMARY:
292                 case HAST_ROLE_SECONDARY:
293                         break;
294                 default:
295                         pjdlog_error("Invalid role received (%hhu).", role);
296                         error = EHAST_INVALID;
297                         goto fail;
298                 }
299         }
300         if (strcmp(str, "all") == 0) {
301                 struct hast_resource *res;
302
303                 /* All configured resources. */
304
305                 ii = 0;
306                 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
307                         switch (cmd) {
308                         case HASTCTL_SET_ROLE:
309                                 control_set_role(cfg, nvout, role, res,
310                                     res->hr_name, ii++);
311                                 break;
312                         case HASTCTL_STATUS:
313                                 control_status(cfg, nvout, res, res->hr_name,
314                                     ii++);
315                                 break;
316                         default:
317                                 pjdlog_error("Invalid command received (%hhu).",
318                                     cmd);
319                                 error = EHAST_UNIMPLEMENTED;
320                                 goto fail;
321                         }
322                 }
323         } else {
324                 /* Only selected resources. */
325
326                 for (ii = 0; ; ii++) {
327                         str = nv_get_string(nvin, "resource%u", ii);
328                         if (str == NULL)
329                                 break;
330                         switch (cmd) {
331                         case HASTCTL_SET_ROLE:
332                                 control_set_role(cfg, nvout, role, NULL, str,
333                                     ii);
334                                 break;
335                         case HASTCTL_STATUS:
336                                 control_status(cfg, nvout, NULL, str, ii);
337                                 break;
338                         default:
339                                 pjdlog_error("Invalid command received (%hhu).",
340                                     cmd);
341                                 error = EHAST_UNIMPLEMENTED;
342                                 goto fail;
343                         }
344                 }
345         }
346         if (nv_error(nvout) != 0)
347                 goto close;
348 fail:
349         if (error != 0)
350                 nv_add_int16(nvout, error, "error");
351
352         if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0)
353                 pjdlog_errno(LOG_ERR, "Unable to send control response");
354 close:
355         if (nvin != NULL)
356                 nv_free(nvin);
357         if (nvout != NULL)
358                 nv_free(nvout);
359         proto_close(conn);
360 }
361
362 /*
363  * Thread handles control requests from the parent.
364  */
365 void *
366 ctrl_thread(void *arg)
367 {
368         struct hast_resource *res = arg;
369         struct nv *nvin, *nvout;
370         uint8_t cmd;
371
372         for (;;) {
373                 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) {
374                         if (sigexit_received)
375                                 pthread_exit(NULL);
376                         pjdlog_errno(LOG_ERR,
377                             "Unable to receive control message");
378                         continue;
379                 }
380                 cmd = nv_get_uint8(nvin, "cmd");
381                 if (cmd == 0) {
382                         pjdlog_error("Control message is missing 'cmd' field.");
383                         nv_free(nvin);
384                         continue;
385                 }
386                 nv_free(nvin);
387                 nvout = nv_alloc();
388                 switch (cmd) {
389                 case HASTCTL_STATUS:
390                         if (res->hr_remotein != NULL &&
391                             res->hr_remoteout != NULL) {
392                                 nv_add_string(nvout, "complete", "status");
393                         } else {
394                                 nv_add_string(nvout, "degraded", "status");
395                         }
396                         nv_add_uint32(nvout, (uint32_t)res->hr_extentsize,
397                             "extentsize");
398                         if (res->hr_role == HAST_ROLE_PRIMARY) {
399                                 nv_add_uint32(nvout,
400                                     (uint32_t)res->hr_keepdirty, "keepdirty");
401                                 nv_add_uint64(nvout,
402                                     (uint64_t)(activemap_ndirty(res->hr_amp) *
403                                     res->hr_extentsize), "dirty");
404                         } else {
405                                 nv_add_uint32(nvout, (uint32_t)0, "keepdirty");
406                                 nv_add_uint64(nvout, (uint64_t)0, "dirty");
407                         }
408                         break;
409                 default:
410                         nv_add_int16(nvout, EINVAL, "error");
411                         break;
412                 }
413                 if (nv_error(nvout) != 0) {
414                         pjdlog_error("Unable to create answer on control message.");
415                         nv_free(nvout);
416                         continue;
417                 }
418                 if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) {
419                         pjdlog_errno(LOG_ERR,
420                             "Unable to send reply to control message");
421                 }
422                 nv_free(nvout);
423         }
424         /* NOTREACHED */
425         return (NULL);
426 }