]> CyberLeo.Net >> Repos - FreeBSD/releng/9.0.git/blob - sys/geom/raid/g_raid.h
Copy stable/9 to releng/9.0 as part of the FreeBSD 9.0-RELEASE release
[FreeBSD/releng/9.0.git] / sys / geom / raid / g_raid.h
1 /*-
2  * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28
29 #ifndef _G_RAID_H_
30 #define _G_RAID_H_
31
32 #include <sys/param.h>
33 #include <sys/kobj.h>
34 #include <sys/bio.h>
35 #include <sys/time.h>
36
37 #define G_RAID_CLASS_NAME       "RAID"
38
39 #define G_RAID_MAGIC            "GEOM::RAID"
40
41 #define G_RAID_VERSION          0
42
43 struct g_raid_md_object;
44 struct g_raid_tr_object;
45
/*
 * Device flag bits.  Each flag is a single bit of a 64-bit value and
 * G_RAID_DEVICE_FLAG_MASK is the union of every flag defined here.
 */
#define G_RAID_DEVICE_FLAG_NOAUTOSYNC	(1ULL << 0)
#define G_RAID_DEVICE_FLAG_NOFAILSYNC	(1ULL << 1)
#define G_RAID_DEVICE_FLAG_MASK						\
	(G_RAID_DEVICE_FLAG_NOAUTOSYNC | G_RAID_DEVICE_FLAG_NOFAILSYNC)
50
51 #ifdef _KERNEL
/*
 * Module-wide variables shared by the GEOM RAID code.  They are only
 * declared here; their definitions and default values are not visible in
 * this header.  g_raid_debug is the verbosity level that the
 * G_RAID_DEBUG(), G_RAID_DEBUG1() and G_RAID_LOGREQ() macros compare their
 * 'lvl' argument against.
 * NOTE(review): the roles of the remaining variables are implied only by
 * their names -- confirm against the definitions.
 */
52 extern u_int g_raid_aggressive_spare;
53 extern u_int g_raid_debug;
54 extern int g_raid_read_err_thresh;
55 extern u_int g_raid_start_timeout;
56 extern struct g_class g_raid_class;
57
/*
 * Print a debug message when g_raid_debug is at least (lvl).
 *
 * While g_raid_debug is non-zero the message level is included in the
 * "GEOM_RAID[lvl]: " prefix; when it is zero (only reachable for messages
 * whose level is not above zero) the plain "GEOM_RAID: " prefix is used.
 * A newline is appended to fmt.  fmt must be a string literal because it
 * is pasted between the prefix and the newline.
 */
#define G_RAID_DEBUG(lvl, fmt, ...)					\
do {									\
	if (g_raid_debug >= (lvl)) {					\
		/* g_raid_debug is unsigned: "not > 0" means "== 0". */	\
		if (g_raid_debug == 0)					\
			printf("GEOM_RAID: " fmt "\n",			\
			    ## __VA_ARGS__);				\
		else							\
			printf("GEOM_RAID[%u]: " fmt "\n",		\
			    lvl, ## __VA_ARGS__);			\
	}								\
} while (0)
/*
 * Like G_RAID_DEBUG(), but additionally prints (sc)->sc_name followed by
 * ": " right after the "GEOM_RAID" prefix.
 *
 * The message is emitted when g_raid_debug is at least (lvl).  The level
 * is shown in the prefix only while g_raid_debug is non-zero.  A newline
 * is appended to fmt, which must be a string literal.
 */
#define G_RAID_DEBUG1(lvl, sc, fmt, ...)				\
do {									\
	if (g_raid_debug >= (lvl)) {					\
		/* g_raid_debug is unsigned: "not > 0" means "== 0". */	\
		if (g_raid_debug == 0)					\
			printf("GEOM_RAID: %s: " fmt "\n",		\
			    (sc)->sc_name, ## __VA_ARGS__);		\
		else							\
			printf("GEOM_RAID[%u]: %s: " fmt "\n",		\
			    lvl, (sc)->sc_name, ## __VA_ARGS__);	\
	}								\
} while (0)
/*
 * Log a message about a bio request when g_raid_debug is at least (lvl).
 *
 * Output is the "GEOM_RAID[lvl]: " prefix (or plain "GEOM_RAID: " when
 * g_raid_debug is zero), the formatted message, one space, whatever
 * g_print_bio(bp) prints, and finally a newline.  fmt must be a string
 * literal.
 */
#define G_RAID_LOGREQ(lvl, bp, fmt, ...)				\
do {									\
	if (g_raid_debug >= (lvl)) {					\
		/* g_raid_debug is unsigned: "not > 0" means "== 0". */	\
		if (g_raid_debug == 0)					\
			printf("GEOM_RAID: " fmt " ", ## __VA_ARGS__);	\
		else							\
			printf("GEOM_RAID[%u]: " fmt " ",		\
			    lvl, ## __VA_ARGS__);			\
		g_print_bio(bp);					\
		printf("\n");						\
	}								\
} while (0)
91
92 /*
93  * Flags we use to distinguish I/O initiated by the TR layer to maintain
94  * the volume's characteristics, fix subdisks, extra copies of data, etc.
95  *
96  * G_RAID_BIO_FLAG_SYNC         I/O to update an extra copy of the data
97  *                              for RAID volumes that maintain extra data
98  *                              and need to rebuild that data.
99  * G_RAID_BIO_FLAG_REMAP        I/O done to try to provoke a subdisk into
100  *                              doing some desirable action such as bad
101  *                              block remapping after we detect a bad part
102  *                              of the disk.
103  * G_RAID_BIO_FLAG_LOCKED       I/O holds range lock that should be released.
104  *
105  * and the following meta item:
106  * G_RAID_BIO_FLAG_SPECIAL      Any of the I/O flags that need to make it
107  *                              through the range locking which would
108  *                              otherwise defer the I/O until after that
109  *                              range is unlocked.
110  */
/* Individual bio flag bits used for I/O initiated by the TR layer. */
#define G_RAID_BIO_FLAG_SYNC		0x01
#define G_RAID_BIO_FLAG_REMAP		0x02
#define G_RAID_BIO_FLAG_LOCKED		0x80

/* Meta item: the union of the SYNC and REMAP flags. */
#define G_RAID_BIO_FLAG_SPECIAL						\
	(G_RAID_BIO_FLAG_SYNC | G_RAID_BIO_FLAG_REMAP)
116
/*
 * Descriptor of one locked range.  Volumes keep these on their v_locks
 * list ("List of locked regions" in struct g_raid_volume).
 * NOTE(review): l_offset/l_length presumably mirror the off/len arguments
 * of g_raid_lock_range() and l_callback_arg its argp argument; the meaning
 * of l_pending is not visible in this header -- confirm in the
 * implementation.
 */
117 struct g_raid_lock {
118         off_t                    l_offset;
119         off_t                    l_length;
120         void                    *l_callback_arg;
121         int                      l_pending;
122         LIST_ENTRY(g_raid_lock)  l_next;	/* LIST linkage between locks. */
123 };
124
/*
 * Event flag bits; each value is a distinct bit so they can be combined.
 * NOTE(review): presumably these are the values carried in
 * g_raid_event.e_flags and passed as the 'flags' argument of
 * g_raid_event_send(), with VOLUME/SUBDISK/DISK naming the kind of object
 * e_tgt points to -- confirm in the implementation.
 */
125 #define G_RAID_EVENT_WAIT       0x01
126 #define G_RAID_EVENT_VOLUME     0x02
127 #define G_RAID_EVENT_SUBDISK    0x04
128 #define G_RAID_EVENT_DISK       0x08
129 #define G_RAID_EVENT_DONE       0x10
/*
 * One queued event.  Events are linked through e_next on a TAILQ; the
 * softc's sc_events member ("Worker events queue") is a TAILQ of these.
 */
130 struct g_raid_event {
131         void                    *e_tgt;
132         int                      e_event;
133         int                      e_flags;
134         int                      e_error;
135         TAILQ_ENTRY(g_raid_event) e_next;
136 };
/* Disk state codes (see d_state in struct g_raid_disk below). */
137 #define G_RAID_DISK_S_NONE              0x00    /* State is unknown. */
138 #define G_RAID_DISK_S_OFFLINE           0x01    /* Missing disk placeholder. */
139 #define G_RAID_DISK_S_FAILED            0x02    /* Failed. */
140 #define G_RAID_DISK_S_STALE_FAILED      0x03    /* Old failed. */
141 #define G_RAID_DISK_S_SPARE             0x04    /* Hot-spare. */
142 #define G_RAID_DISK_S_STALE             0x05    /* Old disk, unused now. */
143 #define G_RAID_DISK_S_ACTIVE            0x06    /* Operational. */
144
/* Disk event code (presumably delivered via g_raid_event_send() -- confirm). */
145 #define G_RAID_DISK_E_DISCONNECTED      0x01
146
/*
 * Per-disk bookkeeping: the GEOM consumer used to reach the disk, its
 * metadata storage, kernel dump arguments, state, load/position
 * statistics, read error count and the list of subdisks that live on it.
 * Disks are linked through d_next; the softc's sc_disks member is the
 * TAILQ head for them.
 */
147 struct g_raid_disk {
148         struct g_raid_softc     *d_softc;       /* Back-pointer to softc. */
149         struct g_consumer       *d_consumer;    /* GEOM disk consumer. */
150         void                    *d_md_data;     /* Disk's metadata storage. */
151         struct g_kerneldump      d_kd;          /* Kernel dumping method/args. */
152         uint64_t                 d_flags;       /* Additional flags. */
153         u_int                    d_state;       /* Disk state. */
154         u_int                    d_load;        /* Disk average load. */
155         off_t                    d_last_offset; /* Last head offset. */
156         int                      d_read_errs;   /* Count of the read errors */
157         TAILQ_HEAD(, g_raid_subdisk)     d_subdisks; /* List of subdisks. */
158         TAILQ_ENTRY(g_raid_disk)         d_next;        /* Next disk in the node. */
159 };
160
/* Subdisk state codes (see sd_state in struct g_raid_subdisk). */
161 #define G_RAID_SUBDISK_S_NONE           0x00    /* Absent. */
162 #define G_RAID_SUBDISK_S_FAILED         0x01    /* Failed. */
163 #define G_RAID_SUBDISK_S_NEW            0x02    /* Blank. */
164 #define G_RAID_SUBDISK_S_REBUILD        0x03    /* Blank + rebuild. */
165 #define G_RAID_SUBDISK_S_UNINITIALIZED  0x04    /* Disk of the new volume. */
166 #define G_RAID_SUBDISK_S_STALE          0x05    /* Dirty. */
167 #define G_RAID_SUBDISK_S_RESYNC         0x06    /* Dirty + check/repair. */
168 #define G_RAID_SUBDISK_S_ACTIVE         0x07    /* Usable. */
169
/*
 * Subdisk event codes.  Judging by its name, codes starting at
 * G_RAID_SUBDISK_E_FIRST_TR_PRIVATE are reserved for events private to
 * the transformation modules -- confirm against their use.
 */
170 #define G_RAID_SUBDISK_E_NEW            0x01    /* A new subdisk has arrived */
171 #define G_RAID_SUBDISK_E_FAILED         0x02    /* A subdisk failed, but remains in volume */
172 #define G_RAID_SUBDISK_E_DISCONNECTED   0x03    /* A subdisk removed from volume. */
173 #define G_RAID_SUBDISK_E_FIRST_TR_PRIVATE 0x80  /* translation private events */
174
/*
 * Last head offset of the backing disk expressed relative to the start of
 * the subdisk (d_last_offset - sd_offset), or 0 when the subdisk has no
 * disk attached.  The argument is evaluated more than once.
 */
#define G_RAID_SUBDISK_POS(sd)						\
	(!(sd)->sd_disk ? 0 :						\
	    ((sd)->sd_disk->d_last_offset - (sd)->sd_offset))
#define G_RAID_SUBDISK_TRACK_SIZE	(1024 * 1024)

/*
 * Average load of the backing disk (d_load), or 0 when the subdisk has no
 * disk attached.  The argument is evaluated more than once.
 */
#define G_RAID_SUBDISK_LOAD(sd)						\
	(!(sd)->sd_disk ? 0 : ((sd)->sd_disk->d_load))
#define G_RAID_SUBDISK_LOAD_SCALE	256
181
/*
 * A subdisk: the region of one disk (sd_offset/sd_size on sd_disk) that
 * forms the component at position sd_pos of volume sd_volume.  sd_disk is
 * tested for NULL by G_RAID_SUBDISK_POS()/G_RAID_SUBDISK_LOAD(), so a
 * subdisk may exist without a disk attached.  Subdisks living on the same
 * disk are linked through sd_next (head: g_raid_disk.d_subdisks).
 */
182 struct g_raid_subdisk {
183         struct g_raid_softc     *sd_softc;      /* Back-pointer to softc. */
184         struct g_raid_disk      *sd_disk;       /* Where this subdisk lives. */
185         struct g_raid_volume    *sd_volume;     /* Volume, sd is a part of. */
186         off_t                    sd_offset;     /* Offset on the disk. */
187         off_t                    sd_size;       /* Size on the disk. */
188         u_int                    sd_pos;        /* Position in volume. */
189         u_int                    sd_state;      /* Subdisk state. */
190         off_t                    sd_rebuild_pos; /* Rebuild position. */
191         int                      sd_recovery;   /* Count of recovery reqs. */
192         TAILQ_ENTRY(g_raid_subdisk)      sd_next; /* Next subdisk on disk. */
193 };
194
/* Fixed array sizes used inside struct g_raid_volume. */
#define G_RAID_MAX_SUBDISKS	16	/* Entries in v_subdisks[]. */
#define G_RAID_MAX_VOLUMENAME	32	/* Bytes in v_name[]. */

/* Volume state codes. */
#define G_RAID_VOLUME_S_STARTING	0x00
#define G_RAID_VOLUME_S_BROKEN		0x01
#define G_RAID_VOLUME_S_DEGRADED	0x02
#define G_RAID_VOLUME_S_SUBOPTIMAL	0x03
#define G_RAID_VOLUME_S_OPTIMAL		0x04
#define G_RAID_VOLUME_S_UNSUPPORTED	0x05
#define G_RAID_VOLUME_S_STOPPED		0x06

/*
 * Non-zero when state (s) is DEGRADED, SUBOPTIMAL or OPTIMAL.  The
 * argument may be evaluated up to three times, so it must be free of
 * side effects.
 */
#define G_RAID_VOLUME_S_ALIVE(s)					\
	((s) == G_RAID_VOLUME_S_DEGRADED ||				\
	    (s) == G_RAID_VOLUME_S_SUBOPTIMAL ||			\
	    (s) == G_RAID_VOLUME_S_OPTIMAL)
210
/* Volume event codes. */
211 #define G_RAID_VOLUME_E_DOWN            0x00
212 #define G_RAID_VOLUME_E_UP              0x01
213 #define G_RAID_VOLUME_E_START           0x10
214 #define G_RAID_VOLUME_E_STARTMD         0x11
215
/*
 * RAID level codes.  Presumably the values kept in
 * g_raid_volume.v_raid_level and handled by g_raid_volume_level2str() /
 * g_raid_volume_str2level() -- confirm in the implementation.  Note that
 * the list is not in numeric order.
 */
216 #define G_RAID_VOLUME_RL_RAID0          0x00
217 #define G_RAID_VOLUME_RL_RAID1          0x01
218 #define G_RAID_VOLUME_RL_RAID3          0x03
219 #define G_RAID_VOLUME_RL_RAID4          0x04
220 #define G_RAID_VOLUME_RL_RAID5          0x05
221 #define G_RAID_VOLUME_RL_RAID6          0x06
222 #define G_RAID_VOLUME_RL_RAID1E         0x11
223 #define G_RAID_VOLUME_RL_SINGLE         0x0f
224 #define G_RAID_VOLUME_RL_CONCAT         0x1f
225 #define G_RAID_VOLUME_RL_RAID5E         0x15
226 #define G_RAID_VOLUME_RL_RAID5EE        0x25
227 #define G_RAID_VOLUME_RL_UNKNOWN        0xff
228
/*
 * RAID level qualifier codes (presumably the values of
 * g_raid_volume.v_raid_level_qualifier -- confirm).
 */
229 #define G_RAID_VOLUME_RLQ_NONE          0x00
230 #define G_RAID_VOLUME_RLQ_UNKNOWN       0xff
231
/*
 * Forward declaration; the full definition follows immediately, so this
 * line is redundant but harmless.
 */
232 struct g_raid_volume;
233
/*
 * One RAID volume of a node.  It embeds a fixed array of
 * G_RAID_MAX_SUBDISKS subdisk slots (v_disks_count holds the number of
 * disks in the array), records the array geometry (RAID level, strip
 * size, sector size, media size), tracks in-flight and lock-blocked I/O,
 * and carries the state needed while starting and stopping.  A volume is
 * linked both on a TAILQ via v_next (head: g_raid_softc.sc_volumes) and on
 * a separate global LIST via v_global_next.
 */
234 struct g_raid_volume {
235         struct g_raid_softc     *v_softc;       /* Back-pointer to softc. */
236         struct g_provider       *v_provider;    /* GEOM provider. */
237         struct g_raid_subdisk    v_subdisks[G_RAID_MAX_SUBDISKS];
238                                                 /* Subdisks of this volume. */
239         void                    *v_md_data;     /* Volume's metadata storage. */
240         struct g_raid_tr_object *v_tr;          /* Transformation object. */
241         char                     v_name[G_RAID_MAX_VOLUMENAME];
242                                                 /* Volume name. */
243         u_int                    v_state;       /* Volume state. */
244         u_int                    v_raid_level;  /* Array RAID level. */
245         u_int                    v_raid_level_qualifier; /* RAID level det. */
246         u_int                    v_disks_count; /* Number of disks in array. */
247         u_int                    v_strip_size;  /* Array strip size. */
248         u_int                    v_sectorsize;  /* Volume sector size. */
249         off_t                    v_mediasize;   /* Volume media size.  */
250         struct bio_queue_head    v_inflight;    /* In-flight write requests. */
251         struct bio_queue_head    v_locked;      /* Blocked I/O requests. */
252         LIST_HEAD(, g_raid_lock) v_locks;        /* List of locked regions. */
253         int                      v_pending_lock; /* writes to locked region */
254         int                      v_dirty;       /* Volume is DIRTY. */
255         struct timeval           v_last_done;   /* Time of the last I/O. */
256         time_t                   v_last_write;  /* Time of the last write. */
257         u_int                    v_writes;      /* Number of active writes. */
258         struct root_hold_token  *v_rootmount;   /* Root mount delay token. */
259         int                      v_starting;    /* Volume is starting */
260         int                      v_stopping;    /* Volume is stopping */
261         int                      v_provider_open; /* Number of opens. */
262         int                      v_global_id;   /* Global volume ID (rX). */
263         TAILQ_ENTRY(g_raid_volume)       v_next; /* List of volumes entry. */
264         LIST_ENTRY(g_raid_volume)        v_global_next; /* Global list entry. */
265 };
266
/* Node event codes. */
267 #define G_RAID_NODE_E_WAKE      0x00
268 #define G_RAID_NODE_E_START     0x01
269
/*
 * Per-node state: the metadata object and GEOM instance for the node, its
 * lists of volumes and disks, the main sx lock, and the worker process
 * together with the event and I/O queues it serves (both queues are
 * covered by sc_queue_mtx according to the field comments).
 */
270 struct g_raid_softc {
271         struct g_raid_md_object *sc_md;         /* Metadata object. */
272         struct g_geom           *sc_geom;       /* GEOM class instance. */
273         uint64_t                 sc_flags;      /* Additional flags. */
274         TAILQ_HEAD(, g_raid_volume)      sc_volumes;    /* List of volumes. */
275         TAILQ_HEAD(, g_raid_disk)        sc_disks;      /* List of disks. */
276         struct sx                sc_lock;       /* Main node lock. */
277         struct proc             *sc_worker;     /* Worker process. */
278         struct mtx               sc_queue_mtx;  /* Worker queues lock. */
279         TAILQ_HEAD(, g_raid_event) sc_events;   /* Worker events queue. */
280         struct bio_queue_head    sc_queue;      /* Worker I/O queue. */
281         int                      sc_stopping;   /* Node is stopping */
282 };
/*
 * Pseudo-member: "x->sc_name" expands to "x->sc_geom->name", i.e. the node
 * is named after its GEOM.  Because this is a plain object-like macro, any
 * identifier spelled sc_name that follows this definition is rewritten.
 */
283 #define sc_name sc_geom->name
284
285 /*
286  * KOBJ parent class of metadata processing modules.
287  */
/*
 * KOBJ_CLASS_FIELDS must stay first so the structure can be used as a
 * kobj class.  Classes are chained through mdc_list.
 * NOTE(review): how mdc_priority orders classes is not visible here.
 */
288 struct g_raid_md_class {
289         KOBJ_CLASS_FIELDS;
290         int              mdc_priority;
291         LIST_ENTRY(g_raid_md_class) mdc_list;
292 };
293
294 /*
295  * KOBJ instance of metadata processing module.
296  */
297 struct g_raid_md_object {
298         KOBJ_FIELDS;
299         struct g_raid_md_class  *mdo_class;
300         struct g_raid_softc     *mdo_softc;     /* Back-pointer to softc. */
301 };
302
/* Module event handler installed by G_RAID_MD_DECLARE(). */
303 int g_raid_md_modevent(module_t, int, void *);
304
/*
 * Declare a metadata module.  Expands to a static moduledata_t called
 * <name>_mod (module name: the stringified <name>, handler:
 * g_raid_md_modevent, private data: &<name>_class), registers it with
 * DECLARE_MODULE at SI_SUB_DRIVERS / SI_ORDER_SECOND, and records a
 * dependency on geom_raid.  The caller must already have defined
 * <name>_class and supplies the terminating semicolon.
 */
305 #define G_RAID_MD_DECLARE(name)                                 \
306     static moduledata_t name##_mod = {                          \
307         #name,                                                  \
308         g_raid_md_modevent,                                     \
309         &name##_class                                           \
310     };                                                          \
311     DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);  \
312     MODULE_DEPEND(name, geom_raid, 0, 0, 0)
313
314 /*
315  * KOBJ parent class of data transformation modules.
316  */
/*
 * KOBJ_CLASS_FIELDS must stay first so the structure can be used as a
 * kobj class.  Classes are chained through trc_list.
 * NOTE(review): how trc_priority orders classes is not visible here.
 */
317 struct g_raid_tr_class {
318         KOBJ_CLASS_FIELDS;
319         int              trc_priority;
320         LIST_ENTRY(g_raid_tr_class) trc_list;
321 };
322
323 /*
324  * KOBJ instance of data transformation module.
325  */
326 struct g_raid_tr_object {
327         KOBJ_FIELDS;
328         struct g_raid_tr_class  *tro_class;
329         struct g_raid_volume    *tro_volume;    /* Back-pointer to volume. */
330 };
331
/* Module event handler installed by G_RAID_TR_DECLARE(). */
332 int g_raid_tr_modevent(module_t, int, void *);
333
/*
 * Declare a transformation module.  Expands to a static moduledata_t
 * called <name>_mod (module name: the stringified <name>, handler:
 * g_raid_tr_modevent, private data: &<name>_class), registers it with
 * DECLARE_MODULE at SI_SUB_DRIVERS / SI_ORDER_FIRST (metadata modules use
 * SI_ORDER_SECOND), and records a dependency on geom_raid.  The caller
 * must already have defined <name>_class and supplies the terminating
 * semicolon.
 */
334 #define G_RAID_TR_DECLARE(name)                                 \
335     static moduledata_t name##_mod = {                          \
336         #name,                                                  \
337         g_raid_tr_modevent,                                     \
338         &name##_class                                           \
339     };                                                          \
340     DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);   \
341     MODULE_DEPEND(name, geom_raid, 0, 0, 0)
342
/*
 * Prototypes of the functions exported by the GEOM RAID core.  Only the
 * signatures are visible in this header; the grouping comments below are
 * derived from the function names and parameter types and should be
 * confirmed against the implementation.
 */
/* Conversions between numeric codes and strings (per the names). */
343 const char * g_raid_volume_level2str(int level, int qual);
344 int g_raid_volume_str2level(const char *str, int *level, int *qual);
345 const char * g_raid_volume_state2str(int state);
346 const char * g_raid_subdisk_state2str(int state);
347 const char * g_raid_disk_state2str(int state);
348
/* Creation of nodes, volumes and disks. */
349 struct g_raid_softc * g_raid_create_node(struct g_class *mp,
350     const char *name, struct g_raid_md_object *md);
351 int g_raid_create_node_format(const char *format, struct g_geom **gp);
352 struct g_raid_volume * g_raid_create_volume(struct g_raid_softc *sc,
353     const char *name, int id);
354 struct g_raid_disk * g_raid_create_disk(struct g_raid_softc *sc);
355 const char * g_raid_get_diskname(struct g_raid_disk *disk);
356
357 int g_raid_start_volume(struct g_raid_volume *vol);
358
/* Destruction of nodes, volumes and disks. */
359 int g_raid_destroy_node(struct g_raid_softc *sc, int worker);
360 int g_raid_destroy_volume(struct g_raid_volume *vol);
361 int g_raid_destroy_disk(struct g_raid_disk *disk);
362
/* I/O entry points operating on bios and subdisks. */
363 void g_raid_iodone(struct bio *bp, int error);
364 void g_raid_subdisk_iostart(struct g_raid_subdisk *sd, struct bio *bp);
365 int g_raid_subdisk_kerneldump(struct g_raid_subdisk *sd,
366     void *virtual, vm_offset_t physical, off_t offset, size_t length);
367
/* GEOM consumer handling. */
368 struct g_consumer *g_raid_open_consumer(struct g_raid_softc *sc,
369     const char *name);
370 void g_raid_kill_consumer(struct g_raid_softc *sc, struct g_consumer *cp);
371
/* State reporting and state changes; 'state' takes the *_S_* codes (presumably). */
372 void g_raid_report_disk_state(struct g_raid_disk *disk);
373 void g_raid_change_disk_state(struct g_raid_disk *disk, int state);
374 void g_raid_change_subdisk_state(struct g_raid_subdisk *sd, int state);
375 void g_raid_change_volume_state(struct g_raid_volume *vol, int state);
376
377 void g_raid_write_metadata(struct g_raid_softc *sc, struct g_raid_volume *vol,
378     struct g_raid_subdisk *sd, struct g_raid_disk *disk);
379 void g_raid_fail_disk(struct g_raid_softc *sc,
380     struct g_raid_subdisk *sd, struct g_raid_disk *disk);
381
/* Helpers shared by transformation modules (per the "_common" suffix). */
382 void g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp);
383 int g_raid_tr_kerneldump_common(struct g_raid_tr_object *tr,
384     void *virtual, vm_offset_t physical, off_t offset, size_t length);
385
/* Counting and lookup helpers that take a state code. */
386 u_int g_raid_ndisks(struct g_raid_softc *sc, int state);
387 u_int g_raid_nsubdisks(struct g_raid_volume *vol, int state);
388 u_int g_raid_nopens(struct g_raid_softc *sc);
389 struct g_raid_subdisk * g_raid_get_subdisk(struct g_raid_volume *vol,
390     int state);
/* Presumably the values accepted by the 'how' argument of g_raid_destroy(). */
391 #define G_RAID_DESTROY_SOFT             0
392 #define G_RAID_DESTROY_DELAYED  1
393 #define G_RAID_DESTROY_HARD             2
394 int g_raid_destroy(struct g_raid_softc *sc, int how);
395 int g_raid_event_send(void *arg, int event, int flags);
/* Range locking on a volume; see struct g_raid_lock. */
396 int g_raid_lock_range(struct g_raid_volume *vol, off_t off, off_t len,
397     struct bio *ignore, void *argp);
398 int g_raid_unlock_range(struct g_raid_volume *vol, off_t off, off_t len);
399
/* Declares g_raid_ctl as a function of type g_ctl_req_t. */
400 g_ctl_req_t g_raid_ctl;
401 #endif  /* _KERNEL */
402
403 #endif  /* !_G_RAID_H_ */