2 * Copyright (c) 2021 Netflix, Inc.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions, and the following disclaimer,
9 * without modification.
10 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
11 * substantially similar to the "NO WARRANTY" disclaimer below
12 * ("Disclaimer") and any redistribution must be conditioned upon
13 * including a substantially similar Disclaimer requirement for further
14 * binary redistribution.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGES.
31 * SCSI disk depop (head depopulation) support
33 * The standard defines 'storage elements' as the generic way of referring to a
34 * disk drive head. Each storage element has an identifier and an active status.
35 * The health of an element can be queried. Active elements may be removed from
36 * service with a REMOVE ELEMENT AND TRUNCATE (RET) command. Inactive elements
37 * may be returned to service with a RESTORE ELEMENTS AND REBUILD (RER)
38 * command. GET PHYSICAL ELEMENT STATUS (GPES) will return a list of elements,
39 * their health, whether they are in service, how much capacity the element is
42 * When a depop operation starts, the drive becomes format corrupt. No normal
43 * I/O can be done to the drive and a limited number of CDBs will
44 * succeed. Status can be obtained by either a TEST UNIT READY or a GPES
45 * command. A drive reset will not stop a depop operation, but a power cycle
46 * will. A failed depop operation will be reported when the next TEST UNIT READY
47 * is sent to the drive. Drives that are format corrupt after an interrupted
48 * operation need to have that operation repeated.
50 * 'depop' provides a wrapper around all these functions.
53 #include <sys/cdefs.h>
54 __FBSDID("$FreeBSD$");
56 #include <sys/types.h>
66 #include <cam/cam_debug.h>
67 #include <cam/cam_ccb.h>
68 #include <cam/scsi/scsi_all.h>
69 #include <cam/scsi/scsi_message.h>
71 #include <scsi_wrap.h>
72 #include "camcontrol.h"
82 depop_list(struct cam_device *device, int task_attr, int retry_count,
83 int timeout, int verbosemode __unused)
87 struct scsi_get_physical_element_hdr *hdr;
88 struct scsi_get_physical_element_descriptor *dtor_ptr;
90 hdr = scsi_wrap_get_physical_element_status(device, task_attr, retry_count, timeout,
91 SCSI_GPES_FILTER_ALL | SCSI_GPES_REPORT_TYPE_PHYS, 1);
93 errx(1, "scsi_wrap_get_physical_element_status returned an error");
96 * OK, we have the data, not report it out.
98 dtor_ptr = (struct scsi_get_physical_element_descriptor *)(hdr + 1);
99 dtors = scsi_4btoul(hdr->num_descriptors);
100 printf("Elem ID * Health Capacity\n");
101 for (uint32_t i = 0; i < dtors; i++) {
102 uint32_t id = scsi_4btoul(dtor_ptr[i].element_identifier);
103 uint8_t ralwd = dtor_ptr[i].ralwd;
104 uint8_t type = dtor_ptr[i].physical_element_type;
105 uint8_t health = dtor_ptr[i].physical_element_health;
106 uint64_t cap = scsi_8btou64(dtor_ptr[i].capacity);
107 if (type != GPED_TYPE_STORAGE)
108 printf("0x%08x -- type unknown %d\n", id, type);
110 printf("0x%08x %c 0x%02x %jd\n", id, ralwd ? '*' : ' ', health, cap);
112 printf("* -- Element can be restored\n");
119 depop_remove(struct cam_device *device, int task_attr, int retry_count,
120 int timeout, int verbosemode __unused, uint32_t elem, uint64_t capacity)
125 ccb = cam_getccb(device);
127 warnx("Can't allocate ccb");
130 scsi_remove_element_and_truncate(&ccb->csio,
138 /* Disable freezing the device queue */
139 ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
140 if (cam_send_ccb(device, ccb) < 0) {
141 warn("error sending GET PHYSICAL ELEMENT STATUS command");
146 if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
147 cam_error_print(device, ccb, CAM_ESF_ALL,
148 CAM_EPF_ALL, stderr);
158 depop_restore(struct cam_device *device, int task_attr, int retry_count,
159 int timeout, int verbosemode __unused)
164 ccb = cam_getccb(device);
166 warnx("Can't allocate ccb");
169 scsi_restore_elements_and_rebuild(&ccb->csio,
176 /* Disable freezing the device queue */
177 ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
178 if (cam_send_ccb(device, ccb) < 0) {
179 warn("error sending GET PHYSICAL ELEMENT STATUS command");
184 if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
185 cam_error_print(device, ccb, CAM_ESF_ALL,
186 CAM_EPF_ALL, stderr);
195 #define MUST_BE_NONE() \
196 if (action != DEPOP_NONE) { \
197 warnx("Use only one of -d, -l, or -r"); \
203 depop(struct cam_device *device, int argc, char **argv, char *combinedopt,
204 int task_attr, int retry_count, int timeout, int verbosemode)
207 int action = DEPOP_NONE;
211 uint64_t capacity = 0;
213 while ((c = getopt(argc, argv, combinedopt)) != -1) {
216 capacity = strtoumax(optarg, &endptr, 0);
217 if (*endptr != '\0') {
218 warnx("Invalid capacity: %s", optarg);
224 elem = strtoul(optarg, &endptr, 0);
225 if (*endptr != '\0') {
226 warnx("Invalid element: %s", optarg);
233 action = DEPOP_REMOVE;
241 action = DEPOP_RESTORE;
249 * Compute a sane timeout if none given. 5 seconds for the list command
250 * and whatever the block device characteristics VPD says for other
251 * depop commands. If there's no value in that field, default to 1
252 * day. Experience has shown that these operations take the better part
253 * of a day to complete, so a 1 day timeout default seems appropriate.
255 if (timeout == 0 && action != DEPOP_NONE) {
256 if (action == DEPOP_LIST) {
259 struct scsi_vpd_block_device_characteristics *bdc;
261 timeout = 24 * 60 * 60 * 1000; /* 1 day */
262 bdc = scsi_wrap_vpd_block_device_characteristics(device);
264 timeout = scsi_4btoul(bdc->depopulation_time);
272 warnx("Must specify one of -d, -l, or -r");
276 if (elem == 0 && capacity == 0) {
277 warnx("Must specify at least one of -e and/or -c");
281 error = depop_remove(device, task_attr, retry_count, timeout,
282 verbosemode, elem, capacity);
285 error = depop_restore(device, task_attr, retry_count, timeout,
289 error = depop_list(device, task_attr, retry_count, timeout,