2 * Copyright (c) 2008, 2016 Christos Zoulas
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
29 FILE_RCSID("@(#)$File: readcdf.c,v 1.65 2017/04/08 20:58:03 christos Exp $")
/* Element count of an array: its total size divided by the size of element 0. */
43 #define __arraycount(a) (sizeof(a) / sizeof(a[0]))
/* True when the MAGIC_MIME bit is not set in ms->flags. */
46 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
/*
 * Rows of two strings each.  cdf_app_to_mime() in this file walks a table
 * of struct nv until it reaches a NULL pattern and tests every pattern with
 * strcasestr(), so the first string of a row is presumably that pattern.
 * NOTE(review): the member declarations and the table declarators are not
 * visible in this excerpt -- confirm against the full file.
 */
48 static const struct nv {
/* NOTE(review): second strings below look like MIME subtypes -- confirm. */
52 { "Word", "msword", },
53 { "Excel", "vnd.ms-excel", },
54 { "Powerpoint", "vnd.ms-powerpoint", },
55 { "Crystal Reports", "x-rpt", },
56 { "Advanced Installer", "vnd.ms-msi", },
57 { "InstallShield", "vnd.ms-msi", },
58 { "Microsoft Patch Compiler", "vnd.ms-msi", },
59 { "NAnt", "vnd.ms-msi", },
60 { "Windows Installer", "vnd.ms-msi", },
/* NOTE(review): presumably a second table starts here (declarator not shown). */
63 { "Book", "vnd.ms-excel", },
64 { "Workbook", "vnd.ms-excel", },
65 { "WordDocument", "msword", },
66 { "PowerPoint", "vnd.ms-powerpoint", },
67 { "DigitalSignature", "vnd.ms-msi", },
/*
 * Same first strings as the five rows above, paired with product names
 * instead.  NOTE(review): presumably a third, descriptive table.
 */
70 { "Book", "Microsoft Excel", },
71 { "Workbook", "Microsoft Excel", },
72 { "WordDocument", "Microsoft Word", },
73 { "PowerPoint", "Microsoft PowerPoint", },
74 { "DigitalSignature", "Microsoft Installer", },
/*
 * Rows holding a pair of 64-bit values.  cdf_clsid_to_mime() compares the
 * clsid[0] and clsid[1] members of such rows with a caller-supplied clsid
 * and stops at a row whose mime member is NULL.
 */
78 static const struct cv {
83 { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
/*
 * NOTE(review): the same pair appears again; presumably it belongs to a
 * second table whose declarator is not shown in this excerpt.
 */
91 { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
/*
 * Scan the cv table, which ends at an entry whose mime member is NULL, for
 * an entry whose two clsid words both equal the two words of the clsid
 * argument.
 */
100 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
103 for (i = 0; cv[i].mime != NULL; i++) {
104 if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
/*
 * Diagnostic on stderr showing both clsid words in hexadecimal.
 * NOTE(review): the condition guarding it is not visible in this excerpt.
 */
108 fprintf(stderr, "unknown mime %" PRIx64 ", %" PRIx64 "\n", clsid[0],
/*
 * Scan the nv table, which ends at a NULL pattern, for an entry whose
 * pattern occurs in vbuf.  The comparison is case-insensitive (strcasestr)
 * and runs with LC_CTYPE switched to the "C" locale; the previous setting
 * is put back afterwards.
 */
115 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
118 const char *rv = NULL;
/*
 * NOTE(review): two ways of switching the locale appear below (uselocale
 * and setlocale) and each declares old_lc_ctype, so they presumably live in
 * alternative preprocessor branches that this excerpt does not show.
 */
120 locale_t old_lc_ctype, c_lc_ctype;
122 c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
/* Creating and installing the temporary locale are both asserted to succeed. */
123 assert(c_lc_ctype != NULL);
124 old_lc_ctype = uselocale(c_lc_ctype);
125 assert(old_lc_ctype != NULL);
127 char *old_lc_ctype = setlocale(LC_CTYPE, "C");
129 for (i = 0; nv[i].pattern != NULL; i++)
130 if (strcasestr(vbuf, nv[i].pattern) != NULL) {
/* Diagnostic on stderr; the condition guarding it is not visible here. */
135 fprintf(stderr, "unknown app %s\n", vbuf);
/* Reinstall the saved locale and free the temporary one. */
138 (void)uselocale(old_lc_ctype);
139 freelocale(c_lc_ctype);
141 setlocale(LC_CTYPE, old_lc_ctype);
/*
 * Walk `count` property records.  When MIME output is not requested each
 * numeric or time property is printed through file_printf() as
 * ", <name>: <value>"; in MIME mode the function instead tries to settle on
 * a subtype string and prints it as "application/<subtype>".
 */
147 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
148 size_t count, const cdf_directory_t *root_storage)
154 const char *str = NULL;
/* MIME mode with a root storage: first try to map its storage UUID. */
158 if (!NOTMIME(ms) && root_storage)
159 str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
162 for (i = 0; i < count; i++) {
/* buf receives the printable name of this property id. */
163 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
164 switch (info[i].pi_type) {
/* Numeric properties: printed only when MIME output is off. */
168 if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
169 info[i].pi_s16) == -1)
173 if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
174 info[i].pi_s32) == -1)
178 if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
179 info[i].pi_u32) == -1)
183 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
188 if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
/* Narrow and wide length-prefixed strings share one code path. */
192 case CDF_LENGTH32_STRING:
193 case CDF_LENGTH32_WSTRING:
194 len = info[i].pi_str.s_len;
/* NOTE(review): the wide-string adjustment made here is not visible. */
199 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
/* s..e delimits the source bytes; the loop is also bounded by sizeof(vbuf). */
201 s = info[i].pi_str.s_buf;
202 e = info[i].pi_str.s_buf + len;
203 for (j = 0; s < e && j < sizeof(vbuf)
/* The cast to unsigned char keeps the isprint() argument in its valid range. */
207 if (isprint((unsigned char)*s))
/* NOTE(review): handling of a completely filled vbuf is not visible here. */
210 if (j == sizeof(vbuf))
215 if (file_printf(ms, ", %s: %s",
/* No subtype chosen yet: try to derive one from the application-name property. */
219 } else if (str == NULL && info[i].pi_id ==
220 CDF_PROPERTY_NAME_OF_APPLICATION) {
221 str = cdf_app_to_mime(vbuf, app2mime);
/* Values below this threshold are formatted as an elapsed time. */
229 if (tp < 1000000000000000LL) {
230 cdf_print_elapsed_time(tbuf,
232 if (NOTMIME(ms) && file_printf(ms,
233 ", %s: %s", buf, tbuf) == -1)
/* Other values are converted to a timespec and formatted by cdf_ctime(). */
237 cdf_timestamp_to_timespec(&ts, tp);
238 c = cdf_ctime(&ts.tv_sec, tbuf);
/* Locates a newline in the formatted time; what is done with it is not shown. */
240 (ec = strchr(c, '\n')) != NULL)
243 if (NOTMIME(ms) && file_printf(ms,
244 ", %s: %s", buf, c) == -1)
/* Emit the subtype that was selected above. */
258 if (file_printf(ms, "application/%s", str) == -1)
/*
 * Describe a Thumbs.db catalog stream: either print a bracketed,
 * comma-separated list of catalog entry names, or print the MIME type
 * "application/CDFV2".  NOTE(review): the condition that selects between
 * the two outputs is not visible in this excerpt.
 */
265 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
266 const cdf_stream_t *sst)
271 cdf_catalog_entry_t *ce;
274 if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
/* cat receives the unpacked catalog. */
276 if (cdf_unpack_catalog(h, sst, &cat) == -1)
279 /* skip first entry since it has a , or paren */
280 for (i = 1; i < cat->cat_num; i++)
/* Each name is converted by cdf_u16tos8(); the last one is followed by "]". */
281 if (file_printf(ms, "%s%s",
282 cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
283 i == cat->cat_num - 1 ? "]" : ", ") == -1) {
289 if (file_printf(ms, "application/CDFV2") == -1)
/*
 * Unpack a summary-information stream and print a description of it:
 * the document banner, byte order, originating OS and version, an optional
 * string derived from the root storage UUID, and then the individual
 * properties via cdf_file_property_info().  A -1 result from the property
 * printer is returned as -2; any other result is returned unchanged.
 */
296 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
297 const cdf_stream_t *sst, const cdf_directory_t *root_storage)
299 cdf_summary_info_header_t si;
300 cdf_property_info_t *info;
/* Fills si, info and count from the stream. */
304 if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
310 if (file_printf(ms, "Composite Document File V2 Document")
/* A byte-order mark of 0xfffe is reported as little endian. */
314 if (file_printf(ms, ", %s Endian",
315 si.si_byte_order == 0xfffe ? "Little" : "Big") == -1)
/* Windows: the low byte of si_os_version is printed first, then the rest. */
319 if (file_printf(ms, ", Os: Windows, Version %d.%d",
320 si.si_os_version & 0xff,
321 (uint32_t)si.si_os_version >> 8) == -1)
/* MacOS: same two fields, printed in the opposite order. */
325 if (file_printf(ms, ", Os: MacOS, Version %d.%d",
326 (uint32_t)si.si_os_version >> 8,
327 si.si_os_version & 0xff) == -1)
/* Any other OS: print its numeric id together with the version fields. */
331 if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
332 si.si_os_version & 0xff,
333 (uint32_t)si.si_os_version >> 8) == -1)
/* Append a string looked up from the root storage UUID. */
338 str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
341 if (file_printf(ms, ", %s", str) == -1)
347 m = cdf_file_property_info(ms, info, count, root_storage);
/* Remap -1 to -2; callers in this file treat any negative value as failure. */
350 return m == -1 ? -2 : m;
/*
 * Format the two 64-bit words of a class id into buf (at most len bytes,
 * via snprintf) as five dash-separated hexadecimal groups of at least
 * 8, 4, 4, 4 and 12 digits.  The first three groups come from uuid[0]
 * (bits 63-32, 31-16, 15-0) and the last two from uuid[1] (bits 63-48 and
 * 47-0).
 */
355 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
356 snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
357 PRIx64 "-%.12" PRIx64,
358 (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL,
359 (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL,
360 (uuid[0] >> 0) & (uint64_t)0x0000000000000ffffULL,
361 (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL,
362 (uuid[1] >> 0) & (uint64_t)0x0000fffffffffffffULL);
/*
 * Read the user stream named "Catalog" into scn and, when that succeeds,
 * hand it to cdf_file_catalog() for printing.
 */
368 cdf_file_catalog_info(struct magic_set *ms, const cdf_info_t *info,
369 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
370 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn)
374 if ((i = cdf_read_user_stream(info, h, sat, ssat, sst,
375 dir, "Catalog", scn)) == -1)
/* Dump of the catalog; NOTE(review): presumably debug-only, guard not shown. */
378 cdf_dump_catalog(h, scn);
380 if ((i = cdf_file_catalog(ms, h, scn)) == -1)
/*
 * Print the summary information held in scn.  When that fails, *expn is set
 * to an explanatory message and the directory entry names are searched for
 * a known stream name; a hit is printed, and "application/vnd.ms-office" is
 * available as the MIME fallback.  The function also consults the catalog
 * stream through cdf_file_catalog_info().  NOTE(review): the exact
 * conditions linking these steps are not visible in this excerpt.
 */
386 cdf_check_summary_info(struct magic_set *ms, const cdf_info_t *info,
387 const cdf_header_t *h, const cdf_sat_t *sat, const cdf_sat_t *ssat,
388 const cdf_stream_t *sst, const cdf_dir_t *dir, cdf_stream_t *scn,
389 const cdf_directory_t *root_storage, const char **expn)
392 const char *str = NULL;
/* Scratch buffer with one char per element of a directory entry name. */
394 char name[__arraycount(d->d_name)];
398 cdf_dump_summary_info(h, scn);
400 if ((i = cdf_file_summary_info(ms, h, scn, root_storage)) < 0) {
401 *expn = "Can't expand summary_info";
/* Stop at the first directory entry whose name yields a match. */
406 for (j = 0; str == NULL && j < dir->dir_len; j++) {
407 d = &dir->dir_tab[j];
/* Narrow each name element to a char after cdf_tole2() conversion. */
408 for (k = 0; k < sizeof(name); k++)
409 name[k] = (char)cdf_tole2(d->d_name[k]);
/* Descriptive table for text output, MIME table for MIME output. */
410 str = cdf_app_to_mime(name,
411 NOTMIME(ms) ? name2desc : name2mime);
415 if (file_printf(ms, "%s", str) == -1)
/* Generic Office subtype. */
421 str = "vnd.ms-office";
422 if (file_printf(ms, "application/%s", str) == -1)
427 i = cdf_file_catalog_info(ms, info, h, sat, ssat, sst,
/*
 * Table of document kinds recognised from directory contents.
 * cdf_file_dir_info() reads four members of each entry: name (printed after
 * "CDFV2 "), mime (printed after "application/"), sections (a
 * NULL-terminated list of names) and types (the directory type passed to
 * cdf_find_stream() for the section at the same index).
 */
433 private struct sinfo {
/* Up to five section names per entry. */
436 const char *sections[5];
439 { "Encrypted", "encrypted",
441 "EncryptedPackage", "EncryptedSummary",
445 CDF_DIR_TYPE_USER_STREAM,
446 CDF_DIR_TYPE_USER_STREAM,
451 { "QuickBooks", "quickbooks",
454 "TaxForms", "PDFTaxForms", "modulesInBackup",
456 "mfbu_header", NULL, NULL, NULL, NULL,
460 CDF_DIR_TYPE_USER_STORAGE,
461 CDF_DIR_TYPE_USER_STORAGE,
462 CDF_DIR_TYPE_USER_STREAM,
464 CDF_DIR_TYPE_USER_STREAM,
468 { "Microsoft Excel", "vnd.ms-excel",
470 "Book", "Workbook", NULL, NULL, NULL,
473 CDF_DIR_TYPE_USER_STREAM,
474 CDF_DIR_TYPE_USER_STREAM,
478 { "Microsoft Word", "msword",
480 "WordDocument", NULL, NULL, NULL, NULL,
483 CDF_DIR_TYPE_USER_STREAM,
487 { "Microsoft PowerPoint", "vnd.ms-powerpoint",
489 "PowerPoint", NULL, NULL, NULL, NULL,
492 CDF_DIR_TYPE_USER_STREAM,
496 { "Microsoft Outlook Message", "vnd.ms-outlook",
498 "__properties_version1.0",
499 "__recip_version1.0_#00000000",
503 CDF_DIR_TYPE_USER_STREAM,
504 CDF_DIR_TYPE_USER_STORAGE,
/*
 * Identify a document from the streams and storages present in its
 * directory.  Every sectioninfo entry is tried in turn: each of its section
 * names is looked up with cdf_find_stream() using the directory type stored
 * at the same index.  For a recognised entry the function prints
 * "CDFV2 <name>" or "application/<mime>".  NOTE(review): the conditions
 * choosing between the two outputs, and the return values, are not visible
 * in this excerpt.
 */
511 cdf_file_dir_info(struct magic_set *ms, const cdf_dir_t *dir)
515 for (sd = 0; sd < __arraycount(sectioninfo); sd++) {
/* Address of the current table entry (restored from a mis-decoded "&sect"). */
516 const struct sinfo *si = &sectioninfo[sd];
/* The section list ends at the first NULL name. */
517 for (j = 0; si->sections[j]; j++) {
518 if (cdf_find_stream(dir, si->sections[j], si->types[j])
/* Diagnostic on stderr; the condition guarding it is not visible here. */
522 fprintf(stderr, "Can't read %s\n", si->sections[j]);
/* Tests whether the inner loop ran to the end of the section list. */
525 if (si->sections[j] == NULL)
528 if (file_printf(ms, "CDFV2 %s", si->name) == -1)
531 if (file_printf(ms, "application/%s", si->mime) == -1)
/*
 * Entry point for CDF detection.  Reads, in order, the header, the SAT, the
 * SSAT, the directory and the short stream, storing a short explanation in
 * expn whenever a step fails.  It then tries, in sequence: the HWP 5.x
 * signature in the "FileHeader" stream, the summary information, the
 * document summary information, and finally the directory-based section
 * table.  A generic "Composite Document File V2 Document" line (with the
 * explanation) or "application/CDFV2" is available as the fallback output.
 * NOTE(review): the jumps and return values connecting these steps are not
 * visible in this excerpt.
 */
540 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
546 cdf_stream_t sst, scn;
549 const char *expn = "";
550 const cdf_directory_t *root_storage;
/* NOTE(review): action taken for the APPLE/EXTENSION flags is not shown. */
556 if (ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION))
558 if (cdf_read_header(&info, &h) == -1)
564 if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
565 expn = "Can't read SAT";
569 cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
572 if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
573 expn = "Can't read SSAT";
577 cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
580 if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
581 expn = "Can't read directory";
/* Also yields the root storage directory entry through root_storage. */
585 if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
586 &root_storage)) == -1) {
587 expn = "Cannot read short stream";
591 cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
/* Prints the root storage class id as formatted by format_clsid(). */
597 if (file_printf(ms, "CLSID %s, ",
598 format_clsid(clsbuf, sizeof(clsbuf),
599 root_storage->d_storage_uuid)) == -1)
/* HWP 5.x check: the "FileHeader" stream must begin with the signature. */
605 if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
606 "FileHeader", &scn)) != -1) {
607 #define HWP5_SIGNATURE "HWP Document File"
/* Only compare when the stream holds at least as many bytes as the signature. */
608 if (scn.sst_len * scn.sst_ss >= sizeof(HWP5_SIGNATURE) - 1
609 && memcmp(scn.sst_tab, HWP5_SIGNATURE,
610 sizeof(HWP5_SIGNATURE) - 1) == 0) {
613 "Hangul (Korean) Word Processor File 5.x") == -1)
616 if (file_printf(ms, "application/x-hwp") == -1)
622 cdf_zero_stream(&scn);
626 if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
/* An errno other than ESRCH is recorded as a read failure. */
628 if (errno != ESRCH) {
629 expn = "Cannot read summary info";
632 i = cdf_check_summary_info(ms, &info, &h,
633 &sat, &ssat, &sst, &dir, &scn, root_storage, &expn);
634 cdf_zero_stream(&scn);
/* Same procedure for the document summary information stream. */
637 if ((i = cdf_read_doc_summary_info(&info, &h, &sat, &ssat,
638 &sst, &dir, &scn)) == -1) {
639 if (errno != ESRCH) {
640 expn = "Cannot read summary info";
643 i = cdf_check_summary_info(ms, &info, &h, &sat, &ssat,
644 &sst, &dir, &scn, root_storage, &expn);
/* Identification from the directory contents alone. */
648 i = cdf_file_dir_info(ms, &dir);
650 expn = "Cannot read section info";
653 cdf_zero_stream(&scn);
654 cdf_zero_stream(&sst);
/* Generic fallback output, followed by the recorded explanation. */
665 "Composite Document File V2 Document") == -1)
668 if (file_printf(ms, ", %s", expn) == -1)
671 if (file_printf(ms, "application/CDFV2") == -1)