1 #------------------------------------------------------------------------------
2 # $File: archive,v 1.138 2020/06/07 23:29:26 christos Exp $
3 # archive: file(1) magic for archive formats (see also "msdos" for self-
4 # extracting compressed archives)
6 # cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7 # pre-POSIX "tar" archives are also handled in the C code ../../src/is_tar.c.
10 # URL: https://en.wikipedia.org/wiki/Tar_(computing)
11 # Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
12 # header mainly padded with nul bytes
15 # filename or extended attribute printable strings in range space null til umlaut ue
18 # last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad
19 # at https://sourceforge.net/projects/s-tar/files/testscripts/
20 >>>508 ubelong&0x8B9E8DFF 0
21 # nul, space or ascii digit 0-7 at start of mode
23 >>>>>101 ubyte&0xC8 =0
24 # nul, space at end of check sum
25 >>>>>>155 ubyte&0xDF =0
26 # space or ascii digit 0 at start of check sum
27 >>>>>>>148 ubyte&0xEF =0x20
28 >>>>>>>>0 use tar-file
29 # minimal check and then display tar archive information which can also be
30 # embedded inside others like Android Backup, Clam AntiVirus database
33 # header padded with nuls
35 # GNU tar version 1.29 with non pax format option without refusing
36 # creates misleading V7 header for Long path, Multi-volume, Volume type
37 >>>156 ubyte 0x4c GNU tar archive
38 !:mime application/x-gtar
40 >>>156 ubyte 0x4d GNU tar archive
41 !:mime application/x-gtar
43 >>>156 ubyte 0x56 GNU tar archive
44 !:mime application/x-gtar
46 >>>156 default x tar archive (V7)
47 !:mime application/x-tar
49 # other stuff in padding
50 # some implementations add new fields to the blank area at the end of the header record
51 # created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
52 >>257 ulong !0 tar archive (old)
53 !:mime application/x-tar
55 # magic in newer, GNU, posix variants
57 # 2 last char of magic and UStar version because string expression does not work
58 # 2 space characters followed by a null for GNU variant
59 >>261 ubelong =0x72202000 POSIX tar archive (GNU)
60 !:mime application/x-gtar
62 # UStar version with ASCII "00"
63 >>261 ubelong 0x72003030 POSIX
64 # gLOBAL and ExTENSION type only found in POSIX.1-2001 format
65 >>>156 ubyte 0x67 \b.1-2001
66 >>>156 ubyte 0x78 \b.1-2001
67 >>>156 ubyte x tar archive
68 !:mime application/x-ustar
70 # version with 2 binary nuls embedded in Android Backup like com.android.settings.ab
71 >>261 ubelong 0x72000000 tar archive (ustar)
72 !:mime application/x-ustar
74 # not seen ustar variant with garbage version
75 >>261 default x tar archive (unknown ustar)
76 !:mime application/x-ustar
78 # type flag of 1st tar archive member
79 #>156 ubyte x \b, %c-type
81 >>156 ubyte 0 \b, file
82 >>156 ubyte 0x30 \b, file
83 >>156 ubyte 0x31 \b, hard link
84 >>156 ubyte 0x32 \b, symlink
85 >>156 ubyte 0x33 \b, char device
86 >>156 ubyte 0x34 \b, block device
87 >>156 ubyte 0x35 \b, directory
88 >>156 ubyte 0x36 \b, fifo
89 >>156 ubyte 0x37 \b, reserved
90 >>156 ubyte 0x4c \b, long path
91 >>156 ubyte 0x4d \b, multi volume
92 >>156 ubyte 0x56 \b, volume
93 >>156 ubyte 0x67 \b, global
94 >>156 ubyte 0x78 \b, extension
95 >>156 default x \b, type
99 # mode mainly stored as an octal number in ASCII null or space terminated
100 >100 string >\0 \b, mode %-.7s
101 # user id mainly as octal numbers in ASCII null or space terminated
102 >108 string >\0 \b, uid %-.7s
103 # group id mainly as octal numbers in ASCII null or space terminated
104 >116 string >\0 \b, gid %-.7s
105 # size mainly as octal number in ASCII
107 >>124 string >\0 \b, size %-.12s
108 # coding indicated by setting the high-order bit of the leftmost byte
109 >124 ubyte >0xEF \b, size 0x
110 >>124 ubyte !0xff \b%2.2x
111 >>125 ubyte !0xff \b%2.2x
112 >>126 ubyte !0xff \b%2.2x
113 >>127 ubyte !0xff \b%2.2x
114 >>128 ubyte !0xff \b%2.2x
115 >>129 ubyte !0xff \b%2.2x
116 >>130 ubyte !0xff \b%2.2x
117 >>131 ubyte !0xff \b%2.2x
118 >>132 ubyte !0xff \b%2.2x
119 >>133 ubyte !0xff \b%2.2x
120 >>134 ubyte !0xff \b%2.2x
121 >>135 ubyte !0xff \b%2.2x
122 # seconds since 0:0:0 1 jan 1970 UTC as octal number mainly in ASCII null or space terminated
123 >136 string >\0 \b, seconds %-.11s
124 # header checksum stored as an octal number in ASCII null or space terminated
125 #>148 string x \b, cksum %.7s
127 >157 string >\0 \b, linkname %-.40s
128 # additional fields for ustar
130 # owner user name null terminated
131 >>265 string >\0 \b, user %-.32s
132 # group name null terminated
133 >>297 string >\0 \b, group %-.32s
134 # device major minor if not zero
135 >>329 ubequad&0xCFCFCFCFcFcFcFdf !0
136 >>>329 string x \b, devmaj %-.7s
137 >>337 ubequad&0xCFCFCFCFcFcFcFdf !0
138 >>>337 string x \b, devmin %-.7s
140 >>345 string >\0 \b, prefix %-.155s
141 # old non ustar/POSIX tar
144 # padding[255] in old star
145 >>>257 string >\0 \b, padding: %-.40s
147 # padding[255] in old tar sometimes comment field
148 >>>257 string >\0 \b, comment: %-.40s
150 # Incremental snapshot gnu-tar format from:
151 # https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
# The file must begin with the literal text "GNU tar-"; the continuation then
# looks, relative to that match, for a token shaped like digit, dot, digits,
# dash, digits and prints it as the version.
152 0 string GNU\ tar- GNU tar incremental snapshot data
153 >&0 regex [0-9]\.[0-9]+-[0-9]+ version %s
157 # Yes, the top two "cpio archive" formats *are* supposed to just be "short".
158 # The idea is to indicate archives produced on machines with the same
159 # byte order as the machine running "file" with "cpio archive", and
160 # to indicate archives produced on machines with the opposite byte order
161 # from the machine running "file" with "byte-swapped cpio archive".
163 # The SVR4 "cpio(4)" hints that there are additional formats, but they
164 # are defined as "short"s; I think all the new formats are
165 # character-header formats and thus are strings, not numbers.
# Binary header: 16-bit magic 070707 read in the byte order of the machine
# running file(1).
166 0 short 070707 cpio archive
167 !:mime application/x-cpio
# 0143561 (0xC771) is 070707 (0x71C7) with its two bytes exchanged.
168 0 short 0143561 byte-swapped cpio archive
169 !:mime application/x-cpio # encoding: swapped
# Character-header variants: the magic is compared as six ASCII digits, so
# these tests do not depend on byte order.
170 0 string 070707 ASCII cpio archive (pre-SVR4 or odc)
171 0 string 070701 ASCII cpio archive (SVR4 with no CRC)
172 0 string 070702 ASCII cpio archive (SVR4 with CRC)
175 # Various archive formats used by various versions of the "ar"
180 # Original UNIX archive formats.
181 # They were written with binary values in host byte order, and
182 # the magic number was a host "int", which might have been 16 bits
183 # or 32 bits. We don't say "PDP-11" or "VAX", as there might have
184 # been ports to little-endian 16-bit-int or 32-bit-int platforms
185 # (x86?) using some of those formats; if none existed, feel free
186 # to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
187 # 32-bit. There might have been big-endian ports of that sort as
# Magic 0177555 is tried as a 16-bit and as a 32-bit integer in both byte
# orders, because the width and endianness of the writing host are unknown.
190 0 leshort 0177555 very old 16-bit-int little-endian archive
191 0 beshort 0177555 very old 16-bit-int big-endian archive
192 0 lelong 0177555 very old 32-bit-int little-endian archive
193 0 belong 0177555 very old 32-bit-int big-endian archive
# Magic 0177545, same four width/byte-order combinations.  The text
# "__.SYMDEF" directly after the magic (offset 2 after a 16-bit magic,
# offset 4 after a 32-bit one) is reported as "random library".
195 0 leshort 0177545 old 16-bit-int little-endian archive
196 >2 string __.SYMDEF random library
197 0 beshort 0177545 old 16-bit-int big-endian archive
198 >2 string __.SYMDEF random library
199 0 lelong 0177545 old 32-bit-int little-endian archive
200 >4 string __.SYMDEF random library
201 0 belong 0177545 old 32-bit-int big-endian archive
202 >4 string __.SYMDEF random library
205 # From "pdp" (but why a 4-byte quantity?)
207 0 lelong 0x39bed PDP-11 old archive
208 0 lelong 0x39bee PDP-11 4.0 archive
211 # XXX - what flavor of APL used this, and was it a variant of
212 # some ar archive format? It's similar to, but not the same
213 # as, the APL workspace magic numbers in pdp.
215 0 long 0100554 apl workspace
218 # System V Release 1 portable(?) archive format.
220 0 string =<ar> System V Release 1 ar archive
221 !:mime application/x-archive
224 # Debian package; it's in the portable archive format, and needs to go
225 # before the entry for regular portable archives, as it's recognized as
226 # a portable archive whose first member has a name beginning with
229 # Update: Joerg Jenderek
230 # URL: https://en.wikipedia.org/wiki/Deb_(file_format)
231 0 string =!<arch>\ndebian
232 # https://manpages.debian.org/testing/dpkg/dpkg-split.1.en.html
233 >14 string -split part of multipart Debian package
234 !:mime application/vnd.debian.binary-package
235 # udeb is used for stripped down deb file
237 >14 string -binary Debian binary package
238 !:mime application/vnd.debian.binary-package
239 # For ipk packager see also https://en.wikipedia.org/wiki/Opkg
241 # This should not happen
242 >14 default x Unknown Debian package
243 # NL terminated version; for most Debian cases this is 2.0, or 2.1 for split packages
244 >68 string >\0 (format %s)
246 #>>68 string x (format %.3s)
248 # 2nd archive name=control archive name like control.tar.gz or control.tar.xz
249 >>72 string >\0 \b, with %.14s
250 # look for 3rd archive name=data archive name like data.tar.{gz,xz,bz2,lzma}
251 >>0 search/0x93e4f data.tar. \b, data compression
252 # the above line only works if FILE_BYTES_MAX in ../../src/file.h is raised
253 # for example like libreoffice-dev-doc_1%3a5.2.7-1+rpi1+deb9u3_all.deb
255 # skip space (0x20 BSD) and slash (0x2f System V) character marking end of name
258 # display 3rd character of file name extension like 2 of bz2 or m of lzma
259 >>>>>&-1 ubyte x \b%c
261 >>>>>>>&-1 ubyte !0x2f
262 # display 4th character of file name extension like a of lzma
263 >>>>>>>>&-1 ubyte x \b%c
264 # split Debian package case
266 # dpkg-1.18.25/dpkg-split/info.c
267 # NL terminated ASCII package name like ckermit
269 # NL terminated package version like 302-5.3
271 # NL terminated MD5 checksum
272 >>>>&1 string x \b, MD5 %s
273 # NL terminated original package length
274 >>>>>&1 string x \b, unsplitted size %s
275 # NL terminated part length
276 >>>>>>&1 string x \b, part length %s
277 # NL terminated package part like n/m
278 >>>>>>>&1 string x \b, part %s
279 # NL terminated package architecture like armhf since dpkg 1.16.1 or later
280 >>>>>>>>&1 string x \b, %s
283 # MIPS archive; they're in the portable archive format, and need to go
284 # before the entry for regular portable archives, as it's recognized as
285 # a portable archive whose first member has a name beginning with
# Portable-archive signature "!<arch>\n" followed by a first member name that
# starts with a run of underscores and 'E'.
288 0 string =!<arch>\n__________E MIPS archive
289 !:mime application/x-archive
# Single characters at fixed offsets 19-22 select the extra description
# printed below (presumably they are the remaining characters of the first
# member name -- confirm against the MIPS ar format).
290 >20 string U with MIPS Ucode members
291 >21 string L with MIPSEL members
292 >21 string B with MIPSEB members
293 >19 string L and an EL hash table
294 >19 string B and an EB hash table
295 >22 string X -- out of date
298 # BSD/SVR2-and-later portable archive formats.
300 # Update: Joerg Jenderek
301 # URL: http://fileformats.archiveteam.org/wiki/AR
302 # Reference: https://www.unix.com/man-page/opensolaris/3HEAD/ar.h/
303 # Note: Mach-O universal binary in ./cafebabe is dependent
304 # TODO: unify current ar archive, MIPS archive, Debian package
305 # distinguish BSD, SVR; 32, 64 bit; HP from other 32-bit SVR;
306 # *.ar packages from *.a libraries. handle empty archive
307 0 string =!<arch>\n current ar archive
308 # print first and possibly second ar_name[16] for debugging purpose
309 #>8 string x \b, 1st "%.16s"
310 #>68 string x \b, 2nd "%.16s"
311 !:mime application/x-archive
312 # a in most case for libraries; lib for Microsoft libraries; ar else cases
314 >8 string __.SYMDEF random library
315 # first member with long marked name __.SYMDEF SORTED implies BSD library
316 >68 string __.SYMDEF\ SORTED random library
317 # Reference: https://parisc.wiki.kernel.org/images-parisc/b/b2/Rad_11_0_32.pdf
318 # "archive file" entry moved from ./hp
319 # LST header system_id 0210h~PA-RISC 1.1,... identifies the target architecture
320 # LST header a_magic 0619h~relocatable library
321 >68 belong 0x020b0619 - PA-RISC1.0 relocatable library
322 >68 belong 0x02100619 - PA-RISC1.1 relocatable library
323 >68 belong 0x02110619 - PA-RISC1.2 relocatable library
324 >68 belong 0x02140619 - PA-RISC2.0 relocatable library
325 #EOF for common ar archives
328 # "Thin" archive, as can be produced by GNU ar.
# Signature "!<thin>\n" at the start of the file.  The big-endian 32-bit value
# at offset 68 is reported as the number of symbol entries; the three
# continuations differ only in wording (none / singular / plural).
330 0 string =!<thin>\n thin archive with
331 >68 belong 0 no symbol entries
332 >68 belong 1 %d symbol entry
333 >68 belong >1 %d symbol entries
335 0 search/1 -h- Software Tools format archive text
337 # ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
339 # The first byte is the magic (0x1a), byte 2 is the compression type for
340 # the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
341 # filename of the first file (null terminated). Since some types collide
342 # we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
343 # 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
# Each test reads the first four bytes as a little-endian long and masks it
# with 0x8080ffff: bytes 0 and 1 are compared in full (0x1a marker, then the
# type byte) while only the top bit of bytes 2 and 3 is checked and must be
# clear.  Only the type byte differs between the entries.
344 0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW
345 !:mime application/x-arc
346 0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed
347 !:mime application/x-arc
348 0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed
349 !:mime application/x-arc
350 0 lelong&0x8080ffff 0x0000031a ARC archive data, packed
351 !:mime application/x-arc
352 0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed
353 !:mime application/x-arc
354 0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched
355 !:mime application/x-arc
356 # [JW] stuff taken from idarc, obviously ARC successors:
# Same masked test with type bytes 0x0a, 0x14 and 0x48.
357 0 lelong&0x8080ffff 0x00000a1a PAK archive data
358 !:mime application/x-arc
359 0 lelong&0x8080ffff 0x0000141a ARC+ archive data
360 !:mime application/x-arc
361 0 lelong&0x8080ffff 0x0000481a HYP archive data
362 !:mime application/x-arc
364 # Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
365 # I can't create either SPARK or ArcFS archives so I have not tested this stuff
366 # [GRR: the original entries collide with ARC, above; replaced with combined
367 # version (not tested)]
368 #0 byte 0x1a RISC OS archive (spark format)
369 0 string \032archive RISC OS archive (ArcFS format)
370 0 string Archive\000 RISC OS archive (ArcFS format)
372 # All these were taken from idarc, many could not be verified. Unfortunately,
373 # there were many low-quality sigs, i.e. easy to trigger false positives.
374 # Please notify me of any real-world fishy/ambiguous signatures and I'll try
375 # to get my hands on the actual archiver and see if I find something better. [JW]
376 # probably many can be enhanced by finding some 0-byte or control char near the start
378 # idarc calls this Crush/Uncompressed... *shrug*
379 0 string CRUSH Crush archive data
381 0 string HLSQZ Squeeze It archive data
383 0 string SQWEZ SQWEZ archive data
385 0 string HPAK HPack archive data
387 0 string \x91\x33HF HAP archive data
389 0 string MDmd MDCD archive data
391 0 string LIM\x1a LIM archive data
393 3 string LH5 SAR archive data
395 0 string \212\3SB\020\0 BSArc/BS2 archive data
396 # Bethesda Softworks Archive (Oblivion)
397 0 string BSA\0 BSArc archive data
398 >4 lelong x version %d
400 2 string =-ah MAR archive data
402 #0 belong&0x00f800ff 0x00800000 ACB archive data
404 # TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data
406 0 string JRchive JRC archive data
408 0 string DS\0 Quantum archive data
410 0 string PK\3\6 ReSOF archive data
412 0 string 7\4 QuArk archive data
414 14 string YC YAC archive data
416 0 string X1 X1 archive data
417 0 string XhDr X1 archive data
419 0 belong&0xffffe000 0x76ff2000 CDC Codec archive data
421 0 string \xad6" AMGC archive data
423 0 string N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
425 0 string LEOLZW PAKLeo archive data
427 0 string SChF ChArc archive data
429 0 string PSA PSA archive data
431 0 string DSIGDCC CrossePAC archive data
433 0 string \x1f\x9f\x4a\x10\x0a Freeze archive data
435 0 string \xc2\xa8MP\xc2\xa8 KBoom archive data
436 # NSQ, must go after CDC Codec
437 0 string \x76\xff NSQ archive data
439 0 string Dirk\ Paehl DPA archive data
441 # TODO: idarc says "bytes 0-2 == bytes 3-5"
443 # URL: http://fileformats.archiveteam.org/wiki/TTComp_archive
444 # Update: Joerg Jenderek
445 # GRR: line below is too general as it matches also Panorama database "TCDB 2003-10 demo.pan", others
447 # look for first keyword of Panorama database *.pan
448 >12 search/261 DESIGN
449 # skip keyword with low entropy
450 >12 default x TTComp archive, binary, 4K dictionary
451 # (version 5.25) labeled the above entry as "TTComp archive data"
452 # From: Joerg Jenderek
453 # URL: https://wiki.68kmla.org/DiskCopy_4.2_format_specification
454 # reference: http://nulib.com/library/FTN.e00005.htm
456 # test for disk size equal or above 400k
457 >0x40 ubelong >409599 Apple DiskCopy 4.2 image
458 #!:mime application/octet-stream
461 # image pascal name padded with NULs like Microsoft Mail
463 # data size in bytes like 409600
464 >>0x40 ubelong x \b, %u bytes
466 >>0x44 ubelong >0 \b, 0x%x tag size
468 #>>0x48 ubelong x \b, 0x%x checksum
470 #>>0x4c ubelong x \b, 0x%x tag checksum
472 >>0x50 ubyte 0 \b, GCR CLV ssdd (400k)
473 >>0x50 ubyte 1 \b, GCR CLV dsdd (800k)
474 >>0x50 ubyte 2 \b, MFM CAV dsdd (720k)
475 >>0x50 ubyte 3 \b, MFM CAV dshd (1440k)
476 >>0x50 ubyte >3 \b, 0x%x encoding
478 >>0x51 ubyte x \b, 0x%x format
479 #>>0x54 ubequad x \b, data 0x%16.16llx
480 # ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
481 0 string ESP ESP archive data
483 0 string \1ZPK\1 ZPack archive data
485 0 string \xbc\x40 Sky archive data
487 0 string UFA UFA archive data
489 0 string =-H2O DRY archive data
491 0 string FOXSQZ FoxSQZ archive data
493 0 string ,AR7 AR7 archive data
495 0 string PPMZ PPMZ archive data
497 # Update: Joerg Jenderek
498 # URL: http://fileformats.archiveteam.org/wiki/MS-DOS_installation_compression
499 # Reference: https://hwiegman.home.xs4all.nl/fileformats/compress/szdd_kwaj_format.html
500 # Note: use correct version of extracting tool like EXPAND, UNPACK, DECOMP or 7Z
501 4 string \x88\xf0\x27
503 >0 string KWAJ MS Compress archive data, KWAJ variant
504 !:mime application/x-ms-compress-kwaj
505 # extension not working in version 5.32
506 # magic/Magdir/archive, 284: Warning: EXTENSION type ` ??_' has bad char '?'
507 # file: line 284: Bad magic entry ' ??_'
509 # compression method (0-4)
510 >>8 uleshort x \b, %u method
511 # offset of compressed data
512 >>10 uleshort x \b, 0x%x offset
514 #>>>&-6 string x \b, TEST extension %-.3s
515 # header flags to mark header extensions
516 >>12 uleshort >0 \b, 0x%x flags
517 # 4 bytes: decompressed length of file
519 >>>14 ulelong x \b, original size: %u bytes
520 # 2 bytes: unknown purpose
521 # 2 bytes: length of unknown data + mentioned bytes
522 # 1-9 bytes: null-terminated file name
523 # 1-4 bytes: null-terminated file extension
526 >>>>12 uleshort ^0x02
527 >>>>>12 uleshort ^0x04
528 >>>>>>12 uleshort ^0x10
529 >>>>>>>14 string x \b, %-.8s
530 >>>>>>12 uleshort &0x10
531 >>>>>>>14 string x \b, %-.8s
532 >>>>>>>>&1 string x \b.%-.3s
533 >>>>>12 uleshort &0x04
534 >>>>>>12 uleshort ^0x10
535 >>>>>>>(14.s) uleshort x
536 >>>>>>>>&14 string x \b, %-.8s
537 >>>>>>12 uleshort &0x10
538 >>>>>>>(14.s) uleshort x
539 >>>>>>>>&14 string x \b, %-.8s
540 >>>>>>>>>&1 string x \b.%-.3s
541 >>>>12 uleshort &0x02
542 >>>>>12 uleshort ^0x04
543 >>>>>>12 uleshort ^0x10
544 >>>>>>>16 string x \b, %-.8s
545 >>>>>>12 uleshort &0x10
546 >>>>>>>16 string x \b, %-.8s
547 >>>>>>>>&1 string x \b.%-.3s
548 >>>>>12 uleshort &0x04
549 >>>>>>12 uleshort ^0x10
550 >>>>>>>(16.s) uleshort x
551 >>>>>>>>&16 string x \b, %-.8s
# KWAJ header flags 0x02, 0x04 and 0x10 all set: follow the indirect offset
# read at 16, then print the file name and its extension, exactly as the
# parallel branches for base offsets 14, 18 and 20 do.  The name test has to
# sit one level below the indirect test so that its relative offset is
# resolved against that match, and it keeps the "\b, " separator used by the
# other branches; the extension test nests one level deeper still.
552 >>>>>>12 uleshort &0x10
553 >>>>>>>(16.s) uleshort x
554 >>>>>>>>&16 string x \b, %-.8s
555 >>>>>>>>>&1 string x \b.%-.3s
557 >>>>12 uleshort ^0x02
558 >>>>>12 uleshort ^0x04
559 >>>>>>12 uleshort ^0x10
560 >>>>>>>18 string x \b, %-.8s
561 >>>>>>12 uleshort &0x10
562 >>>>>>>18 string x \b, %-.8s
563 >>>>>>>>&1 string x \b.%-.3s
564 >>>>>12 uleshort &0x04
565 >>>>>>12 uleshort ^0x10
566 >>>>>>>(18.s) uleshort x
567 >>>>>>>>&18 string x \b, %-.8s
568 >>>>>>12 uleshort &0x10
569 >>>>>>>(18.s) uleshort x
570 >>>>>>>>&18 string x \b, %-.8s
571 >>>>>>>>>&1 string x \b.%-.3s
572 >>>>12 uleshort &0x02
573 >>>>>12 uleshort ^0x04
574 >>>>>>12 uleshort ^0x10
575 >>>>>>>20 string x \b, %-.8s
576 >>>>>>12 uleshort &0x10
577 >>>>>>>20 string x \b, %-.8s
578 >>>>>>>>&1 string x \b.%-.3s
579 >>>>>12 uleshort &0x04
580 >>>>>>12 uleshort ^0x10
581 >>>>>>>(20.s) uleshort x
582 >>>>>>>>&20 string x \b, %-.8s
583 >>>>>>12 uleshort &0x10
584 >>>>>>>(20.s) uleshort x
585 >>>>>>>>&20 string x \b, %-.8s
586 >>>>>>>>>&1 string x \b.%-.3s
587 # 2 bytes: length of data + mentioned bytes
589 # SZDD variant Haruhiko Okumura's LZSS or 7z type MsLZ
590 >0 string SZDD MS Compress archive data, SZDD variant
591 !:mime application/x-ms-compress-szdd
593 # The character missing from the end of the filename (0=unknown)
594 >>9 string >\0 \b, %-.1s is last character of original name
595 # https://www.betaarchive.com/forum/viewtopic.php?t=26161
596 # Compression mode: "A" (0x41) found but sometimes "B" in Windows 3.1 builds 026 and 034e
597 >>8 string !A \b, %-.1s method
598 >>10 ulelong >0 \b, original size: %u bytes
599 # QBasic SZDD variant
600 3 string \x88\xf0\x27
601 >0 string SZ\x20 MS Compress archive data, QBasic variant
602 !:mime application/x-ms-compress-sz
604 >>8 ulelong >0 \b, original size: %u bytes
606 # MP3 (archiver, not lossy audio compression)
607 0 string MP3\x1a MP3-Archiver archive data
609 0 string OZ\xc3\x9d ZET archive data
611 0 string \x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
613 0 string gW\4\1 ARQ archive data
615 3 string OctSqu Squash archive data
617 0 string \5\1\1\0 Terse archive data
619 0 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
621 0 string UHA UHarc archive data
623 0 string \2AB ABComp archive data
624 0 string \3AB2 ABComp archive data
626 0 string CO\0 CMP archive data
628 0 string \x93\xb9\x06 Splint archive data
630 0 string \x13\x5d\x65\x8c InstallShield Z archive Data
632 1 string GTH Gather archive data
634 0 string BOA BOA archive data
636 0 string ULEB\xa RAX archive data
638 0 string ULEB\0 Xtreme archive data
640 0 string @\xc3\xa2\1\0 Pack Magic archive data
642 0 belong&0xfeffffff 0x1a034465 BTS archive data
644 0 string Ora\ ELI 5750 archive data
646 0 string \x1aFC\x1a QFC archive data
647 0 string \x1aQF\x1a QFC archive data
649 0 string RNC PRO-PACK archive data
651 0 string 777 777 archive data
653 0 string sTaC LZS221 archive data
655 0 string HPA HPA archive data
657 0 string LG Arhangel archive data
659 0 string 0123456789012345BZh EXP1 archive data
661 0 string IMP\xa IMP archive data
663 0 string \x00\x9E\x6E\x72\x76\xFF NRV archive data
665 0 string \x73\xb2\x90\xf4 Squish archive data
667 0 string PHILIPP Par archive data
668 0 string PAR Par archive data
670 0 string UB HIT archive data
672 0 belong&0xfffff000 0x53423000 SBX archive data
674 0 string NSK NaShrink archive data
676 0 string #\ CAR\ archive\ header SAPCAR archive data
677 0 string CAR\ 2.00RG SAPCAR archive data
679 0 string DST Disintegrator archive data
681 0 string ASD ASD archive data
683 0 string ISc( InstallShield CAB
685 0 string T4\x1a TOP4 archive data
686 # BatComp left out: sig looks like COM executable
687 # so TODO: get real 4dos batcomp file and find sig
689 0 string BH\5\7 BlakHole archive data
691 0 string BIX0 BIX archive data
693 0 string ChfLZ ChiefLZA archive data
695 0 string Blink Blink archive data
697 0 string \xda\xfa Logitech Compress archive data
698 # ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
699 1 string (C)\ STEPANYUK ARS-Sfx archive data
701 0 string AKT32 AKT32 archive data
702 0 string AKT AKT archive data
704 0 string MSTSM NPack archive data
706 0 string \0\x50\0\x14 PFT archive data
708 0 string SEM SemOne archive data
710 0 string \x8f\xaf\xac\x84 PPMD archive data
712 0 string FIZ FIZ archive data
714 0 belong&0xfffff0f0 0x4d530000 MSXiE archive data
716 0 belong&0xfffffff0 0x797a3030 DeepFreezer archive data
718 0 string =<DC- DC archive data
720 0 string \4TPAC\3 TPac archive data
722 0 string Ai\1\1\0 Ai archive data
723 0 string Ai\1\0\0 Ai archive data
725 0 string Ai\2\0 Ai32 archive data
726 0 string Ai\2\1 Ai32 archive data
728 0 string SBC SBC archive data
730 0 string YBS Ybs archive data
732 0 string \x9e\0\0 DitPack archive data
734 0 string DMS! DMS archive data
736 0 string \x8f\xaf\xac\x8c EPC archive data
738 0 string VS\x1a VSARC archive data
740 0 string PDZ PDZ archive data
742 0 string rdqx ReDuq archive data
744 0 string GCAX GCA archive data
746 0 string pN PPMN archive data
748 3 string WINIMAGE WinImage archive data
750 0 string CMP0CMP Compressia archive data
752 0 string UHB UHBC archive data
754 0 string \x61\x5C\x04\x05 WinHKI archive data
756 0 string WWP WWPack archive data
758 0 string \xffBSG BSN archive data
759 1 string \xffBSG BSN archive data
760 3 string \xffBSG BSN archive data
761 1 string \0\xae\2 BSN archive data
762 1 string \0\xae\3 BSN archive data
763 1 string \0\xae\7 BSN archive data
765 0 string \x33\x18 AIN archive data
766 0 string \x33\x17 AIN archive data
767 # XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
768 # SZip (TODO: doesn't catch all versions)
769 0 string SZ\x0a\4 SZip archive data
771 # *.XDI updated by Joerg Jenderek Sep 2015
772 # ftp://ftp.sac.sk/pub/sac/pack/0index.txt
773 # GRR: this test is still too general as it catches also text files starting with jm
775 # only found examples with this additional characteristic 2 bytes
776 >2 string \x2\x4 Xpack DiskImage archive data
779 # *.xpa updated by Joerg Jenderek Sep 2015
780 # ftp://ftp.elf.stuba.sk/pub/pc/pack/
784 # ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
785 # created by XPA32.EXE version 1.0.2 for Windows
786 >0 string xpa\0\1 \b32 archive data
787 # created by XPACK.COM version 1.67m or 1.67r with short 0x1800
788 >3 ubeshort !0x0001 \bck archive data
790 # changed by Joerg Jenderek Sep 2015 back to like in version 5.12
791 # letter 'I'+ acute accent is equivalent to \xcd
792 0 string \xcd\ jm Xpack single archive data
793 #!:mime application/x-xpa-compressed
796 # TODO: missing due to unknown magic/magic at end of file:
806 # These were inspired by idarc, but actually verified
807 # Dzip archiver (.dz)
808 # Update: Joerg Jenderek
809 # URL: http://speeddemosarchive.com/dzip/
810 # reference: http://speeddemosarchive.com/dzip/dz29src.zip/main.c
811 # GRR: line below is too general as it matches also ASCII texts like Doszip commander help dz.txt
813 # latest version is 2.9 dated 7 may 2003
814 >2 byte <4 Dzip archive data
815 !:mime application/x-dzip
817 >>2 byte x \b, version %i
819 >>4 ulelong x \b, offset 0x%x
820 >>8 ulelong x \b, %u files
821 # ZZip archiver (.zz)
822 0 string ZZ\ \0\0 ZZip archive data
823 0 string ZZ0 ZZip archive data
824 # PAQ archiver (.paq)
825 0 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
826 0 string PAQ PAQ archive data
829 # JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
830 0xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data
831 0 string JARCS JAR (ARJ Software, Inc.) archive data
833 # ARJ archiver (jason@jarthur.Claremont.EDU)
834 0 leshort 0xea60 ARJ archive data
835 !:mime application/x-arj
837 >8 byte &0x04 multi-volume,
838 >8 byte &0x10 slash-switched,
839 >8 byte &0x20 backup,
840 >34 string x original name: %s,
845 >7 byte 4 os: Macintosh
847 >7 byte 6 os: Apple ][ GS
848 >7 byte 7 os: Atari ST
850 >7 byte 9 os: VAX/VMS
852 # [JW] idarc says this is also possible
853 2 leshort 0xea60 ARJ archive data
855 # HA archiver (Greg Roelofs, newt@uchicago.edu)
856 # This is a really bad format. A file containing HAWAII will match this...
857 #0 string HA HA archive data,
858 #>2 leshort =1 1 file,
859 #>2 leshort >1 %hu files,
860 #>4 byte&0x0f =0 first is type CPY
861 #>4 byte&0x0f =1 first is type ASC
862 #>4 byte&0x0f =2 first is type HSC
863 #>4 byte&0x0f =0x0e first is type DIR
864 #>4 byte&0x0f =0x0f first is type SPECIAL
865 # suggestion: at least identify small archives (<1024 files)
# Mask 0xffff00fc on the first big-endian long: bytes 0-1 must be "HA", byte 2
# is ignored and the top six bits of byte 3 must be zero.  Bytes 2-3 are the
# little-endian file count read below, so this limits matches to counts
# under 1024.
866 0 belong&0xffff00fc 0x48410000 HA archive data
867 >2 leshort =1 1 file,
868 >2 leshort >1 %u files,
# Low nibble of the byte at offset 4 is reported as the type of the first
# entry.
869 >4 byte&0x0f =0 first is type CPY
870 >4 byte&0x0f =1 first is type ASC
871 >4 byte&0x0f =2 first is type HSC
872 >4 byte&0x0f =0x0e first is type DIR
873 >4 byte&0x0f =0x0f first is type SPECIAL
875 # HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
876 0 string HPAK HPACK archive data
878 # JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
879 0 string \351,\001JAM\ JAM archive,
880 >7 string >\0 version %.4s
882 >>0x2b string >\0 label %.11s,
883 >>0x27 lelong x serial %08x,
884 >>0x36 string >\0 fstype %.8s
886 # LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
887 # Update: Joerg Jenderek
888 # URL: https://en.wikipedia.org/wiki/LHA_(file_format)
889 # Reference: https://web.archive.org/web/20021005080911/http://www.osirusoft.com/joejared/lzhformat.html
891 # check and display information of lharc (LHa,PMarc) file
893 # check 1st character of method id like -lz4- -lh5- or -pm2-
895 # check 5th character of method id
897 # check header level 0 1 2 3
899 # check 2nd, 3rd and 4th character of method id
900 >>>>3 regex \^(lh[0-9a-ex]|lz[s2-8]|pm[012]|pc1) \b
901 !:mime application/x-lzh-compressed
902 # creator type "LHA "
904 # display archive type name like "LHa/LZS archive data" or "LArc archive"
907 # already known -lzs- -lz4- -lz5- with old names
908 >>>>>>2 string -lzs LHa/LZS archive data
909 >>>>>>3 regex \^lz[45] LHarc 1.x archive data
910 # missing -lz?- with wikipedia names
911 >>>>>>3 regex \^lz[2378] LArc archive
912 # display archive type name like "LHa (2.x) archive data"
914 # already known -lh0- -lh1- -lh2- -lh3- -lh4- -lh5- -lh6- -lh7- -lhd- variants with old names
915 >>>>>>3 regex \^lh[01] LHarc 1.x/ARX archive data
916 # LHice archiver uses ".ICE" as name extension instead of the usual ".lzh"
917 # FOOBAR archiver uses ".foo" as name extension instead of the usual one
918 # "Florain Orjanov's and Olga Bachetska's ARchiver" not found at the moment
919 >>>>>>>2 string -lh1 \b
921 >>>>>>3 regex \^lh[23d] LHa 2.x? archive data
922 >>>>>>3 regex \^lh[7] LHa (2.x)/LHark archive data
923 >>>>>>3 regex \^lh[456] LHa (2.x) archive data
924 >>>>>>>2 string -lh5 \b
925 # https://en.wikipedia.org/wiki/BIOS
926 # Some mainboard BIOSes like Award use LHa compression. So archives with unusual extensions are found, like
927 # bios.rom , kd7_v14.bin, 1010.004, ...
928 !:ext lha/lzh/rom/bin
929 # missing -lh?- variants (Joe Jared)
930 >>>>>>3 regex \^lh[89a-ce] LHa (Joe Jared) archive
932 >>>>>>2 string -lhx LHa (UNLHA32) archive
933 # lha archives with standard file name extensions ".lha" ".lzh"
934 >>>>>>3 regex !\^(lh1|lh5) \b
936 # this should not happen if all -lh variants are described
937 >>>>>>2 default x LHa (unknown) archive
940 >>>>>3 regex \^pm[012] PMarc archive data
942 # append method id without leading and trailing minus character
943 >>>>>3 string x [%3.3s]
944 >>>>>>0 use lharc-header
946 # check and display information of lharc header
948 # header size 0x4 , 0x1b-0x61
950 # compressed data size != compressed file size
951 #>7 ulelong x \b, data size %d
952 # attribute: 0x2~?? 0x10~symlink|target 0x20~normal
953 #>19 ubyte x \b, 19_0x%x
954 # level identifier 0 1 2 3
955 #>20 ubyte x \b, level %d
957 #>15 ubelong x DATE 0x%8.8x
960 # 0x20 type found for *.rom files
961 >>(21.b+24) ubyte <0x21 \b, 0x%x OS
962 # ascii type like M for MSDOS
963 >>(21.b+24) ubyte >0x20 \b, '%c' OS
966 #>>23 ubyte x \b, OS ID 0x%x
967 >>23 ubyte <0x21 \b, 0x%x OS
968 >>23 ubyte >0x20 \b, '%c' OS
969 # filename only for level 0 and 1
972 >>21 ubyte >0 \b, with
976 #2 string -lh0- LHarc 1.x/ARX archive data [lh0]
977 #!:mime application/x-lharc
980 #2 string -lh1- LHarc 1.x/ARX archive data [lh1]
981 #!:mime application/x-lharc
984 # NEW -lz2- ... -lz8-
997 # [never seen any but the last; -lh4- reported in comp.compression:]
998 #2 string -lzs- LHa/LZS archive data [lzs]
1001 # According to wikipedia and others such a version does not exist
1002 #2 string -lh\40- LHa 2.x? archive data [lh ]
1003 #2 string -lhd- LHa 2.x? archive data [lhd]
1006 #2 string -lh2- LHa 2.x? archive data [lh2]
1009 #2 string -lh3- LHa 2.x? archive data [lh3]
1012 #2 string -lh4- LHa (2.x) archive data [lh4]
1015 #2 string -lh5- LHa (2.x) archive data [lh5]
1018 #2 string -lh6- LHa (2.x) archive data [lh6]
1021 #2 string -lh7- LHa (2.x)/LHark archive data [lh7]
1023 # !:mime application/x-lha
1024 # >20 byte x - header level %d
1026 # NEW -lh8- ... -lhe- , -lhx-
1041 # taken from idarc [JW]
# Matches the three bytes "-lZ" at offset 2.
1042 2 string -lZ PUT archive data
1043 # already done by LHarc magics
1044 # this should never happen if all sub types of LZS archive are identified
1045 #2 string -lz LZS archive data
# Matches the five bytes "-sw1-" at offset 2.
1046 2 string -sw1- Swag archive data
# Named sub-routine; other entries invoke it with "use rar-file-header".
# Offsets below are relative to the offset given on the calling line.
1048 0 name rar-file-header
# Byte at offset 24 carries the version: 15 -> v1.5, 20 -> v2.0.
1049 >24 byte 15 \b, v1.5
1050 >24 byte 20 \b, v2.0
# NOTE(review): embedded numbering skips 1051; a further version test may be
# missing from this copy -- confirm against the upstream file.
# Byte at offset 15 carries the operating system code (0 through 5).
1052 >15 byte 0 \b, os: MS-DOS
1053 >15 byte 1 \b, os: OS/2
1054 >15 byte 2 \b, os: Win32
1055 >15 byte 3 \b, os: Unix
1056 >15 byte 4 \b, os: Mac OS
1057 >15 byte 5 \b, os: BeOS
# Named sub-routine; other entries invoke it with "use rar-archive-header".
# Offsets below are relative to the offset given on the calling line.
1059 0 name rar-archive-header
# Little-endian 16-bit flag word at offset 3. The "flags:" label is printed
# only when at least one of the low nine bits (mask 0x1ff) is set.
1060 >3 leshort&0x1ff >0 \b, flags:
# One line per flag bit; each prints its name when that bit is set.
# Note that 0x10 is listed before 0x08, which fixes the output order.
1061 >>3 leshort &0x01 ArchiveVolume
1062 >>3 leshort &0x02 Commented
1063 >>3 leshort &0x04 Locked
1064 >>3 leshort &0x10 NewVolumeNaming
1065 >>3 leshort &0x08 Solid
1066 >>3 leshort &0x20 Authenticated
1067 >>3 leshort &0x40 RecoveryRecordPresent
1068 >>3 leshort &0x80 EncryptedBlockHeader
1069 >>3 leshort &0x100 FirstVolume
1071 # RAR (Roshal Archive) archive
# 7-byte signature at offset 0: "Rar!", 0x1a, 7, 0.
1072 0 string Rar!\x1a\7\0 RAR archive data
1073 !:mime application/x-rar
# NOTE(review): embedded numbering skips 1074-1075 here; lines may be missing
# from this copy -- confirm against the upstream file.
# Indirect offsets: read the little-endian long stored at offset 0xc and add
# 9 (block type byte) or 7 (start passed to the sub-routine).
# Block type 0x74: describe it with rar-file-header.
1076 >(0xc.l+9) byte 0x74
1077 >>(0xc.l+7) use rar-file-header
1078 # subblock seems to share information with file header
1079 >(0xc.l+9) byte 0x7a
1080 >>(0xc.l+7) use rar-file-header
# NOTE(review): embedded numbering skips 1081, so the test that should guard
# the next line may be missing; as written it only runs under the 0x7a test.
1082 >>7 use rar-archive-header
# 8-byte signature at offset 0: "Rar!", 0x1a, 7, 1, 0.
1084 0 string Rar!\x1a\7\1\0 RAR archive data, v5
1085 !:mime application/x-rar
1088 # Very old RAR archive
1089 # https://jasonblanks.com/wp-includes/images/papers/KnowyourarchiveRAR.pdf
# 4-byte signature at offset 0: "RE", 0x7e, 0x5e.
1090 0 string RE\x7e\x5e RAR archive data (<v1.5)
1091 !:mime application/x-rar
1094 # SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
# Matches the four bytes "SQSH" at offset 0.
1095 0 string SQSH squished archive data (Acorn RISCOS)
1097 # UC2 archiver (Greg Roelofs, newt@uchicago.edu)
1098 # [JW] see exe section for self-extracting version
# Matches "UC2" followed by byte 0x1a at offset 0.
1099 0 string UC2\x1a UC2 archive data
1101 # PKZIP multi-volume archive
# 8 bytes at offset 0: "PK" 0x07 0x08 immediately followed by "PK" 0x03 0x04.
1102 0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract
1103 !:mime application/zip
1106 # Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# "PK" followed by bytes 5 and 6 at offset 0 is reported as an empty archive.
1107 0 string PK\005\006 Zip archive data (empty)
1108 !:mime application/zip
1114 # Specialised zip formats which start with a member named 'mimetype'
1115 # (stored uncompressed, with no 'extra field') containing the file's MIME type.
1116 # Check for an 8-byte name, 0-byte extra field, name "mimetype", and
1117 # contents starting with "application/":
# NOTE(review): embedded numbering jumps from 1108 to 1114 just above, so the
# level-0 test these ">" continuation lines belong to may be missing from this
# copy -- confirm against the upstream file.
# The pattern is 24 bytes long (4 + 8 + 12), so it ends at offset 50, which
# is where the subtype tests start.
1118 >26 string \x8\0\0\0mimetypeapplication/
1120 # KOffice / OpenOffice & StarOffice / OpenDocument formats
1121 # From: Abel Cheung <abel@oaka.org>
1123 # KOffice (1.2 or above) formats
1124 # (mimetype contains "application/vnd.kde.<SUBTYPE>")
# "vnd.kde." is 8 bytes, so the application name is tested at offset 58.
1125 >>50 string vnd.kde. KOffice (>=1.2)
1126 >>>58 string karbon Karbon document
1127 >>>58 string kchart KChart document
1128 >>>58 string kformula KFormula document
1129 >>>58 string kivio Kivio document
1130 >>>58 string kontour Kontour document
1131 >>>58 string kpresenter KPresenter document
1132 >>>58 string kspread KSpread document
1133 >>>58 string kword KWord document
1135 # OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
1136 # (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
1137 # URL: https://en.wikipedia.org/wiki/OpenOffice.org_XML
1138 # reference: http://fileformats.archiveteam.org/wiki/OpenOffice.org_XML
# "vnd.sun.xml." is 12 bytes, so the component name is tested at offset 62.
# After each component name, a byte other than 0x2e ('.') means the plain
# document type; otherwise the ".<variant>" suffix is matched.
# NOTE(review): embedded numbering has gaps after most "!:mime" lines in this
# block; lines may be missing from this copy -- confirm against upstream.
1139 >>50 string vnd.sun.xml. OpenOffice.org 1.x
# "writer" is 6 bytes -> suffix at offset 68
1140 >>>62 string writer Writer
1141 >>>>68 byte !0x2e document
1142 !:mime application/vnd.sun.xml.writer
1144 >>>>68 string .template template
1145 !:mime application/vnd.sun.xml.writer.template
1147 >>>>68 string .web Web template
1148 !:mime application/vnd.sun.xml.writer.web
1150 >>>>68 string .global global document
1151 !:mime application/vnd.sun.xml.writer.global
# "calc" is 4 bytes -> suffix at offset 66
1153 >>>62 string calc Calc
1154 >>>>66 byte !0x2e spreadsheet
1155 !:mime application/vnd.sun.xml.calc
1157 >>>>66 string .template template
1158 !:mime application/vnd.sun.xml.calc.template
# "draw" is 4 bytes -> suffix at offset 66
1160 >>>62 string draw Draw
1161 >>>>66 byte !0x2e document
1162 !:mime application/vnd.sun.xml.draw
1164 >>>>66 string .template template
1165 !:mime application/vnd.sun.xml.draw.template
# "impress" is 7 bytes -> suffix at offset 69
1167 >>>62 string impress Impress
1168 >>>>69 byte !0x2e presentation
1169 !:mime application/vnd.sun.xml.impress
1171 >>>>69 string .template template
1172 !:mime application/vnd.sun.xml.impress.template
1174 >>>62 string math Math document
1175 !:mime application/vnd.sun.xml.math
1177 >>>62 string base Database file
1178 !:mime application/vnd.sun.xml.base
1181 # OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
1182 # URL: http://fileformats.archiveteam.org/wiki/OpenDocument
1183 # https://lists.oasis-open.org/archives/office/200505/msg00006.html
1184 # (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
# "vnd.oasis.opendocument." is 23 bytes, so the subtype is tested at offset 73.
# After each subtype name, a byte other than 0x2d ('-') means the plain
# document type; otherwise the "-<variant>" suffix is matched.
1185 >>50 string vnd.oasis.opendocument. OpenDocument
# NOTE(review): no level-3 parent is visible for the offset-77 tests (embedded
# numbering skips 1186); the offsets and MIME types suggest a
# ">>>73 string text" test is missing -- confirm against upstream.
1187 >>>>77 byte !0x2d Text
1188 !:mime application/vnd.oasis.opendocument.text
1190 >>>>77 string -template Text Template
1191 !:mime application/vnd.oasis.opendocument.text-template
1193 >>>>77 string -web HTML Document Template
1194 !:mime application/vnd.oasis.opendocument.text-web
1196 >>>>77 string -master Master Document
1197 !:mime application/vnd.oasis.opendocument.text-master
# "graphics" is 8 bytes -> suffix at offset 81
1199 >>>73 string graphics
1200 >>>>81 byte !0x2d Drawing
1201 !:mime application/vnd.oasis.opendocument.graphics
1203 >>>>81 string -template Drawing Template
1204 !:mime application/vnd.oasis.opendocument.graphics-template
# "presentation" is 12 bytes -> suffix at offset 85
1206 >>>73 string presentation
1207 >>>>85 byte !0x2d Presentation
1208 !:mime application/vnd.oasis.opendocument.presentation
1210 >>>>85 string -template Presentation Template
1211 !:mime application/vnd.oasis.opendocument.presentation-template
# "spreadsheet" is 11 bytes -> suffix at offset 84
1213 >>>73 string spreadsheet
1214 >>>>84 byte !0x2d Spreadsheet
1215 !:mime application/vnd.oasis.opendocument.spreadsheet
1217 >>>>84 string -template Spreadsheet Template
1218 !:mime application/vnd.oasis.opendocument.spreadsheet-template
# NOTE(review): no level-3 parent is visible for the offset-78 chart tests
# (embedded numbering skips 1219-1220); presumably ">>>73 string chart" is
# missing -- confirm against upstream.
1221 >>>>78 byte !0x2d Chart
1222 !:mime application/vnd.oasis.opendocument.chart
1224 >>>>78 string -template Chart Template
1225 !:mime application/vnd.oasis.opendocument.chart-template
# "formula" is 7 bytes -> suffix at offset 80
1227 >>>73 string formula
1228 >>>>80 byte !0x2d Formula
1229 !:mime application/vnd.oasis.opendocument.formula
1231 >>>>80 string -template Formula Template
1232 !:mime application/vnd.oasis.opendocument.formula-template
1234 # https://www.loc.gov/preservation/digital/formats/fdd/fdd000441.shtml
1235 >>>73 string database Database
1236 !:mime application/vnd.oasis.opendocument.database
1238 # Valid for LibreOffice Base 6.0.1.1 at least
1239 >>>73 string base Database
1240 # https://bugs.documentfoundation.org/show_bug.cgi?id=45854
1241 !:mime application/vnd.oasis.opendocument.database
1242 #!:mime application/vnd.oasis.opendocument.base
# NOTE(review): no level-3 parent is visible for the offset-78 image tests
# (embedded numbering skips 1243-1244); presumably ">>>73 string image" is
# missing -- confirm against upstream.
1245 >>>>78 byte !0x2d Image
1246 !:mime application/vnd.oasis.opendocument.image
1248 >>>>78 string -template Image Template
1249 !:mime application/vnd.oasis.opendocument.image-template
1252 # EPUB (OEBPS) books using OCF (OEBPS Container Format)
1253 # https://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
1254 # From: Ralf Brown <ralf.brown@gmail.com>
# Tests the text right after "application/" (offset 50) for "epub+zip".
1255 >>50 string epub+zip EPUB document
1256 !:mime application/epub+zip
1258 # From: Joerg Jenderek
1259 # URL: http://en.wikipedia.org/wiki/CorelDRAW
1260 # NOTE: version; til 2 WL-based; from 3 til 13 by ./riff; from 14 zip based
# "x-vnd.corel." is 12 bytes, so the product/subtype string is tested at
# offset 62. Each subtype sets the MIME type spelled out by the full string.
1261 >>50 string x-vnd.corel. Corel
1262 >>>62 string draw.document+zip Draw drawing, version 14-16
1263 !:mime application/x-vnd.corel.draw.document+zip
1265 >>>62 string draw.template+zip Draw template, version 14-16
1266 !:mime application/x-vnd.corel.draw.template+zip
1268 >>>62 string zcf.draw.document+zip Draw drawing, version 17-22
1269 !:mime application/x-vnd.corel.zcf.draw.document+zip
1271 >>>62 string zcf.draw.template+zip Draw template, version 17-22
1272 !:mime application/x-vnd.corel.zcf.draw.template+zip
1274 # URL: http://product.corel.com/help/CorelDRAW/540240626/Main/EN/Doc/CorelDRAW-Other-file-formats.html
1275 >>>62 string zcf.pattern+zip Draw pattern, version 22
1276 !:mime application/x-vnd.corel.zcf.pattern+zip
1278 # URL: https://en.wikipedia.org/wiki/Corel_Designer
1279 # Reference: http://fileformats.archiveteam.org/wiki/Corel_Designer
1280 # Note: called by TrID "Corel DESIGN graphics"
1281 >>>62 string designer.document+zip DESIGNER graphics, version 14-16
1282 !:mime application/x-vnd.corel.designer.document+zip
1284 >>>62 string zcf.designer.document+zip DESIGNER graphics, version 17-21
1285 !:mime application/x-vnd.corel.zcf.designer.document+zip
1287 # URL: http://product.corel.com/help/CorelDRAW/540223850/Main/EN/Documentation/
1288 # CorelDRAW-Corel-Symbol-Library-CSL.html
1289 >>>62 string symbol.library+zip Symbol Library, version 6-16.3
1290 !:mime application/x-vnd.corel.symbol.library+zip
1292 >>>62 string zcf.symbol.library+zip Symbol Library, version 17-22
1293 !:mime application/x-vnd.corel.zcf.symbol.library+zip
1296 # Catch other ZIP-with-mimetype formats
1297 # In a ZIP file, the bytes immediately after a member's contents are
1298 # always "PK". The 2 regex rules here print the "mimetype" member's
1299 # contents up to the first 'P'. Luckily, most MIME types don't contain
1300 # any capital 'P's. This is a kludge.
1301 # (mimetype contains "application/<OTHER>")
# "default" fires only when none of the preceding offset-50 tests at this
# level matched.
1302 >>50 default x Zip data
# Character class [!-OQ-~] is the range '!'..'~' with 'P' left out; the
# match starts at offset 38, where the member contents begin.
1303 >>>38 regex [!-OQ-~]+ (MIME type "%s"?)
1304 !:mime application/zip
1305 # (mimetype contents other than "application/*")
1306 >26 string \x8\0\0\0mimetype
1307 >>38 string !application/
1308 >>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?)
1309 !:mime application/zip
# Indirect offset (26.s+30): read the little-endian short stored at offset 26
# and add 30; a little-endian short equal to 0xcafe there is reported as JAR.
1312 >(26.s+30) leshort 0xcafe Java archive data (JAR)
1313 !:mime application/java-archive
# NOTE(review): embedded numbering skips lines around here (1314-1315, 1321);
# lines may be missing from this copy -- confirm against upstream.
# Not JAR and first member not named "mimetype": a first member name starting
# with "Payload/" that has ".app/" within the next 64 bytes is an iOS App.
1316 >(26.s+30) leshort !0xcafe
1317 >>26 string !\x8\0\0\0mimetype
1318 >>>30 string Payload/
1319 >>>>38 search/64 .app/ iOS App
1320 !:mime application/x-ios-app
# Search up to 100 bytes from offset 30 for the EPUB MIME string.
1322 >30 search/100/b application/epub+zip EPUB document
1323 !:mime application/epub+zip
1325 # Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
1326 # Next line excludes specialized formats:
# Three nested negative tests: not JAR (no 0xcafe), no EPUB MIME string within
# 100 bytes of offset 30, and first member not named "mimetype".
1327 >(26.s+30) leshort !0xcafe
1328 >>30 search/100/b !application/epub+zip
1329 >>>26 string !\x8\0\0\0mimetype Zip archive data
1330 !:mime application/zip
# The three lines at offset 4 build the phrase ", at least <version> to
# extract"; "zipversion" is a named sub-routine defined outside this chunk.
1331 >>>>4 beshort x \b, at least
1332 >>>>4 use zipversion
1333 >>>>4 beshort x to extract
1334 >>>>0x161 string WINZIP \b, WinZIP self-extracting
1337 # From Pierre Ducroquet <pinaraf@pinaraf.info>
# "VCLMTF" at offset 0, then a big-endian short at 6 (printed as version)
# and a big-endian long at 8 (printed as size).
1338 0 string VCLMTF StarView MetaFile
1339 >6 beshort x \b, version %d
1340 >8 belong x \b, size %d
# Little-endian long 0xfdc4a7dc at offset 20 identifies the archive.
1343 20 lelong 0xfdc4a7dc Zoo archive data
1344 !:mime application/x-zoo
# Byte at offset 4 is printed as a character when it is above 48 (ASCII '0').
1345 >4 byte >48 \b, v%c.
# NOTE(review): embedded numbering skips 1346-1347 and 1349; lines may be
# missing from this copy -- confirm against upstream.
1348 >32 byte >0 \b, modify: v%d
# The same tag value at offset 42 gates the "extract" version at offset 70.
1350 >42 lelong 0xfdc4a7dc \b,
1351 >>70 byte >0 extract: v%d
# Text "# This is a shell archive" at offset 10 (spaces are backslash-escaped
# so they stay part of the test string).
1355 10 string #\ This\ is\ a\ shell\ archive shell archive text
1356 !:mime application/octet-stream
1359 # LBR. NB: May conflict with the questionable
1360 # "binary Computer Graphics Metafile" format.
# Pattern at offset 0: a nul byte, a run of escaped spaces, then two nul bytes.
1362 0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data
1364 # PMA (CP/M derivative of LHA)
1365 # Update: Joerg Jenderek
1366 # URL: https://en.wikipedia.org/wiki/LHA_(file_format)
# The -pm0-/-pm1-/-pm2- rules are commented out here; only the "-pms-"
# method id at offset 2 is still tested in this place.
1368 #2 string -pm0- PMarc archive data [pm0]
1371 #2 string -pm1- PMarc archive data [pm1]
1374 #2 string -pm2- PMarc archive data [pm2]
1377 2 string -pms- PMarc SFX archive (CP/M, DOS)
1378 #!:mime application/x-foobar-exec
# Matches the five bytes "-pc1-" at offset 5.
1380 5 string -pc1- PopCom compressed executable (CP/M)
1381 #!:mime application/x-
1384 # From Rafael Laboissiere <rafael@laboissiere.net>
1385 # The Project Revision Control System (see
1386 # http://prcs.sourceforge.net) generates a packaged project
1387 # file which is recognized by the following entry:
# Little-endian short 0xeb81 at offset 0 (bytes 0x81 0xeb in file order).
1388 0 leshort 0xeb81 PRCS packaged project
1390 # Microsoft cabinets
1391 # by David Necas (Yeti) <yeti@physics.muni.cz>
1392 #0 string MSCF\0\0\0\0 Microsoft cabinet file data,
1395 # MPi: All CABs have version 1.3, so this is pointless.
1396 # Better magic in debian-additions.
1399 # by David Necas (Yeti) <yeti@physics.muni.cz>
# "gtktalog " (with trailing space) at offset 4; version text follows at 13.
1400 4 string gtktalog\ GTKtalog catalog data,
1401 >13 string 3 version 3
# 0x677a is the byte pair "gz" read as a big-endian short.
1402 >>14 beshort 0x677a (gzipped)
1403 >>14 beshort !0x677a (not gzipped)
# Any version string that compares greater than "3" is printed verbatim.
1404 >13 string >3 version %s
1406 ############################################################################
1407 # Parity archive reconstruction file, the 'par' file format now used on Usenet.
# "PAR" plus a nul byte at offset 0; the little-endian short at offset 48 is
# 0 for an index file, otherwise it is printed as the file number.
1408 0 string PAR\0 PARity archive data
1409 >48 leshort =0 - Index file
1410 >48 leshort >0 - file number %d
1412 # Felix von Leitner <felix-file@fefe.de>
# Four separate level-0 entries with the same message and MIME type; they
# differ only in the "d<length>:<key>" text expected at offset 0.
1413 0 string d8:announce BitTorrent file
1414 !:mime application/x-bittorrent
1415 # Durval Menezes, <jmgthbfile at durval dot com>
1416 0 string d13:announce-list BitTorrent file
1417 !:mime application/x-bittorrent
1418 0 string d7:comment BitTorrent file
1419 !:mime application/x-bittorrent
1420 0 string d4:info BitTorrent file
1421 !:mime application/x-bittorrent
1423 # Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
# Big-endian short 0x0e0f at offset 0, followed by big-endian 16-bit fields:
# offset 2 sectors per track, offset 4 side code (0 or 1), offsets 6 and 8
# starting and ending track.
1424 0 beshort 0x0e0f Atari MSA archive data
1425 >2 beshort x \b, %d sectors per track
1426 >4 beshort 0 \b, 1 sided
1427 >4 beshort 1 \b, 2 sided
1428 >6 beshort x \b, starting track: %d
1429 >8 beshort x \b, ending track: %d
1431 # Alternate ZIP string (amc@arwen.cs.berkeley.edu)
# "PK00" immediately followed by "PK" 0x03 0x04 at offset 0.
1432 0 string PK00PK\003\004 Zip archive data
1433 !:mime application/zip
1436 # ACE archive (from http://www.wotsit.org/download.asp?f=ace)
1437 # by Stefan `Sec` Zehl <sec@42.org>
# "**ACE**" at offset 7 identifies the archive.
1438 7 string **ACE** ACE archive data
1439 >15 byte >0 version %d
# Byte at offset 16: host system code 0x00 through 0x0B.
1440 >16 byte =0x00 \b, from MS-DOS
1441 >16 byte =0x01 \b, from OS/2
1442 >16 byte =0x02 \b, from Win/32
1443 >16 byte =0x03 \b, from Unix
1444 >16 byte =0x04 \b, from MacOS
1445 >16 byte =0x05 \b, from WinNT
1446 >16 byte =0x06 \b, from Primos
1447 >16 byte =0x07 \b, from AppleGS
1448 >16 byte =0x08 \b, from Atari
1449 >16 byte =0x09 \b, from Vax/VMS
1450 >16 byte =0x0A \b, from Amiga
1451 >16 byte =0x0B \b, from Next
1452 >14 byte x \b, version %d to extract
# Little-endian 16-bit flag word at offset 5; one line per flag bit.
1453 >5 leshort &0x0080 \b, multiple volumes,
1454 >>17 byte x \b (part %d),
1455 >5 leshort &0x0002 \b, contains comment
1456 >5 leshort &0x0200 \b, sfx
1457 >5 leshort &0x0400 \b, small dictionary
1458 >5 leshort &0x0800 \b, multi-volume
1459 >5 leshort &0x1000 \b, contains AV-String
# Only checked when the AV-String flag is set: byte 0x16 followed by the
# literal "*UNREGISTERED VERSION*" at offset 30.
1460 >>30 string \x16*UNREGISTERED\x20VERSION* (unregistered)
1461 >5 leshort &0x2000 \b, with recovery record
1462 >5 leshort &0x4000 \b, locked
1463 >5 leshort &0x8000 \b, solid
1464 # Date in MS-DOS format (whatever that is)
1465 #>18 lelong x Created on
1467 # sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
# "sfArk" at offset 0x1A identifies the file.
1469 0x1A string sfArk sfArk compressed Soundfont
# NOTE(review): the two lines below are level 2 but no level-1 parent is
# visible (embedded numbering skips 1470) -- confirm against upstream.
# Each prints the string at its offset when it is non-empty.
1471 >>0x1 string >\0 Version %s
1472 >>0x2A string >\0 : %s
1474 # DR-DOS 7.03 Packed File *.??_
# "Packed File " (with trailing space) at offset 0; up to 12 characters
# starting at offset 12 are printed as the former name.
1475 0 string Packed\ File\ Personal NetWare Packed File
1476 >12 string x \b, was "%.12s"
1479 # From: Tilman Sauerbeck <tilman@code-monkey.de>
# Big-endian long 0x1ee7ff00 at offset 0.
1480 0 belong 0x1ee7ff00 EET archive
1481 !:mime application/x-eet
# "RZIP" at offset 0; byte at offset 4 is printed as the version and the
# big-endian long at offset 6 as a byte count.
1484 0 string RZIP rzip compressed data
1485 >4 byte x - version %d
# NOTE(review): embedded numbering skips 1486; a line may be missing from
# this copy -- confirm against upstream.
1487 >6 belong x (%d bytes)
1489 # From: Joerg Jenderek
1490 # URL: https://help.foxitsoftware.com/kb/install-fzip-file.php
1491 # reference: http://mark0.net/download/triddefs_xml.7z/
1492 # defs/f/fzip.trid.xml
1493 # Note: unknown compression; No "PK" zip magic; normally in directory like
1494 # "%APPDATA%\Foxit Software\Addon\Foxit Reader\Install"
# Single 8-byte big-endian comparison at offset 0.
1495 0 ubequad 0x2506781901010000 Foxit add-on/update
1496 !:mime application/x-fzip
1499 # From: "Robert Dale" <robdale@gmail.com>
# Big-endian long with decimal value 123 at offset 0.
1500 0 belong 123 dar archive,
# The label is printed as hex digits; the opening quote is emitted here and
# the closing quote by the offset-12 line.
1501 >4 belong x label "%.8x
# NOTE(review): the next line is level 3 but no level-2 parent is visible
# (embedded numbering skips 1502) -- confirm against upstream.
1503 >>>12 beshort x %.4x"
# Offset 14: 0x54 is ASCII 'T'; 0x4e4e and 0x4e53 are the pairs "NN" and "NS".
1504 >14 byte 0x54 end slice
1505 >14 beshort 0x4e4e multi-part
1506 >14 beshort 0x4e53 multi-part, with -S
1508 # Symbian installation files
1509 # https://www.thouky.co.uk/software/psifs/sis.html
1510 # http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
# Two independent level-0 entries: one keyed on the little-endian long at
# offset 8 (with the release told apart by the long at offset 4), one keyed
# on the little-endian long at offset 0.
1511 8 lelong 0x10000419 Symbian installation file
1512 !:mime application/vnd.symbian.install
1513 >4 lelong 0x1000006D (EPOC release 3/4/5)
1514 >4 lelong 0x10003A12 (EPOC release 6)
1515 0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x)
1516 !:mime x-epoc/x-sisx-app
1518 # From "Nelson A. de Oliveira" <naoliv@gmail.com>
# "MPQ" followed by octal \032 (0x1a) at offset 0.
1519 0 string MPQ\032 MoPaQ (MPQ) archive
1521 # From: "Nelson A. de Oliveira" <naoliv@gmail.com>
# "KGB_arch" at offset 0; one character at offset 10 is printed as the
# compression level.
1523 0 string KGB_arch KGB Archiver file
1524 >10 string x with compression level %.1s
1526 # xar (eXtensible ARchiver) archive
1527 # URL: https://en.wikipedia.org/wiki/Xar_(archiver)
1528 # xar archive format: https://code.google.com/p/xar/
1529 # From: "David Remahl" <dremahl@apple.com>
1530 # Update: Joerg Jenderek
1531 # TODO: lzma compression; X509Data for pkg and xip
1532 # Note: verified by `xar --dump-header -f FullBundleUpdate.xar` or
1533 # `7z t -txar Xcode_10.2_beta_4.xip`
1534 0 string xar! xar archive
1535 !:mime application/x-xar
1536 # pkg for Mac OSX installer package like FullBundleUpdate.pkg
1537 # xip for signed Apple software like Xcode_10.2_beta_4.xip
1539 # always 28 in older archives
1540 >4 ubeshort >28 \b, header size %u
1541 # currently there exists only version 1 since about 2014
1542 >6 ubeshort >1 version %u,
1543 >8 ubequad x compressed TOC: %llu,
1544 #>16 ubequad x uncompressed TOC: %llu,
1545 # cksum_alg 0-2 in older and also 3-4 in newer
1546 >24 belong 0 no checksum
1547 >24 belong 1 SHA-1 checksum
1548 >24 belong 2 MD5 checksum
1549 >24 belong 3 SHA-256 checksum
1550 >24 belong 4 SHA-512 checksum
1551 >24 belong >4 unknown 0x%x checksum
1552 #>24 belong >4 checksum
# NOTE(review): from here on only the comments and the final level-5
# "indirect" line of each checksum case are visible; the intermediate
# offset-jump tests (embedded numbers 1554-1557, 1560, 1564-1569, ...) appear
# to be missing from this copy, leaving the "indirect" lines without parents
# -- confirm against the upstream file.
1553 # For no compression jump 0 bytes
1556 # jump more bytes forward by header size
1558 # jump more bytes forward by compressed table of contents size
1559 #>>>>&(8.Q) ubequad x \b, heap data 0x%llx
1561 # look for data by ./compress after message with 1 space at end
1562 >>>>>&-3 indirect x \b, contains
1563 # For SHA-1 jump 20 minus 2 bytes
1566 # jump more bytes forward by header size
1568 # jump more bytes forward by compressed table of contents size
1570 # data compressed by gzip, bzip, lzma or none
1571 >>>>>&-1 indirect x \b, contains
1572 # For SHA-256 jump 32 minus 2 bytes
1575 # jump more bytes forward by header size
1577 # jump more bytes forward by compressed table of contents size
1579 >>>>>&-1 indirect x \b, contains
1580 # For SHA-512 jump 64 minus 2 bytes
1583 # jump more bytes forward by header size
1585 # jump more bytes forward by compressed table of contents size
1587 >>>>>&-1 indirect x \b, contains
1589 # Type: Parity Archive
1590 # From: Daniel van Eeden <daniel_e@dds.nl>
# Matches the four bytes "PAR2" at offset 0.
1591 0 string PAR2 Parity Archive Volume Set
1593 # Bacula volume format. (Volumes always start with a block header.)
1594 # URL: https://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
1595 # From: Adam Buchbinder <adam.buchbinder@gmail.com>
# "BB02" at offset 12; the big-endian 4-byte value at offset 20 is printed
# as a date.
1596 12 string BB02 Bacula volume
1597 >20 bedate x \b, started %s
1599 # ePub is XHTML + XML inside a ZIP archive. The first member of the
1600 # archive must be an uncompressed file called 'mimetype' with contents
1601 # 'application/epub+zip'
1604 # From: "Michael Gorny" <mgorny@gentoo.org>
1605 # ZPAQ: http://mattmahoney.net/dc/zpaq.html
# Two level-0 entries: "zPQ" at offset 0 (byte at offset 3 printed as level)
# and "7kSt" at offset 0.
1606 0 string zPQ ZPAQ stream
1607 >3 byte x \b, level %d
1608 # From: Barry Carter <carter.barry@gmail.com>
1609 # https://encode.ru/threads/456-zpaq-updates/page32
1610 0 string 7kSt ZPAQ file
1612 # BBeB ebook, unencrypted (LRF format)
1613 # URL: https://www.sven.de/librie/Librie/LrfFormat
1614 # From: Adam Buchbinder <adam.buchbinder@gmail.com>
# 8-byte signature at offset 0: the letters L, R, F each followed by a nul
# byte, then two more nul bytes.
1615 0 string L\0R\0F\0\0\0 BBeB ebook data, unencrypted
1616 >8 beshort x \b, version %d
# Byte at offset 36: 1 or 16 selects the reading direction label.
1617 >36 byte 1 \b, front-to-back
1618 >36 byte 16 \b, back-to-front
# Opens a "(<n>x," group that is not closed by any visible line.
# NOTE(review): embedded numbering skips 1620-1621; the line completing the
# group may be missing from this copy -- confirm against upstream.
1619 >42 beshort x \b, (%dx,
1622 # Symantec GHOST image by Joerg Jenderek at May 2014
1623 # https://us.norton.com/ghost/
1624 # https://www.garykessler.net/library/file_sigs.html
# Big-endian long at offset 0 compared with 0xFEEF0100 under mask 0xFFFFf7f0:
# bit 0x08 of the byte at offset 2 and the low nibble of the byte at offset 3
# are ignored by the match.
1625 0 ubelong&0xFFFFf7f0 0xFEEF0100 Norton GHost image
# The ignored bit 0x08 at offset 2 distinguishes first file from split file.
1627 >2 ubyte&0x08 0x00 \b, first file
1628 # *.GHS or *.[0-9] with cns program option
1629 >2 ubyte&0x08 0x08 \b, split file
1630 # part of split index interesting for *.ghs
1632 # compression tag minus one equals numeric compression command line switch z[1-9]
1633 >3 ubyte 0 \b, no compression
1634 >3 ubyte 2 \b, fast compression (Z1)
1635 >3 ubyte 3 \b, medium compression (Z2)
# NOTE(review): several level-2/level-3 lines below have no visible parent
# (embedded numbering skips 1626, 1631, 1636, 1638, 1641); parent tests may be
# missing from this copy -- confirm against upstream.
1637 >>3 ubyte <11 \b, compression (Z%d-1)
1639 # ~ 30 byte password field only for *.gho
1640 >>12 ubequad !0 \b, password protected
1642 # 1~Image All, sector-by-sector only for *.gho
1643 >>>10 ubyte 1 \b, sector copy
1644 # 1~Image Boot track only for *.gho
1645 >>>43 ubyte 1 \b, boot track
1646 # 1~Image Disc only for *.gho implies Image Boot track and sector copy
1647 >>44 ubyte 1 \b, disc sector copy
1648 # optional image description only *.gho
1649 >>0xff string >\0 "%-.254s"
1650 # look for DOS sector end sequence
# Search up to 7776 bytes from offset 0xE08 for bytes 0x55 0xAA, then hand
# the data starting 512 bytes before the end of the match back to the whole
# magic database ("indirect").
1651 >0xE08 search/7776 \x55\xAA
1652 >>&-512 indirect x \b; contains
1654 # Google Chrome extensions
1655 # https://developer.chrome.com/extensions/crx
1656 # https://developer.chrome.com/extensions/hosting
# "Cr24" at offset 0 identifies the package.
1657 0 string Cr24 Google Chrome extension
1658 !:mime application/x-chrome-extension
# The CRX header stores its fields little-endian, so read the version with an
# explicit byte order; native-endian "ulong" prints a byte-swapped value on
# big-endian hosts.
1659 >4 ulelong x \b, version %u
1661 # SeqBox - Sequenced container
1663 # Marco Pontello marcopon@gmail.com
1664 # reference: https://github.com/MarcoPon/SeqBox
# "SBx" at offset 0; the byte at offset 3 is printed as the version.
1665 0 string SBx SeqBox,
1666 >3 byte x version %d
# Text "USE LYNX TO DISSOLVE THIS FILE" at offset 56; \040 is the octal
# escape for a space.
1669 56 string USE\040LYNX\040TO\040DISSOLVE\040THIS\040FILE LyNX archive
1671 # From: Joerg Jenderek
1672 # URL: https://www.acronis.com/
1673 # Reference: https://en.wikipedia.org/wiki/TIB_(file_format)
1674 # Note: only tested with True Image 2013 Build 5962 and 2019 Build 14110
# Single 8-byte big-endian comparison at offset 0. Every remaining line in
# this block is commented out, so nothing beyond the name is reported from
# the lines visible here.
1675 0 ubequad 0xce24b9a220000000 Acronis True Image backup
1676 !:mime application/x-acronis-tib
1679 #>20 ubelong x \b, at 20 0x%x
1681 #>28 ubelong x \b, at 28 0x%x
1682 # strings like "Generic- SD/MMC 1.00" "Unknown Disk" "Msft Virtual Disk 1.0"
1684 # strings like "\Device\0000011e" "\Device\0000015a"
1685 #>0 search/0x6852300/cs \\Device\\
1686 #>>&-1 pstring x \b, %s
1687 # "\Device\HarddiskVolume30" "\Device\HarddiskVolume39"
1688 #>>>&1 search/180/cs \\Device\\
1689 #>>>>&-1 pstring x \b, %s
1690 #>>>>>&0 search/29/cs \0\0\xc8\0
1692 #>>>>>>&10 lestring16 x \b, disk label %11.11s
1693 #>>>>>>&9 plestring16 x \b, disk label "%11.11s"
1694 #>>>>>>&10 ubequad x %16.16llx
1697 # Gentoo XPAK binary package
1698 # by Michal Gorny <mgorny@gentoo.org>
1699 # https://gitweb.gentoo.org/proj/portage.git/tree/man/xpak.5
# NOTE(review): embedded numbering skips 1700, so the level-0 test this
# continuation line belongs to may be missing -- confirm against upstream.
# A negative offset counts from the end of the file: "XPAKSTOP" is expected
# 16 bytes before the end.
1701 >-16 string XPAKSTOP Gentoo binary package (XPAK)
1703 # From: Joerg Jenderek
1704 # URL: https://kodi.wiki/view/TexturePacker
1705 # Reference: https://mirrors.kodi.tv/releases/source/17.3-Krypton.tar.gz
1706 # /xbmc-Krypton/xbmc/guilib/XBTF.h
1707 # /xbmc-Krypton/xbmc/guilib/XBTF.cpp
1709 # skip ASCII text by looking for terminating \0 of path
1710 >264 ubyte 0 XBMC texture package
1711 !:mime application/x-xbmc-xbt
1714 >>4 string !2 \b, version %-.1s
1715 # nofFiles /xbmc-Krypton/xbmc/guilib/XBTFReader.cpp
1716 >>5 ulelong x \b, %u file
1719 # path[CXBTFFile[MaximumPathLength=256]
1720 >>9 string x \b, 1st %s