2 #------------------------------------------------------------------------------
3 # $File: ole2compounddocs,v 1.8 2020/03/28 23:10:30 christos Exp $
4 # Microsoft OLE 2 Compound Documents : file(1) magic for Microsoft Structured
5 # storage (https://en.wikipedia.org/wiki/Compound_File_Binary_Format)
6 # Additional tests for OLE 2 Compound Documents should be under this recipe.
7 # reference: https://www.openoffice.org/sc/compdocfileformat.pdf
# Top-level test: the 8 signature bytes D0 CF 11 E0 A1 B1 1A E1 at offset 0,
# written below as octal escapes.
9 0 string \320\317\021\340\241\261\032\341
10 # https://digital-preservation.github.io/droid/
11 # skip droid skeleton like fmt-39-signature-id-128.doc by valid version
# (the 16-bit value at 0x1A, printed further down as the major version, must not be 0xABAB)
12 >0x1A ushort !0xABAB OLE 2 Compound Document
13 #>0x1C uleshort x \b, endian 0x%4.4x
14 # big endian not tested
15 >>0x1C ubeshort =0xfffe \b, big-endian
16 >>>546 string jbjb : Microsoft Word Document
17 !:mime application/msword
20 # Byte Order 0xFFFE means little-endian found in real world applications
21 #>>0x1C uleshort =0xfffe \b, little-endian
22 >>0x1C uleshort =0xfffe
23 # From: Joerg Jenderek
# The two tests below print the version as "v<major>.<minor>".
24 # Major Version 3 or 4
25 >>>0x1A uleshort x \b, v%u
26 # Minor Version 32h=50 3Bh=59 3Eh=62
27 >>>0x18 uleshort x \b.%u
28 # SecID of first sector of the directory stream is often 1 but high like 3144h
29 >>>48 ulelong x \b, SecID 0x%x
30 # Sector Shift Exponent in short-stream container stream: 6~64 bytes
# (only reported when it differs from 6)
31 >>>32 uleshort !6 \b, exponent of short stream %u
32 # total number of sectors used for the FAT
# (only reported when greater than 1)
33 >>>44 ulelong >1 \b, %u FAT sectors
34 # SecID of first sector of the short-sector allocation table (Mini FAT)
35 # or -2 (End Of Chain SecID) if not extant
36 >>>60 ulelong !0xffFFffFE \b, Mini FAT start sector 0x%x
37 # total number of sectors used for the short-sector allocation table
# (only reported when it differs from 1)
38 >>>64 ulelong !1 \b, %u Mini FAT sector
41 # SecID of first sector of the master sector allocation table (DIFAT)
42 # or -2 (End Of Chain SecID) if no additional sectors used
43 >>>68 ulelong !0xffFFffFE \b, DIFAT start sector 0x%x
44 # total number of sectors used for the master sector allocation table (DIFAT)
45 >>>72 ulelong >0 \b, %u DIFAT sectors
46 # First part of the master sector allocation table (DIFAT) containing 109 SecIDs
47 #>>>76 ubequad x \b, DIFAT=0x%16.16llx
48 #>>>84 ubequad x \b%16.16llx...
49 # pointer to root entry only works with standard configuration for SecID ~< 800h
50 # Red-Carpet-presentation-1.0-1.sdd sg10.sdv 2000_GA_Annual_Review_Data.xls
51 # "ORLEN Factbook 2017.xls" XnView_metadata.doc
52 # "Barham, Lisa - Die Shopping-Prinzessinnen.doc" then not recognized
# NOTE(review): the message below says FILE_BYTES_MAX = 1 MiB, while a comment
# further down cites FILE_BYTES_MAX=7 MiB for 5.37 - confirm which value is current.
53 >>>48 ulelong >0x800 too big for FILE_BYTES_MAX = 1 MiB
54 # Sector Shift Exponent 9~512 for major version 3 or C~4096 for major version 4
55 >>>0x1E uleshort 0xc \b, blocksize 4096
56 # jump to one block (4096 bytes per block) before root storage block
# (48.l*4096) is an indirect offset: the little-endian 32-bit value at offset 48
# (the directory SecID printed above) multiplied by 4096. One byte is read there,
# and &4095 is relative to the end of that byte, i.e. one full block further on.
57 >>>>(48.l*4096) ubyte x
58 >>>>>&4095 use ole2-directory
59 #>>>0x1E uleshort 9 \b, blocksize 512
# NOTE(review): confirm that a level-3 test on 0x1E (e.g. "not 0xc") precedes the
# jump below; without one it would be nested under the blocksize 4096 test above.
61 # jump to one block (512 bytes per block) before root storage block
62 # in 5.37 only true for offset ~< FILE_BYTES_MAX=7 MiB defined in ../../src/file.h
# Same scheme as the 4096-byte case: SecID * 512, read one byte, then skip 511 more.
63 >>>>(48.l*512) ubyte x
64 >>>>>&511 use ole2-directory
65 # check directory entry structure and display types by GUID
67 # directory entry name like "Root Entry"
68 #>0 lestring16 x \b, 1st %.10s
69 # type of the entry; 5~Root storage
70 #>66 ubyte x \b, type %x
71 # node colour of the entry: 00H ~ Red 01H ~ Black
72 #>67 ubyte x \b, color %x
73 # the DirIDs of the child nodes. Should both be -1 in the root storage entry
74 #>68 bequad !0xffffffffffffffff \b, DirIDs %llx
75 # second directory entry name like VisioDocument Control000
76 #>128 lestring16 x \b, 2nd %.20s
77 # third directory entry like WordDocument
78 #>256 lestring16 x \b, 3rd %.20s
80 #>384 lestring16 x \b, 4th %.10s
82 #>512 lestring16 x \b, 5th %.10s
84 #>640 lestring16 x \b, 6th %.10s
86 #>768 lestring16 x \b, 7th %.10s
87 # https://wikileaks.org/ciav7p1/cms/page_13762814.html
88 # https://m.blog.naver.com/superman4u/40047693679
89 # https://misc.daniel-marschall.de/projects/guid_analysis/guid.txt
90 # http://www.windowstricks.in/online-windows-guid-converter
91 #>80 ubequad !0 \b, clsid 0x%16.16llx
92 #>>88 ubequad x \b%16.16llx
93 # test for "Root Entry" inside directory by type 5 value
95 # look for CLSID GUID 0
# The tests in this part identify a document by the name of a directory entry.
# Names are compared with lestring16 at fixed offsets; the comments on each test
# call offset 128 the second and offset 256 the third directory entry name.
# Per the surrounding comments these tests are meant for a null clsid.
98 # - Microstation V8 DGN files (www.bentley.com)
99 # URL: https://en.wikipedia.org/wiki/MicroStation
100 # Last update on 10/23/2006 by Lester Hightower
101 # 07/24/2019 by Joerg Jenderek
102 # Second directory entry name like Dgn~H Dgn~S
103 >>>>128 lestring16 Dgn~ : Microstation V8 CAD
104 #!:mime application/x-ole-storage
105 !:mime application/x-bentley-dgn
106 # http://www.q-cad.com/files/samples_cad_files/1344468165.dgn
109 # URL: http://fileformats.archiveteam.org/wiki/WordPerfect
110 # Second directory entry name PerfectOffice_
111 >>>>128 lestring16 PerfectOffice_ : WordPerfect 7-X3 presentations Master, Document or Graphic
112 !:mime application/vnd.wordperfect
113 # https://www.macdisk.com/macsigen.php "WPC2" for Wordperfect 2 *.wpd
117 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Word_Processor
118 # Second directory entry name MatOST_
119 >>>>128 lestring16 MatOST : Microsoft Works 3.0 document
120 !:mime application/vnd.ms-works
124 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Spreadsheet
125 # 3rd directory entry name WksSSWorkBook
126 >>>>256 lestring16 WksSSWorkBook : Microsoft Works 6-9 spreadsheet
127 !:mime application/vnd.ms-works
131 # URL: http://fileformats.archiveteam.org/wiki/XLS
132 # what is the difference to {00020820-0000-0000-c000-000000000046} ?
133 # Second directory entry name Workbook
134 >>>>128 lestring16 Workbook
# exclude files whose 3rd entry is WksSSWorkBook; the Works spreadsheet test
# above already reports those
135 >>>>>256 lestring16 !WksSSWorkBook : Microsoft Excel 97-2003 worksheet 0 clsid
136 !:mime application/vnd.ms-excel
137 # https://www.macdisk.com/macsigen.php XLS5 for Excel 5
141 # URL: http://fileformats.archiveteam.org/wiki/PPT
142 # Second directory entry name Object1 Object12 Object35
143 >>>>128 lestring16 Object : Microsoft PowerPoint 4 presentation
144 !:mime application/vnd.ms-powerpoint
145 # https://www.macdisk.com/macsigen.php
149 # URL: https://www.msoutlook.info/question/164
150 # Second directory entry name __CollDataStm
151 >>>>128 lestring16 __CollDataStm : Microsoft Outlook Send Receive Settings
152 #!:mime application/vnd.ms-outlook
153 !:mime application/x-ms-srs
154 # %APPDATA%\Microsoft\Outlook\Outlook.srs
157 # URL: https://www.file-extensions.org/cag-file-extension
158 # Second directory entry name Category
159 >>>>128 lestring16 Category : Microsoft Clip Art Gallery
160 #!:mime application/x-ole-storage
161 !:mime application/x-ms-cag
165 # URL: https://www.filesuffix.com/de/extension/rra
166 # 3rd directory entry name StrIndex_StringTable
167 >>>>256 lestring16 StrIndex_StringTable : Windows temporarily installer
168 #!:mime application/x-ole-storage
169 !:mime application/x-ms-rra
172 # URL: https://www.forensicswiki.org/wiki/Jump_Lists
173 # 3rd directory entry name DestList
174 >>>>256 lestring16 DestList : Windows jump list
175 #!:mime application/x-ole-storage
176 !:mime application/x-ms-jumplist
177 # %APPDATA%\Microsoft\Windows\Recent\AutomaticDestinations\*.automaticDestinations-ms
178 !:ext automaticDestinations-ms
180 # URL: https://en.wikipedia.org/wiki/Windows_thumbnail_cache
181 # Second directory entry name 256_
182 >>>>128 lestring16 256_ : Windows thumbnail database 256
183 #!:mime application/x-ole-storage
184 !:mime application/x-ms-thumbnail
# Second directory entry name 96_
187 >>>>128 lestring16 96_ : Windows thumbnail database 96
188 !:mime application/x-ms-thumbnail
190 # 3rd directory entry name Catalog_
191 >>>>256 lestring16 Catalog : Windows thumbnail database
192 !:mime application/x-ms-thumbnail
195 # URL: https://support.microsoft.com/en-us/help/300887/how-to-use-system-information-msinfo32-command-line-tool-switches
196 # Note: older Microsoft Systeminfo (MSInfo Configuration File of msinfo32); newer versions are XML based
197 # Second directory entry name Control000
198 >>>>128 lestring16 Control000 : Microsoft old Systeminfo
199 #!:mime application/x-ole-storage
200 !:mime application/x-ms-info
203 # URL: http://fileformats.archiveteam.org/wiki/Corel_Print_House
204 # Second directory entry name Thumbnail
205 >>>>128 lestring16 Thumbnail : Corel PrintHouse image
206 #!:mime application/x-ole-storage
207 !:mime application/x-corel-cph
209 # 3rd directory entry name Thumbnail
210 >>>>256 lestring16 Thumbnail : Corel PrintHouse image
211 !:mime application/x-corel-cph
214 # URL: https://en.wikipedia.org/wiki/Hangul_(word_processor)
215 # Note: "HWP Document File" signature found in FileHeader
216 # Second directory entry name FileHeader hint for Thinkfree Office document
217 >>>>128 lestring16 FileHeader : Hangul (Korean) 5.0 Word Processor File
218 #!:mime application/haansofthwp
219 !:mime application/x-hwp
220 # https://example-files.online-convert.com/document/hwp/example.hwp
223 # URL: https://ask.libreoffice.org/en/question/26303/creating-new-themes-for-the-gallery-not-functioning/
224 # Second directory entry name like dd2000 dd2001 dd2036 dd2060 dd2083
225 >>>>128 lestring16 dd2 : StarOffice Gallery view
226 #!:mime application/x-ole-storage
227 !:mime application/x-star-sdv
229 # URL: https://en.wikipedia.org/wiki/SoftMaker_Office
# SoftMaker is recognised by "Current User" as either the second or the third
# entry name; the nested tests then refine the description by where the
# "PowerPoint" entry name is found (offsets 384 and 512 are labelled 4th and 5th).
230 # second directory entry name Current User
231 >>>>128 lestring16 Current\ User : SoftMaker
232 # third directory entry name SMNativeObjData
233 >>>>>256 lestring16 SMNativeObjData
234 # 5th directory entry name PowerPoint
235 >>>>>>512 lestring16 PowerPoint PowerPoint presentation or template
236 !:mime application/vnd.ms-powerpoint
238 # 4th directory entry name PowerPoint
239 >>>>>384 lestring16 PowerPoint Presentations or template
240 # http://extension.nirsoft.net/prv
241 !:mime application/vnd.softmaker.presentations
243 # third directory entry name like Current User
244 >>>>256 lestring16 Current\ User : SoftMaker
245 # 5th directory entry name PowerPoint
246 >>>>>512 lestring16 PowerPoint Presentations or template
247 # http://extension.nirsoft.net/prd
248 !:mime application/vnd.softmaker.presentations
250 # 2nd directory entry name Pictures
251 >>>>>>128 lestring16 Pictures with pictures
252 # remaining null clsid
# "default" is taken only when no earlier test at this continuation level matched
253 >>>>128 default x : UNKNOWN
254 !:mime application/x-ole-storage
255 # look for known clsid GUID
# The 16-byte clsid is tested as two big-endian quads (ubequad): the quad at 88
# selects the vendor text printed after ":", the quad at 80 then selects the product.
# Example using values from this file: the GUID {00020820-0000-0000-c000-000000000046}
# appears to correspond to 0x2008020000000000 at 80 and 0xc000000000000046 at 88
# (first GUID field byte-swapped) - NOTE(review): mapping inferred, confirm.
257 # URL: http://fileformats.archiveteam.org/wiki/Visio
258 # Last update on 10/23/2006 by Lester Hightower, 07/20/2019 by Joerg Jenderek
259 >>88 ubequad 0xc000000000000046 : Microsoft
260 >>>80 ubequad 0x131a020000000000 Visio 2000-2002 Document, stencil or template
261 !:mime application/vnd.visio
262 # VSD~Drawing VSS~Stencil VST~Template
264 >>>80 ubequad 0x141a020000000000 Visio 2003-2010 Document, stencil or template
265 !:mime application/vnd.visio
268 # URL: http://fileformats.archiveteam.org/wiki/Windows_Installer
269 >>>80 ubequad 0x84100c0000000000 Windows Installer Package
270 !:mime application/x-msi
271 #!:mime application/x-ms-win-installer
273 >>>80 ubequad 0x86100c0000000000 Windows Installer Patch
275 !:mime application/x-wine-extension-msp
276 #!:mime application/x-ms-msp
279 # URL: http://fileformats.archiveteam.org/wiki/DOC
280 >>>80 ubequad 0x0009020000000000 Word 6-95 document or template
281 !:mime application/msword
282 # for template MSWDW8TN
285 >>>80 ubequad 0x0609020000000000 Word 97-2003 document or template
286 !:mime application/msword
288 # dot for template; no extension on Macintosh
291 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Word_Processor
292 >>>80 ubequad 0x0213020000000000 Works 3-4 document or template
293 !:mime application/vnd.ms-works
295 # ps for template https://filext.com/file-extension/PS bps for backup
298 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Database
299 >>>80 ubequad 0x0313020000000000 Works 3-4 database or template
300 !:mime application/vnd.ms-works-db
301 # https://www.macdisk.com/macsigen.php
303 # db for template www.file-extensions.org/db-file-extension-microsoft-works-data bdb for backup
306 # URL: https://en.wikipedia.org/wiki/Microsoft_Excel
307 >>>80 ubequad 0x1008020000000000 Excel 5-95 worksheet, addin or template
308 !:mime application/vnd.ms-excel
309 # https://www.macdisk.com/macsigen.php
311 # worksheet/addin/template/no extension on Macintosh
314 >>>80 ubequad 0x2008020000000000 Excel 97-2003
315 !:mime application/vnd.ms-excel
316 # https://www.macdisk.com/macsigen.php XLS5 for Excel 5
# "addin" is appended when _VBA_PROJECT_CUR is the 3rd or 4th entry name;
# otherwise the default rule appends "worksheet or template".
318 # 3rd directory entry name
319 >>>>256 lestring16 _VBA_PROJECT_CUR addin
321 # 4th directory entry name
322 >>>>384 lestring16 _VBA_PROJECT_CUR addin
325 >>>>256 default x worksheet or template
329 # URL: http://fileformats.archiveteam.org/wiki/OLE2
330 >>>80 ubequad 0x0b0d020000000000 Outlook 97-2003 item
331 #>>>80 ubequad 0x0b0d020000000000 Outlook 97-2003 Message
332 #!:mime application/vnd.ms-outlook
333 !:mime application/x-ms-msg
335 # URL: https://wiki.fileformat.com/email/oft/
336 >>>80 ubequad 0x46f0060000000000 Outlook 97-2003 item template
337 #!:mime application/vnd.ms-outlook
338 !:mime application/x-ms-oft
341 # URL: http://fileformats.archiveteam.org/wiki/PPT
342 >>>80 ubequad 0x5148040000000000 PowerPoint 4.0 presentation
343 !:mime application/vnd.ms-powerpoint
344 # https://www.macdisk.com/macsigen.php
# Further Microsoft products. Each group first tests the quad at 88 (printing
# ": Microsoft") and then the quad at 80 for the individual product.
348 # URL: http://www.checkfilename.com/view-details/Microsoft-Works/RespageIndex/0/sTab/2/
349 >>88 ubequad 0xa29a00aa004a1a72 : Microsoft
350 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Word_Processor
351 >>>80 ubequad 0xc2dbcd28e20ace11 Works 4 document
352 !:mime application/vnd.ms-works
356 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Database
357 >>>80 ubequad 0xc3dbcd28e20ace11 Works 4 database
358 !:mime application/vnd.ms-works-db
362 >>88 ubequad 0xa40700c04fb932ba : Microsoft
363 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Word_Processor
364 >>>80 ubequad 0xb25aa40e0a9ed111 Works 5-6 document
365 !:mime application/vnd.ms-works
369 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Publisher
# note: the value below starts with 00c0, which is distinct from the
# 0xc000000000000046 value tested for Visio, Word, Excel etc.
370 >>88 ubequad 0x00c0000000000046 : Microsoft
371 >>>80 ubequad 0x0112020000000000 Publisher
372 !:mime application/vnd.ms-publisher
375 # URL: http://fileformats.archiveteam.org/wiki/PPT
377 >>88 ubequad 0xa90300aa00510ea3 : Microsoft
378 >>>80 ubequad 0x70ae7bea3bfbcd11 PowerPoint 95 presentation
379 !:mime application/vnd.ms-powerpoint
380 # https://www.macdisk.com/macsigen.php
384 >>88 ubequad 0x86ea00aa00b929e8 : Microsoft
385 >>>80 ubequad 0x108d81649b4fcf11 PowerPoint 97-2003 presentation or template
386 !:mime application/vnd.ms-powerpoint
388 # /autostart/template
391 # URL: https://en.wikipedia.org/wiki/Microsoft_Project
393 >>88 ubequad 0xbe1100c04fb6faf1 : Microsoft
394 >>>80 ubequad 0x3a8fb774c8c8d111 Project
395 !:mime application/vnd.ms-project
# WordPerfect and StarOffice products. As elsewhere, the quad at 88 prints the
# vendor name and the quad at 80 prints the product and selects the MIME type.
398 # URL: http://fileformats.archiveteam.org/wiki/SHW_(Corel)
400 >>88 ubequad 0x99ae04021c007002 : WordPerfect
401 >>>80 ubequad 0x62fe2e4099191b10 7-X3 presentation
402 !:mime application/x-corelpresentations
403 #!:mime application/x-shw-viewer
404 #!:mime image/x-presentations
407 # URL: http://www.checkfilename.com/view-details/WordPerfect-Office-X3/RespageIndex/0/sTab/2/
408 >>>80 ubequad 0x60fe2e4099191b10 9 Graphic
409 #!:mime application/x-wpg
410 #!:mime image/x-wordperfect-graphics
412 # https://www.macdisk.com/macsigen.php "WPC2" for Wordperfect 2 *.wpd
416 # URL: http://fileformats.archiveteam.org/wiki/StarOffice_binary_formats
# StarOffice 3.0 Writer, Calc and Draw share the quad at 88 tested below
417 >>88 ubequad 0x996104021c007002 : StarOffice
418 >>>80 ubequad 0x407e5cdc5cb31b10 StarWriter 3.0 document or template
419 # https://www.openoffice.org/framework/documentation/mimetypes/mimetypes.html
420 !:mime application/x-starwriter
423 >>>80 ubequad 0xa03f543fa6b61b10 StarCalc 3.0 spreadsheet or template
424 !:mime application/x-starcalc
# StarDraw 3.0 is reported with the StarImpress MIME type; the x-stardraw
# alternative is kept commented out
427 >>>80 ubequad 0xe0aa10af6db31b10 StarDraw 3.0 drawing or template
428 !:mime application/x-starimpress
429 #!:mime application/x-stardraw
433 >>88 ubequad 0x89cb008029e4b0b1 : StarOffice
434 >>>80 ubequad 0x41d461633542d011 StarCalc 4.0 spreadsheet or template
435 !:mime application/x-starcalc
438 >>>80 ubequad 0x61b8a5c6d685d111 StarCalc 5.0 spreadsheet or template
439 !:mime application/vnd.stardivision.cal
442 >>>80 ubequad 0xc03c2d011642d011 StarImpress 4.0 presentation or template
443 !:mime application/x-starimpress
446 >>88 ubequad 0xb12a04021c007002 : StarOffice
447 >>>80 ubequad 0x600459d4fd351c10 StarMath 3.0
448 !:mime application/x-starmath
451 >>88 ubequad 0x8e2c00001b4cc711 : StarOffice
452 >>>80 ubequad 0xe0999cfb6d2c1c10 StarChart 3.0
453 !:mime application/x-starchart
456 >>88 ubequad 0xa45e00a0249d57b1 : StarOffice
457 >>>80 ubequad 0xb0e9048b0e42d011 StarWriter 4.0 document or template
458 !:mime application/x-starwriter
461 >>88 ubequad 0x89ca008029e4b0b1 : StarOffice
462 >>>80 ubequad 0xe1b7b3022542d011 StarMath 4.0
463 !:mime application/x-starmath
466 >>>80 ubequad 0xe0b7b3022542d011 StarChart 4.0
467 !:mime application/x-starchart
470 >>88 ubequad 0xa53f00a0249d57b1 : StarOffice
471 >>>80 ubequad 0x70c90a340de3d011 Master 4.0 document
472 !:mime application/x-starwriter-global
475 >>88 ubequad 0x89d0008029e4b0b1 : StarOffice
476 >>>80 ubequad 0x40e6b5ffde85d111 StarMath 5.0
477 !:mime application/vnd.stardivision.math
480 >>>80 ubequad 0xa005892ebd85d111 StarDraw 5.0 drawing or template
481 !:mime application/vnd.stardivision.draw
484 >>>80 ubequad 0x21725c56bc85d111 StarImpress 5.0 presentation or template
485 !:mime application/vnd.stardivision.impress
486 # sda is used for what?
489 >>>80 ubequad 0x214388bfdd85d111 StarChart 5.0
490 !:mime application/vnd.stardivision.chart
493 >>88 ubequad 0xaab4006097da561a : StarOffice
494 >>>80 ubequad 0xd1f90cc2ae85d111 StarWriter 5.0 document or template
495 !:mime application/vnd.stardivision.writer
498 >>>80 ubequad 0xd3f90cc2ae85d111 Master 5.0 document
499 !:mime application/vnd.stardivision.writer-global
502 # URL: http://fileformats.archiveteam.org/wiki/FlashPix
503 >>88 ubequad 0x855300aa00a1f95b : Kodak
504 >>>80 ubequad 0x0067615654c1ce11 FlashPIX Image
508 # URL: https://en.wikipedia.org/wiki/SoftMaker_Office
509 >>88 ubequad 0x95f600a0cc3cca14 : PlanMaker
510 >>>80 ubequad 0x9174088a6452d411 document or template
511 !:mime application/vnd.softmaker.planmaker
512 # pmv for template https://www.file-extensions.org/pmv-file-extension
514 # remaining non null clsid
# "default" is taken only when none of the earlier tests at this level matched.
515 >>88 default x : UNKNOWN
516 !:mime application/x-ole-storage
# Print the raw clsid as hex. The two tests below are siblings: the quad at 80 is
# printed (with the ", clsid 0x" prefix) only when non-zero, while the quad at 88
# is appended unconditionally.
517 >>>80 ubequad !0 \b, clsid 0x%16.16llx
518 >>>88 ubequad x \b%16.16llx