2 #------------------------------------------------------------------------------
3 # $File: ole2compounddocs,v 1.7 2019/08/02 18:08:18 christos Exp $
4 # Microsoft OLE 2 Compound Documents : file(1) magic for Microsoft Structured
5 # storage (https://en.wikipedia.org/wiki/Compound_File_Binary_Format)
6 # Additional tests for OLE 2 Compound Documents should be under this recipe.
7 # reference: https://www.openoffice.org/sc/compdocfileformat.pdf
# Entry point: the 8-byte OLE 2 compound document signature at file offset 0.
9 0 string \320\317\021\340\241\261\032\341
10 # https://digital-preservation.github.io/droid/
11 # skip droid skeleton like fmt-39-signature-id-128.doc by valid version
12 >0x1A ushort !0xABAB OLE 2 Compound Document
13 #>0x1C uleshort x \b, endnian 0x%4.4x
14 # big endian not tested
# Branch taken when the 16-bit field at 0x1C equals 0xfffe when read big-endian.
15 >>0x1C ubeshort =0xfffe \b, big-endian
# In this branch only the literal "jbjb" at offset 546 is checked.
16 >>>546 string jbjb : Microsoft Word Document
17 !:mime application/msword
# Little-endian branch: all header fields below are read with ule* types.
20 # Byte Order 0xFFFE means little-endian found in real world applications
21 #>>0x1C uleshort =0xfffe \b, little-endian
22 >>0x1C uleshort =0xfffe
23 # From: Joerg Jenderek
24 # Major Version 3 or 4
25 >>>0x1A uleshort x \b, v%u
26 # Minor Version 32h=50 3Bh=59 3Eh=62
27 >>>0x18 uleshort x \b.%u
28 # SecID of first sector of the directory stream is often 1 but high like 3144h
29 >>>48 ulelong x \b, SecID 0x%x
30 # pointer to root entry only works with standard configuration for SecID ~< 800h
31 # Red-Carpet-presentation-1.0-1.sdd sg10.sdv 2000_GA_Annual_Review_Data.xls
32 # "ORLEN Factbook 2017.xls" XnView_metadata.doc
33 # "Barham, Lisa - Die Shopping-Prinzessinnen.doc" then not recognized
# Diagnostic only: printed when the SecID at offset 48 exceeds 0x800.
34 >>>48 ulelong >0x800 too big for FILE_BYTES_MAX = 1 MiB
35 # Sector Shift Exponent 9~512 for major version 3 or C~4096 for major version 4
36 >>>0x1E uleshort 0xc \b, blocksize 4096
37 # jump to one block (4096 bytes per block) before root storage block
# (48.l*4096) is an indirect offset: the little-endian long stored at 48, times 4096.
38 >>>>(48.l*4096) ubyte x
# &4095 is a relative offset from the byte matched above; the directory tests run from there.
39 >>>>>&4095 use ole2-directory
40 #>>>0x1E uleshort 9 \b, blocksize 512
# Silent guard: Sector Shift Exponent 9 means 512-byte sectors. Without this
# level-3 test the jump below would be a child of the "0xc" (4096) test and
# would run only for 4096-byte-sector files, never for 512-byte-sector files.
41 >>>0x1E uleshort 9
42 # jump to one block (512 bytes per block) before root storage block
43 # in 5.37 only true for offset ~< FILE_BYTES_MAX=7 MiB defined in ../../src/file.h
# (48.l*512) is an indirect offset: the little-endian long stored at 48, times 512.
44 >>>>(48.l*512) ubyte x
# &511 is a relative offset from the byte matched above; the directory tests run from there.
45 >>>>>&511 use ole2-directory
46 # check directory entry structure and display types by GUID
# Named subroutine invoked by the "use ole2-directory" lines above; offsets
# below are relative to the position at which it is used.
47 0 name ole2-directory
48 # directory entry name like "Root Entry"
49 #>0 lestring16 x \b, 1st %.10s
50 # type of the entry; 5~Root storage
51 #>66 ubyte x \b, type %x
52 # node colour of the entry: 00H ~ Red 01H ~ Black
53 #>67 ubyte x \b, color %x
54 # the DirIDs of the child nodes. Should both be –1 in the root storage entry
55 #>68 bequad !0xffffffffffffffff \b, DirIDs %llx
56 # second directory entry name like VisioDocument Control000
57 #>128 lestring16 x \b, 2nd %.20s
58 # third directory entry like WordDocument
59 #>256 lestring16 x \b, 3rd %.20s
61 #>384 lestring16 x \b, 4th %.10s
63 #>512 lestring16 x \b, 5th %.10s
65 #>640 lestring16 x \b, 6th %.10s
67 #>768 lestring16 x \b, 7th %.10s
68 # https://wikileaks.org/ciav7p1/cms/page_13762814.html
69 # https://m.blog.naver.com/superman4u/40047693679
70 # https://misc.daniel-marschall.de/projects/guid_analysis/guid.txt
71 # http://www.windowstricks.in/online-windows-guid-converter
72 #>80 ubequad !0 \b, clsid 0x%16.16llx
73 #>>88 ubequad x \b%16.16llx
74 # test for "Root Entry" inside directory by type 5 value
# Level-1 parent of every ">>88 ..." CLSID test in this subroutine.
75 >66 ubyte 5
76 # look for CLSID GUID 0
# Both 8-byte halves of the CLSID (offsets 80 and 88) must be zero; the
# level-4 directory-entry-name tests that follow hang off these two lines.
77 >>88 ubequad 0
78 >>>80 ubequad 0
# Identification by directory entry name. Each level-4 test compares a
# UTF-16LE string (lestring16) at offset 128 (second directory entry) or
# 256 (third directory entry), as noted in the per-entry comments.
79 # - Microstation V8 DGN files (www.bentley.com)
80 # URL: https://en.wikipedia.org/wiki/MicroStation
81 # Last update on 10/23/2006 by Lester Hightower
82 # 07/24/2019 by Joerg Jenderek
83 # Second directory entry name like Dgn~H Dgn~S
84 >>>>128 lestring16 Dgn~ : Microstation V8 CAD
85 #!:mime application/x-ole-storage
86 !:mime application/x-bentley-dgn
87 # http://www.q-cad.com/files/samples_cad_files/1344468165.dgn
90 # URL: http://fileformats.archiveteam.org/wiki/WordPerfect
91 # Second directory entry name PerfectOffice_
92 >>>>128 lestring16 PerfectOffice_ : WordPerfect 7-X3 presentations Master, Document or Graphic
93 !:mime application/vnd.wordperfect
94 # https://www.macdisk.com/macsigen.php "WPC2" for Wordperfect 2 *.wpd
98 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Word_Processor
99 # Second directory entry name MatOST_
100 >>>>128 lestring16 MatOST : Microsoft Works 3.0 document
101 !:mime application/vnd.ms-works
105 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Spreadsheet
106 # 3rd directory entry name WksSSWorkBook
107 >>>>256 lestring16 WksSSWorkBook : Microsoft Works 6-9 spreadsheet
108 !:mime application/vnd.ms-works
112 # URL: http://fileformats.archiveteam.org/wiki/XLS
113 # what is the difference to {00020820-0000-0000-c000-000000000046} ?
114 # Second directory entry name Workbook
# "Workbook" alone prints nothing; the nested test reports Excel only when
# the third entry is not WksSSWorkBook (the Works spreadsheet marker above).
115 >>>>128 lestring16 Workbook
116 >>>>>256 lestring16 !WksSSWorkBook : Microsoft Excel 97-2003 worksheet 0 clsid
117 !:mime application/vnd.ms-excel
118 # https://www.macdisk.com/macsigen.php XLS5 for Excel 5
122 # URL: http://fileformats.archiveteam.org/wiki/PPT
123 # Second directory entry name Object1 Object12 Object35
124 >>>>128 lestring16 Object : Microsoft PowerPoint 4 presentation
125 !:mime application/vnd.ms-powerpoint
126 # https://www.macdisk.com/macsigen.php
130 # URL: https://www.msoutlook.info/question/164
131 # Second directory entry name __CollDataStm
132 >>>>128 lestring16 __CollDataStm : Microsoft Outlook Send Receive Settings
133 #!:mime application/vnd.ms-outlook
134 !:mime application/x-ms-srs
135 # %APPDATA%\Microsoft\Outlook\Outlook.srs
138 # URL: https://www.file-extensions.org/cag-file-extension
139 # Second directory entry name Category
140 >>>>128 lestring16 Category : Microsoft Clip Art Gallery
141 #!:mime application/x-ole-storage
142 !:mime application/x-ms-cag
146 # URL: https://www.filesuffix.com/de/extension/rra
147 # 3rd directory entry name StrIndex_StringTable
148 >>>>256 lestring16 StrIndex_StringTable : Windows temporarily installer
149 #!:mime application/x-ole-storage
150 !:mime application/x-ms-rra
153 # URL: https://www.forensicswiki.org/wiki/Jump_Lists
154 # 3rd directory entry name DestList
155 >>>>256 lestring16 DestList : Windows jump list
156 #!:mime application/x-ole-storage
157 !:mime application/x-ms-jumplist
158 # %APPDATA%\Microsoft\Windows\Recent\AutomaticDestinations\*.automaticDestinations-ms
159 !:ext automaticDestinations-ms
161 # URL: https://en.wikipedia.org/wiki/Windows_thumbnail_cache
162 # Second directory entry name 256_
163 >>>>128 lestring16 256_ : Windows thumbnail database 256
164 #!:mime application/x-ole-storage
165 !:mime application/x-ms-thumbnail
168 >>>>128 lestring16 96_ : Windows thumbnail database 96
169 !:mime application/x-ms-thumbnail
171 # 3rd directory entry name Catalog_
172 >>>>256 lestring16 Catalog : Windows thumbnail database
173 !:mime application/x-ms-thumbnail
176 # URL: https://support.microsoft.com/en-us/help/300887/how-to-use-system-information-msinfo32-command-line-tool-switches
177 # Note: older Microsoft Systeminfo (MSInfo Configuration File of msinfo32); newer use xml based
178 # Second directory entry name Control000
179 >>>>128 lestring16 Control000 : Microsoft old Systeminfo
180 #!:mime application/x-ole-storage
181 !:mime application/x-ms-info
184 # URL: http://fileformats.archiveteam.org/wiki/Corel_Print_House
185 # Second directory entry name Thumbnail
186 >>>>128 lestring16 Thumbnail : Corel PrintHouse image
187 #!:mime application/x-ole-storage
188 !:mime application/x-corel-cph
190 # 3rd directory entry name Thumbnail
191 >>>>256 lestring16 Thumbnail : Corel PrintHouse image
192 !:mime application/x-corel-cph
195 # URL: https://en.wikipedia.org/wiki/Hangul_(word_processor)
196 # Note: "HWP Document File" signature found in FileHeader
197 # Second directory entry name FileHeader hint for Thinkfree Office document
198 >>>>128 lestring16 FileHeader : Hangul (Korean) 5.0 Word Processor File
199 #!:mime application/haansofthwp
200 !:mime application/x-hwp
201 # https://example-files.online-convert.com/document/hwp/example.hwp
204 # URL: https://ask.libreoffice.org/en/question/26303/creating-new-themes-for-the-gallery-not-functioning/
205 # Second directory entry name like dd2000 dd2001 dd2036 dd2060 dd2083
206 >>>>128 lestring16 dd2 : StarOffice Gallery view
207 #!:mime application/x-ole-storage
208 !:mime application/x-star-sdv
210 # remaining null clsid
# "default" fires only when none of the level-4 name tests above matched.
211 >>>>128 default x : UNKNOWN
212 !:mime application/x-ole-storage
213 # look for known clsid GUID
# Two-step CLSID match: a level-2 test on the 8 bytes at offset 88 prints the
# vendor prefix, then a nested level-3 test on the 8 bytes at offset 80
# prints the product. Both are read as big-endian quads (ubequad).
215 # URL: http://fileformats.archiveteam.org/wiki/Visio
216 # Last update on 10/23/2006 by Lester Hightower, 07/20/2019 by Joerg Jenderek
217 >>88 ubequad 0xc000000000000046 : Microsoft
218 >>>80 ubequad 0x131a020000000000 Visio 2000-2002 Document, stencil or template
219 !:mime application/vnd.visio
220 # VSD~Drawing VSS~Stencil VST~Template
222 >>>80 ubequad 0x141a020000000000 Visio 2003-2010 Document, stencil or template
223 !:mime application/vnd.visio
226 # URL: http://fileformats.archiveteam.org/wiki/Windows_Installer
227 >>>80 ubequad 0x84100c0000000000 Windows Installer Package
228 !:mime application/x-msi
229 #!:mime application/x-ms-win-installer
231 >>>80 ubequad 0x86100c0000000000 Windows Installer Patch
233 !:mime application/x-wine-extension-msp
234 #!:mime application/x-ms-msp
237 # URL: http://fileformats.archiveteam.org/wiki/DOC
238 >>>80 ubequad 0x0009020000000000 Word 6-95 document or template
239 !:mime application/msword
240 # for template MSWDW8TN
243 >>>80 ubequad 0x0609020000000000 Word 97-2003 document or template
244 !:mime application/msword
246 # dot for template; no extension on Macintosh
249 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Word_Processor
250 >>>80 ubequad 0x0213020000000000 Works 3-4 document or template
251 !:mime application/vnd.ms-works
253 # ps for template https://filext.com/file-extension/PS bps for backup
256 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Database
257 >>>80 ubequad 0x0313020000000000 Works 3-4 database or template
258 !:mime application/vnd.ms-works-db
259 # https://www.macdisk.com/macsigen.php
261 # db for template www.file-extensions.org/db-file-extension-microsoft-works-data bdb for backup
264 # URL: https://en.wikipedia.org/wiki/Microsoft_Excel
265 >>>80 ubequad 0x1008020000000000 Excel 5-95 worksheet, addin or template
266 !:mime application/vnd.ms-excel
267 # https://www.macdisk.com/macsigen.php
269 # worksheet/addin/template/no extension on Macintosh
272 >>>80 ubequad 0x2008020000000000 Excel 97-2003
273 !:mime application/vnd.ms-excel
274 # https://www.macdisk.com/macsigen.php XLS5 for Excel 5
# Sub-type: "addin" when the 3rd or 4th directory entry is _VBA_PROJECT_CUR,
# otherwise the default text below.
276 # 3rd directory entry name
277 >>>>256 lestring16 _VBA_PROJECT_CUR addin
279 # 4th directory entry name
280 >>>>384 lestring16 _VBA_PROJECT_CUR addin
283 >>>>256 default x worksheet or template
287 # URL: http://fileformats.archiveteam.org/wiki/OLE2
288 >>>80 ubequad 0x0b0d020000000000 Outlook 97-2003 item
289 #>>>80 ubequad 0x0b0d020000000000 Outlook 97-2003 Message
290 #!:mime application/vnd.ms-outlook
291 !:mime application/x-ms-msg
293 # URL: https://wiki.fileformat.com/email/oft/
294 >>>80 ubequad 0x46f0060000000000 Outlook 97-2003 item template
295 #!:mime application/vnd.ms-outlook
296 !:mime application/x-ms-oft
299 # URL: http://fileformats.archiveteam.org/wiki/PPT
300 >>>80 ubequad 0x5148040000000000 PowerPoint 4.0 presentation
301 !:mime application/vnd.ms-powerpoint
302 # https://www.macdisk.com/macsigen.php
306 # URL: http://www.checkfilename.com/view-details/Microsoft-Works/RespageIndex/0/sTab/2/
307 >>88 ubequad 0xa29a00aa004a1a72 : Microsoft
308 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Word_Processor
309 >>>80 ubequad 0xc2dbcd28e20ace11 Works 4 document
310 !:mime application/vnd.ms-works
314 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Database
315 >>>80 ubequad 0xc3dbcd28e20ace11 Works 4 database
316 !:mime application/vnd.ms-works-db
320 >>88 ubequad 0xa40700c04fb932ba : Microsoft
321 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Works_Word_Processor
322 >>>80 ubequad 0xb25aa40e0a9ed111 Works 5-6 document
323 !:mime application/vnd.ms-works
327 # URL: http://fileformats.archiveteam.org/wiki/Microsoft_Publisher
328 >>88 ubequad 0x00c0000000000046 : Microsoft
329 >>>80 ubequad 0x0112020000000000 Publisher
330 !:mime application/vnd.ms-publisher
333 # URL: http://fileformats.archiveteam.org/wiki/PPT
335 >>88 ubequad 0xa90300aa00510ea3 : Microsoft
336 >>>80 ubequad 0x70ae7bea3bfbcd11 PowerPoint 95 presentation
337 !:mime application/vnd.ms-powerpoint
338 # https://www.macdisk.com/macsigen.php
342 >>88 ubequad 0x86ea00aa00b929e8 : Microsoft
343 >>>80 ubequad 0x108d81649b4fcf11 PowerPoint 97-2003 presentation or template
344 !:mime application/vnd.ms-powerpoint
346 # /autostart/template
349 # URL: https://en.wikipedia.org/wiki/Microsoft_Project
351 >>88 ubequad 0xbe1100c04fb6faf1 : Microsoft
352 >>>80 ubequad 0x3a8fb774c8c8d111 Project
353 !:mime application/vnd.ms-project
# Non-Microsoft CLSIDs, same two-step scheme: the level-2 test on the 8 bytes
# at offset 88 prints the vendor, the level-3 test on the 8 bytes at offset 80
# prints the product.
356 # URL: http://fileformats.archiveteam.org/wiki/SHW_(Corel)
358 >>88 ubequad 0x99ae04021c007002 : WordPerfect
359 >>>80 ubequad 0x62fe2e4099191b10 7-X3 presentation
360 !:mime application/x-corelpresentations
361 #!:mime application/x-shw-viewer
362 #!:mime image/x-presentations
365 # URL: http://www.checkfilename.com/view-details/WordPerfect-Office-X3/RespageIndex/0/sTab/2/
366 >>>80 ubequad 0x60fe2e4099191b10 9 Graphic
367 #!:mime application/x-wpg
368 #!:mime image/x-wordperfect-graphics
370 # https://www.macdisk.com/macsigen.php "WPC2" for Wordperfect 2 *.wpd
374 # URL: http://fileformats.archiveteam.org/wiki/StarOffice_binary_formats
375 >>88 ubequad 0x996104021c007002 : StarOffice
376 >>>80 ubequad 0x407e5cdc5cb31b10 StarWriter 3.0 document or template
377 # https://www.openoffice.org/framework/documentation/mimetypes/mimetypes.html
378 !:mime application/x-starwriter
381 >>>80 ubequad 0xa03f543fa6b61b10 StarCalc 3.0 spreadsheet or template
382 !:mime application/x-starcalc
385 >>>80 ubequad 0xe0aa10af6db31b10 StarDraw 3.0 drawing or template
386 !:mime application/x-starimpress
387 #!:mime application/x-stardraw
391 >>88 ubequad 0x89cb008029e4b0b1 : StarOffice
392 >>>80 ubequad 0x41d461633542d011 StarCalc 4.0 spreadsheet or template
393 !:mime application/x-starcalc
396 >>>80 ubequad 0x61b8a5c6d685d111 StarCalc 5.0 spreadsheet or template
397 !:mime application/vnd.stardivision.cal
400 >>>80 ubequad 0xc03c2d011642d011 StarImpress 4.0 presentation or template
401 !:mime application/x-starimpress
404 >>88 ubequad 0xb12a04021c007002 : StarOffice
405 >>>80 ubequad 0x600459d4fd351c10 StarMath 3.0
406 !:mime application/x-starmath
409 >>88 ubequad 0x8e2c00001b4cc711 : StarOffice
410 >>>80 ubequad 0xe0999cfb6d2c1c10 StarChart 3.0
411 !:mime application/x-starchart
414 >>88 ubequad 0xa45e00a0249d57b1 : StarOffice
415 >>>80 ubequad 0xb0e9048b0e42d011 StarWriter 4.0 document or template
416 !:mime application/x-starwriter
419 >>88 ubequad 0x89ca008029e4b0b1 : StarOffice
420 >>>80 ubequad 0xe1b7b3022542d011 StarMath 4.0
421 !:mime application/x-starmath
424 >>>80 ubequad 0xe0b7b3022542d011 StarChart 4.0
425 !:mime application/x-starchart
428 >>88 ubequad 0xa53f00a0249d57b1 : StarOffice
429 >>>80 ubequad 0x70c90a340de3d011 Master 4.0 document
430 !:mime application/x-starwriter-global
433 >>88 ubequad 0x89d0008029e4b0b1 : StarOffice
434 >>>80 ubequad 0x40e6b5ffde85d111 StarMath 5.0
435 !:mime application/vnd.stardivision.math
438 >>>80 ubequad 0xa005892ebd85d111 StarDraw 5.0 drawing or template
439 !:mime application/vnd.stardivision.draw
442 >>>80 ubequad 0x21725c56bc85d111 StarImpress 5.0 presentation or template
443 !:mime application/vnd.stardivision.impress
444 # sda is used for what?
447 >>>80 ubequad 0x214388bfdd85d111 StarChart 5.0
448 !:mime application/vnd.stardivision.chart
451 >>88 ubequad 0xaab4006097da561a : StarOffice
452 >>>80 ubequad 0xd1f90cc2ae85d111 StarWriter 5.0 document or template
453 !:mime application/vnd.stardivision.writer
456 >>>80 ubequad 0xd3f90cc2ae85d111 Master 5.0 document
457 !:mime application/vnd.stardivision.writer-global
460 # URL: http://fileformats.archiveteam.org/wiki/FlashPix
461 >>88 ubequad 0x855300aa00a1f95b : Kodak
462 >>>80 ubequad 0x0067615654c1ce11 FlashPIX Image
466 # remaining non null clsid
# Fallback when no level-2 test at offset 88 matched: report UNKNOWN and, if
# the first CLSID half (offset 80) is non-zero, print both halves in hex.
467 >>88 default x : UNKNOWN
468 !:mime application/x-ole-storage
469 >>>80 ubequad !0 \b, clsid 0x%16.16llx
470 >>>88 ubequad x \b%16.16llx