summaryrefslogtreecommitdiffstats
path: root/contrib/file/cdf.h
blob: 6fe2646e935d91599f35a8bc69eb3c2d0a1ecea6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
/*-
 * Copyright (c) 2008 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Parse Composite Document Files, the format used in Microsoft Office
 * document files before they switched to zipped XML.
 * Info from: http://sc.openoffice.org/compdocfileformat.pdf
 *
 * N.B. This is the "Composite Document File" format, and not the
 * "Compound Document Format", nor the "Channel Definition Format".
 */

#ifndef _H_CDF_
#define _H_CDF_

/*
 * Platform shims.  When WIN32 or __DJGPP__ is defined, the identifier
 * `timespec` is rewritten to `timeval` and `tv_nsec` to `tv_usec`, so every
 * later use of struct timespec / tv_nsec in a translation unit that
 * includes this header really names struct timeval / tv_usec.
 * NOTE(review): this is a purely textual rename; nothing here converts
 * between the two members' units -- confirm the implementation does.
 */
#ifdef WIN32
#include <winsock2.h>
#define timespec timeval
#define tv_nsec tv_usec
#endif
#ifdef __DJGPP__
#define timespec timeval
#define tv_nsec tv_usec
#endif

/* Sector identifier: a signed 32-bit value. */
typedef int32_t cdf_secid_t;

/*
 * Upper bound on iterations.
 * NOTE(review): presumably caps loops that follow sector chains so a
 * corrupt file cannot loop forever -- confirm in the implementation.
 */
#define CDF_LOOP_LIMIT	10000

/*
 * Special cdf_secid_t values.  The negative ones are parenthesized so
 * that they expand as a single operand in any expression.
 */
#define CDF_SECID_NULL	0
#define CDF_SECID_FREE	(-1)
#define CDF_SECID_END_OF_CHAIN	(-2)
#define CDF_SECID_SECTOR_ALLOCATION_TABLE	(-3)
#define CDF_SECID_MASTER_SECTOR_ALLOCATION_TABLE	(-4)

/*
 * CDF file header as held in memory.
 *
 * The members ahead of h_master_sat add up to 76 bytes, and h_master_sat
 * holds 436 / 4 = 109 sector ids (436 bytes): 512 bytes in total.
 * NOTE(review): the per-member notes below are read off the member names;
 * confirm them against the format document cited at the top of this file.
 */
typedef struct {
	uint64_t	h_magic;	/* file signature; see CDF_MAGIC */
/*
 * The value does not fit in a signed long long, so despite the LL suffix
 * the constant has type unsigned long long.
 */
#define CDF_MAGIC	0xE11AB1A1E011CFD0LL
	uint64_t	h_uuid[2];	/* 16 bytes of identifier */
	uint16_t	h_revision;
	uint16_t	h_version;
	uint16_t	h_byte_order;
	/* Exponent: CDF_SEC_SIZE() computes 1 << h_sec_size_p2. */
	uint16_t	h_sec_size_p2;
	/* Exponent: CDF_SHORT_SEC_SIZE() computes 1 << h_short_sec_size_p2. */
	uint16_t	h_short_sec_size_p2;
	uint8_t		h_unused0[10];
	uint32_t	h_num_sectors_in_sat;
	/*
	 * NOTE(review): declared uint32_t, unlike the other sector-id
	 * members of this struct, which are cdf_secid_t.
	 */
	uint32_t	h_secid_first_directory;
	uint8_t		h_unused1[4];
	uint32_t	h_min_size_standard_stream;
	cdf_secid_t	h_secid_first_sector_in_short_sat;
	uint32_t	h_num_sectors_in_short_sat;
	cdf_secid_t	h_secid_first_sector_in_master_sat;
	uint32_t	h_num_sectors_in_master_sat;
	cdf_secid_t	h_master_sat[436/4];	/* 109 entries */
} cdf_header_t;

/*
 * Sector geometry helpers.  `h` is a pointer to a structure with the
 * h_sec_size_p2 / h_short_sec_size_p2 members (a cdf_header_t); `secid`
 * is a sector id.  Each macro may expand `h` more than once, so pass an
 * expression without side effects.
 *
 * The shifts are done on a size_t operand rather than on the int constant
 * 1: shifting an int by 31 or more bits is undefined behaviour, and the
 * exponent comes straight from the file header.  The exponent must still
 * be smaller than the width of size_t.
 *
 * CDF_SEC_SIZE(h)		bytes per sector (1 << h_sec_size_p2)
 * CDF_SEC_POS(h, secid)	(secid + 1) sectors, expressed in bytes; the
 *				extra sector-sized unit comes first
 *				(NOTE(review): presumably the file header
 *				-- confirm in the implementation)
 * CDF_SHORT_SEC_SIZE(h)	bytes per short sector
 *				(1 << h_short_sec_size_p2)
 * CDF_SHORT_SEC_POS(h, secid)	secid short sectors, expressed in bytes
 */
#define CDF_SEC_SIZE(h) ((size_t)1 << (h)->h_sec_size_p2)
#define CDF_SEC_POS(h, secid) (CDF_SEC_SIZE(h) + (secid) * CDF_SEC_SIZE(h))
#define CDF_SHORT_SEC_SIZE(h)	((size_t)1 << (h)->h_short_sec_size_p2)
#define CDF_SHORT_SEC_POS(h, secid) ((secid) * CDF_SHORT_SEC_SIZE(h))

/* Directory entry identifier: a signed 32-bit value. */
typedef int32_t cdf_dirid_t;
/* Special cdf_dirid_t value; parenthesized so it expands as one operand. */
#define CDF_DIRID_NULL	(-1)

/*
 * Timestamp: a signed 64-bit value.
 * NOTE(review): presumably a count of 1/CDF_TIME_PREC-second ticks
 * starting at year CDF_BASE_YEAR -- confirm in the conversion routines.
 */
typedef int64_t cdf_timestamp_t;
#define CDF_BASE_YEAR	1601
#define CDF_TIME_PREC	10000000

/*
 * One directory entry.
 *
 * The members add up to 128 bytes, the value of CDF_DIRECTORY_SIZE
 * defined right after the struct.
 * NOTE(review): the per-member notes below are read off the member names;
 * confirm them against the format document cited at the top of this file.
 */
typedef struct {
	uint16_t	d_name[32];	/* 32 16-bit units of name */
	/* NOTE(review): unit of d_namelen (bytes or 16-bit units) not shown here. */
	uint16_t	d_namelen;
	uint8_t		d_type;		/* one of CDF_DIR_TYPE_* */
#define CDF_DIR_TYPE_EMPTY		0
#define CDF_DIR_TYPE_USER_STORAGE	1
#define CDF_DIR_TYPE_USER_STREAM	2
#define CDF_DIR_TYPE_LOCKBYTES		3
#define CDF_DIR_TYPE_PROPERTY		4
#define CDF_DIR_TYPE_ROOT_STORAGE	5
	uint8_t		d_color;	/* one of CDF_DIR_COLOR_* */
/*
 * NOTE(review): "READ" is the spelling this header exports; "RED" (as in
 * red/black) was presumably meant.  Renaming it would break users.
 */
#define CDF_DIR_COLOR_READ	0
#define CDF_DIR_COLOR_BLACK	1
	cdf_dirid_t	d_left_child;
	cdf_dirid_t	d_right_child;
	cdf_dirid_t	d_storage;
	uint64_t	d_storage_uuid[2];	/* 16 bytes of identifier */
	uint32_t	d_flags;
	cdf_timestamp_t d_created;
	cdf_timestamp_t d_modified;
	cdf_secid_t	d_stream_first_sector;
	uint32_t	d_size;
	uint32_t	d_unused0;
} cdf_directory_t;

/* Byte count of one directory entry; equals the sum of the member sizes above. */
#define CDF_DIRECTORY_SIZE	128

/*
 * A table of sector ids together with a length.
 * NOTE(review): whether sat_len counts table entries or sectors is not
 * visible in this header -- confirm in the implementation.
 */
typedef struct {
	cdf_secid_t *sat_tab;
	size_t sat_len;
} cdf_sat_t;

/*
 * A table of directory entries together with a length.
 * NOTE(review): dir_len is presumably the number of entries in dir_tab
 * -- confirm in the implementation.
 */
typedef struct {
	cdf_directory_t *dir_tab;
	size_t dir_len;
} cdf_dir_t;

/*
 * An untyped data buffer with two associated lengths.
 * NOTE(review): the units of sst_len and the meaning of sst_dirlen are
 * not visible in this header -- confirm in the implementation.
 */
typedef struct {
	void *sst_tab;
	size_t sst_len;
	size_t sst_dirlen;
} cdf_stream_t;

/*
 * Class identifier: 16 bytes split into one 32-bit word, two 16-bit
 * words, then a 2-byte and a 6-byte array.
 */
typedef struct {
	uint32_t	cl_dword;
	uint16_t	cl_word[2];
	uint8_t		cl_two[2];
	uint8_t		cl_six[6];
} cdf_classid_t;

/*
 * Header of a summary information stream.
 * The members add up to 28 bytes (4 * 2 + 16 + 4), which is the value of
 * CDF_SECTION_DECLARATION_OFFSET (0x1c) defined right after the struct.
 */
typedef struct {
	uint16_t	si_byte_order;
	uint16_t	si_zero;
	uint16_t	si_os_version;
	uint16_t	si_os;
	cdf_classid_t	si_class;
	uint32_t	si_count;
} cdf_summary_info_header_t;

/*
 * NOTE(review): presumably the byte offset, within the stream, of the
 * first section declaration -- i.e. right after the header above.
 */
#define CDF_SECTION_DECLARATION_OFFSET 0x1c

/*
 * Section declaration: a class id paired with a 32-bit offset.
 * NOTE(review): what sd_offset is relative to is not visible here.
 */
typedef struct {
	cdf_classid_t	sd_class;
	uint32_t	sd_offset;
} cdf_section_declaration_t;

/*
 * Section header: two 32-bit values.
 * NOTE(review): going by the names, sh_len is the section length and
 * sh_properties the number of properties in it -- confirm.
 */
typedef struct {
	uint32_t	sh_len;
	uint32_t	sh_properties;
} cdf_section_header_t;

/*
 * One property: an id, a type, and a value stored in a union.
 *
 * NOTE(review): which union member is meaningful is presumably selected
 * by pi_type (the CDF_* variant type constants) -- confirm in the code
 * that fills this structure.
 *
 * The pi_* accessor macros let callers write p->pi_u32 instead of
 * p->pi_val._pi_u32.  Being macros, they are not scoped to this struct:
 * they stay defined for the rest of any translation unit that includes
 * this header, so those identifiers cannot be reused for anything else.
 */
typedef struct {
	uint32_t	pi_id;
	uint32_t	pi_type;
	union {
		uint16_t	_pi_u16;
		int16_t		_pi_s16;
		uint32_t	_pi_u32;
		int32_t		_pi_s32;
		uint64_t	_pi_u64;
		int64_t		_pi_s64;
		cdf_timestamp_t _pi_tp;
		float		_pi_f;
		double		_pi_d;
		/*
		 * Length plus pointer to read-only bytes.
		 * NOTE(review): ownership of s_buf and whether it is
		 * NUL-terminated are not visible here.
		 */
		struct {
			uint32_t s_len;
			const char *s_buf;
		} _pi_str;
	} pi_val;
#define pi_u64	pi_val._pi_u64
#define pi_s64	pi_val._pi_s64
#define pi_u32	pi_val._pi_u32
#define pi_s32	pi_val._pi_s32
#define pi_u16	pi_val._pi_u16
#define pi_s16	pi_val._pi_s16
#define pi_f	pi_val._pi_f
#define pi_d	pi_val._pi_d
#define pi_tp	pi_val._pi_tp
#define pi_str	pi_val._pi_str
} cdf_property_info_t;

/*
 * Round `val` up to the next multiple of `by`.
 * The mask form is only correct when `by` is a power of two.  `by` is
 * expanded twice, so it must not have side effects.  If `by` has a
 * narrower unsigned type than `val`, the mask ~((by) - 1) is zero-extended
 * and also clears the high bits of the result.
 */
#define CDF_ROUND(val, by)     (((val) + (by) - 1) & ~((by) - 1))

/*
 * Variant type definitions.
 *
 * Values 0x00-0x48 (with gaps at 0x0f and 0x20-0x3f) are type codes; all
 * of them fit under CDF_TYPEMASK (0x0fff).  CDF_VECTOR, CDF_ARRAY,
 * CDF_BYREF and CDF_RESERVED are single bits above that mask.
 * CDF_ILLEGALMASKED is CDF_ILLEGAL with CDF_TYPEMASK applied.
 * NOTE(review): presumably these are the values found in the pi_type
 * member of cdf_property_info_t, with the high bits OR-ed onto a type
 * code -- confirm in the implementation.
 */
#define CDF_EMPTY		0x00000000
#define CDF_NULL		0x00000001
#define CDF_SIGNED16		0x00000002
#define CDF_SIGNED32		0x00000003
#define CDF_FLOAT		0x00000004
#define CDF_DOUBLE		0x00000005
#define CDF_CY			0x00000006
#define CDF_DATE		0x00000007
#define CDF_BSTR		0x00000008
#define CDF_DISPATCH		0x00000009
#define CDF_ERROR		0x0000000a
#define CDF_BOOL		0x0000000b
#define CDF_VARIANT		0x0000000c
#define CDF_UNKNOWN		0x0000000d
#define CDF_DECIMAL		0x0000000e
#define CDF_SIGNED8		0x00000010
#define CDF_UNSIGNED8		0x00000011
#define CDF_UNSIGNED16		0x00000012
#define CDF_UNSIGNED32		0x00000013
#define CDF_SIGNED64		0x00000014
#define CDF_UNSIGNED64		0x00000015
#define CDF_INT			0x00000016
#define CDF_UINT		0x00000017
#define CDF_VOID		0x00000018
#define CDF_HRESULT		0x00000019
#define CDF_PTR			0x0000001a
#define CDF_SAFEARRAY		0x0000001b
#define CDF_CARRAY		0x0000001c
#define CDF_USERDEFINED		0x0000001d
#define CDF_LENGTH32_STRING	0x0000001e
#define CDF_LENGTH32_WSTRING	0x0000001f
#define CDF_FILETIME		0x00000040
#define CDF_BLOB		0x00000041
#define CDF_STREAM		0x00000042
#define CDF_STORAGE		0x00000043
#define CDF_STREAMED_OBJECT	0x00000044
#define CDF_STORED_OBJECT	0x00000045
#define CDF_BLOB_OBJECT		0x00000046
#define CDF_CLIPBOARD		0x00000047
#define CDF_CLSID		0x00000048
/* Single-bit values above CDF_TYPEMASK. */
#define CDF_VECTOR		0x00001000
#define CDF_ARRAY		0x00002000
#define CDF_BYREF		0x00004000
#define CDF_RESERVED		0x00008000
#define CDF_ILLEGAL		0x0000ffff
#define CDF_ILLEGALMASKED	0x00000fff
/* Mask that keeps the low 12 bits, i.e. the type code. */
#define CDF_TYPEMASK		0x00000fff

/*
 * Property identifiers.
 * Ids 0x01 through 0x13 are consecutive; CDF_PROPERTY_LOCALE_ID stands
 * apart with only the top bit of a 32-bit word set.
 * NOTE(review): presumably matched against the pi_id member of
 * cdf_property_info_t -- confirm in the implementation.
 */
#define CDF_PROPERTY_CODE_PAGE			0x00000001
#define CDF_PROPERTY_TITLE			0x00000002
#define CDF_PROPERTY_SUBJECT			0x00000003
#define CDF_PROPERTY_AUTHOR			0x00000004
#define CDF_PROPERTY_KEYWORDS			0x00000005
#define CDF_PROPERTY_COMMENTS			0x00000006
#define CDF_PROPERTY_TEMPLATE			0x00000007
#define CDF_PROPERTY_LAST_SAVED_BY		0x00000008
#define CDF_PROPERTY_REVISION_NUMBER		0x00000009
#define CDF_PROPERTY_TOTAL_EDITING_TIME		0x0000000a
#define CDF_PROPERTY_LAST_PRINTED		0x0000000b
#define CDF_PROPERTY_CREATE_TIME		0x0000000c
#define CDF_PROPERTY_LAST_SAVED_TIME		0x0000000d
#define CDF_PROPERTY_NUMBER_OF_PAGES		0x0000000e
#define CDF_PROPERTY_NUMBER_OF_WORDS		0x0000000f
#define CDF_PROPERTY_NUMBER_OF_CHARACTERS	0x00000010
#define CDF_PROPERTY_THUMBNAIL			0x00000011
#define CDF_PROPERTY_NAME_OF_APPLICATION	0x00000012
#define CDF_PROPERTY_SECURITY			0x00000013
/* Does not fit in a 32-bit int, so the constant has an unsigned type there. */
#define CDF_PROPERTY_LOCALE_ID			0x80000000

/*
 * Describes the input: a file descriptor plus a read-only byte buffer
 * and its length.
 * NOTE(review): how i_fd and i_buf relate (e.g. whether the buffer holds
 * the start of the file and the descriptor is used beyond it) is not
 * visible in this header -- confirm in the implementation.
 */
typedef struct {
	int i_fd;
	const unsigned char *i_buf;
	size_t i_len;
} cdf_info_t;

/*
 * Conversions between cdf_timestamp_t and struct timespec, in both
 * directions.  The forward declaration lets the prototypes name the
 * struct without a complete definition being in scope.  Where WIN32 or
 * __DJGPP__ is defined, the macros near the top of this file turn every
 * `timespec` here into `timeval`.
 * NOTE(review): the meaning of the int return value is not visible here.
 */
struct timespec;
int cdf_timestamp_to_timespec(struct timespec *, cdf_timestamp_t);
int cdf_timespec_to_timestamp(cdf_timestamp_t *, const struct timespec *);
/*
 * Header, directory-entry and class-id helpers.
 * NOTE(review): going by the names and signatures, the cdf_swap_*
 * functions byte-swap a structure in place and the cdf_unpack_* functions
 * fill a structure from a raw char buffer -- confirm in the
 * implementation.  The meaning of cdf_read_header()'s int return value is
 * not visible here.
 */
int cdf_read_header(const cdf_info_t *, cdf_header_t *);
void cdf_swap_header(cdf_header_t *);
void cdf_unpack_header(cdf_header_t *, char *);
void cdf_swap_dir(cdf_directory_t *);
void cdf_unpack_dir(cdf_directory_t *, char *);
void cdf_swap_class(cdf_classid_t *);
/*
 * Readers for sectors, allocation tables, sector chains, the directory
 * and the short stream.
 *
 * The two single-sector readers return ssize_t and take an output buffer
 * plus two size_t arguments; the regular one reads from a cdf_info_t, the
 * short one from a cdf_stream_t.  The remaining functions return int and
 * write their result through the last (non-const) pointer argument.
 * NOTE(review): the roles of the individual size_t arguments, the order
 * of the two cdf_sat_t arguments of cdf_read_sector_chain(), and the
 * return-value conventions are not visible in this header -- confirm in
 * the implementation before relying on them.
 */
ssize_t cdf_read_sector(const cdf_info_t *, void *, size_t, size_t,
    const cdf_header_t *, cdf_secid_t);
ssize_t cdf_read_short_sector(const cdf_stream_t *, void *, size_t, size_t,
    const cdf_header_t *, cdf_secid_t);
int cdf_read_sat(const cdf_info_t *, cdf_header_t *, cdf_sat_t *);
size_t cdf_count_chain(const cdf_sat_t *, cdf_secid_t, size_t);
int cdf_read_long_sector_chain(const cdf_info_t *, const cdf_header_t *,
    const cdf_sat_t *, cdf_secid_t, size_t, cdf_stream_t *);
int cdf_read_short_sector_chain(const cdf_header_t *, const cdf_sat_t *,
    const cdf_stream_t *, cdf_secid_t, size_t, cdf_stream_t *);
int cdf_read_sector_chain(const cdf_info_t *, const cdf_header_t *,
    const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *, cdf_secid_t,
    size_t, cdf_stream_t *);
int cdf_read_dir(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *,
    cdf_dir_t *);
int cdf_read_ssat(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *,
    cdf_sat_t *);
int cdf_read_short_stream(const cdf_info_t *, const cdf_header_t *,
    const cdf_sat_t *, const cdf_dir_t *, cdf_stream_t *);
/*
 * Property and summary-information readers.
 * cdf_read_property_info() and cdf_unpack_summary_info() hand back a
 * cdf_property_info_t array through a pointer-to-pointer, with size_t
 * out-parameters alongside.
 * NOTE(review): who allocates and frees that array, what the two size_t
 * out-parameters of cdf_read_property_info() mean, and the return-value
 * conventions are not visible in this header -- confirm in the
 * implementation.
 */
int cdf_read_property_info(const cdf_stream_t *, const cdf_header_t *, uint32_t,
    cdf_property_info_t **, size_t *, size_t *);
int cdf_read_summary_info(const cdf_info_t *, const cdf_header_t *,
    const cdf_sat_t *, const cdf_sat_t *, const cdf_stream_t *,
    const cdf_dir_t *, cdf_stream_t *);
int cdf_unpack_summary_info(const cdf_stream_t *, const cdf_header_t *,
    cdf_summary_info_header_t *, cdf_property_info_t **, size_t *);
/*
 * Formatting helpers; each takes a char buffer and a size_t first.
 * NOTE(review): presumably they format into the caller's buffer of the
 * given size and return an snprintf-style count -- confirm in the
 * implementation.
 */
int cdf_print_classid(char *, size_t, const cdf_classid_t *);
int cdf_print_property_name(char *, size_t, uint32_t);
int cdf_print_elapsed_time(char *, size_t, cdf_timestamp_t);
/*
 * Integer helpers for 2-, 4- and 8-byte values.
 * NOTE(review): "tole" presumably stands for "to little-endian"; whether
 * the conversion is unconditional is not visible here.
 */
uint16_t cdf_tole2(uint16_t);
uint32_t cdf_tole4(uint32_t);
uint64_t cdf_tole8(uint64_t);
/*
 * Same parameter and return types as the standard ctime().
 * NOTE(review): storage and lifetime of the returned string are not
 * visible here.
 */
char *cdf_ctime(const time_t *);

/*
 * Dump routines, declared only when CDF_DEBUG is defined.  All return
 * void.
 * NOTE(review): where the output goes is not visible in this header.
 */
#ifdef CDF_DEBUG
void cdf_dump_header(const cdf_header_t *);
void cdf_dump_sat(const char *, const cdf_sat_t *, size_t);
void cdf_dump(void *, size_t);
void cdf_dump_stream(const cdf_header_t *, const cdf_stream_t *);
void cdf_dump_dir(const cdf_info_t *, const cdf_header_t *, const cdf_sat_t *,
    const cdf_sat_t *, const cdf_stream_t *, const cdf_dir_t *);
void cdf_dump_property_info(const cdf_property_info_t *, size_t);
void cdf_dump_summary_info(const cdf_header_t *, const cdf_stream_t *);
#endif


#endif /* _H_CDF_ */
OpenPOWER on IntegriCloud