summaryrefslogtreecommitdiffstats
path: root/subversion/libsvn_subr/base64.c
blob: 97ee3d285c50d98b0111a0f2a8c2b2f0bcc8aa4e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
/*
 * base64.c:  base64 encoding and decoding functions
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */



#include <string.h>

#include <apr.h>
#include <apr_pools.h>
#include <apr_general.h>        /* for APR_INLINE */

#include "svn_pools.h"
#include "svn_io.h"
#include "svn_error.h"
#include "svn_base64.h"
#include "private/svn_string_private.h"
#include "private/svn_subr_private.h"

/* When asked to format the base64-encoded output as multiple lines,
   we put this many chars in each line (plus one newline char) unless
   we run out of data.
   It is vital for some of the optimizations below that this value is
   a multiple of 4: encode_line() and decode_line() only ever process
   whole 4-char groups. */
#define BASE64_LINELEN 76

/* Number of raw bytes that are encoded by one full line of
   BASE64_LINELEN base64 chars (3 bytes per 4 chars, i.e. 57). */
#define BYTES_PER_LINE (BASE64_LINELEN / 4 * 3)

/* Encoding alphabet: base64tab[v] is the base64 char that represents
   the 6-bit value v (2^6 entries, followed by the literal's NUL). */
static const char base64tab[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";


/* Binary input --> base64-encoded output */

/* State of an encoding stream created by svn_base64_encode(). */
struct encode_baton {
  svn_stream_t *output;         /* Stream that receives the encoded text */
  unsigned char buf[3];         /* Bytes waiting to be encoded */
  size_t buflen;                /* Number of bytes waiting */
  size_t linelen;               /* Bytes output so far on this line */
  apr_pool_t *scratch_pool;     /* Cleared after each write, destroyed
                                   when the stream is closed */
};


/* Base64-encode one group: read the three bytes at IN and store the
   four corresponding base64 chars at OUT (no NUL is written).  The 24
   input bits are cut into four 6-bit values, each of which is used as
   an index into base64tab.  */
static APR_INLINE void
encode_group(const unsigned char *in, char *out)
{
  /* Widen the input bytes to apr_size_t up front so that the shifts
     and masks below are not done in byte-sized arithmetic and the
     results can be used as table indexes without further conversion
     (the original author notes this is free on x86/x64). */
  const apr_size_t b0 = in[0];
  const apr_size_t b1 = in[1];
  const apr_size_t b2 = in[2];

  /* The four 6-bit indexes, most significant bits first. */
  const apr_size_t idx0 = b0 >> 2;
  const apr_size_t idx1 = ((b0 & 3) << 4) | (b1 >> 4);
  const apr_size_t idx2 = ((b1 & 0xf) << 2) | (b2 >> 6);
  const apr_size_t idx3 = b2 & 0x3f;

  out[0] = base64tab[idx0];
  out[1] = base64tab[idx1];
  out[2] = base64tab[idx2];
  out[3] = base64tab[idx3];
}

/* Base64-encode exactly one line: BYTES_PER_LINE bytes from DATA are
   turned into BASE64_LINELEN chars that get appended to STR.  No
   newline char is appended; that is up to the caller.

   This is a fast path without any boundary checks.  The caller must
   guarantee that DATA has at least BYTES_PER_LINE bytes left and that
   STR already has room for BASE64_LINELEN more chars plus the
   terminating NUL. */
static void
encode_line(svn_stringbuf_t *str, const char *data)
{
  const unsigned char *src = (const unsigned char *)data;
  char *dst = str->data + str->len;   /* write straight into STR's buffer */
  apr_size_t written;

  /* BASE64_LINELEN is a multiple of 4 and BYTES_PER_LINE a multiple
     of 3, so the line consists of whole groups only. */
  for (written = 0; written < BASE64_LINELEN; written += 4)
    {
      encode_group(src, dst);
      src += 3;
      dst += 4;
    }

  /* Terminate the string and account for the new chars. */
  *dst = '\0';
  str->len += BASE64_LINELEN;
}

/* (Continue to) Base64-encode the LEN bytes at DATA, appending the
   result to STR.  If BREAK_LINES is true, a newline is inserted after
   every BASE64_LINELEN output chars.

   INBUF, *INBUFLEN and *LINELEN hold the state that survives between
   calls: INBUF (room for three bytes) and *INBUFLEN keep the input
   bytes that did not fill a complete group yet, *LINELEN is the length
   of the current output line.  The caller initializes *INBUFLEN and
   *LINELEN to 0 before the first call and passes any bytes left in
   INBUF to encode_partial_group() after the last one. */
static void
encode_bytes(svn_stringbuf_t *str, const void *data, apr_size_t len,
             unsigned char *inbuf, size_t *inbuflen, size_t *linelen,
             svn_boolean_t break_lines)
{
  const char *cur = data;
  const char *const limit = cur + len;
  char quad[4];
  apr_size_t reserve;

  /* Reserve the (approximate) output size once.  encode_line() writes
     into STR without checking, so STR must not need to grow while the
     fast path below is in use. */
  reserve = len * 4 / 3 + 4;
  if (break_lines)
    reserve += reserve / BASE64_LINELEN;      /* room for the newlines */
  svn_stringbuf_ensure(str, str->len + reserve);

  /* As long as leftover plus new input yield a complete group ... */
  while (*inbuflen + (limit - cur) >= 3)
    {
      /* A whole line can be encoded in one go if nothing is pending in
         INBUF, we are at the start of a line (or do not care about
         lines at all) and enough input remains. */
      svn_boolean_t whole_line = (*inbuflen == 0
                                  && (*linelen == 0 || !break_lines)
                                  && (limit - cur >= BYTES_PER_LINE));

      if (whole_line)
        {
          encode_line(str, cur);
          cur += BYTES_PER_LINE;
          *linelen += BASE64_LINELEN;
        }
      else
        {
          /* Slow path: complete the pending group in INBUF with fresh
             input and encode just that single group. */
          size_t missing = 3 - *inbuflen;

          memcpy(inbuf + *inbuflen, cur, missing);
          cur += missing;
          encode_group(inbuf, quad);
          svn_stringbuf_appendbytes(str, quad, 4);
          *inbuflen = 0;
          *linelen += 4;
        }

      /* Start a new line once the current one is full. */
      if (break_lines && *linelen == BASE64_LINELEN)
        {
          svn_stringbuf_appendbyte(str, '\n');
          *linelen = 0;
        }
    }

  /* Whatever is left (0..2 bytes) waits in INBUF for the next call. */
  memcpy(inbuf + *inbuflen, cur, limit - cur);
  *inbuflen += (limit - cur);
}


/* Finish an encoding run: encode the LEN leftover bytes at EXTRA (if
   any) as one '='-padded group and append it to STR.  Afterwards, if
   BREAK_LINES is set and the current line (LINELEN chars before the
   call, plus the group just added) is not empty, append a final
   newline.  LEN must be in the range 0..2.  */
static void
encode_partial_group(svn_stringbuf_t *str, const unsigned char *extra,
                     size_t len, size_t linelen, svn_boolean_t break_lines)
{
  if (len > 0)
    {
      unsigned char padded[3] = { 0, 0, 0 };  /* input, zero-filled */
      char quad[4];
      size_t i;

      memcpy(padded, extra, len);
      encode_group(padded, quad);

      /* LEN input bytes produce LEN + 1 significant chars; the rest of
         the group is padding. */
      for (i = len + 1; i < 4; i++)
        quad[i] = '=';

      svn_stringbuf_appendbytes(str, quad, 4);
      linelen += 4;
    }

  if (break_lines && linelen > 0)
    svn_stringbuf_appendbyte(str, '\n');
}


/* Write handler for svn_base64_encode(): encode the *LEN bytes at DATA
   (with line breaks) and pass the encoded text on to the output
   stream.  *LEN is left unchanged.  Returns the error of the
   downstream write, if any.  */
static svn_error_t *
encode_data(void *baton, const char *data, apr_size_t *len)
{
  struct encode_baton *eb = baton;
  svn_error_t *err = SVN_NO_ERROR;
  svn_stringbuf_t *chunk = svn_stringbuf_create_empty(eb->scratch_pool);
  apr_size_t nbytes;

  /* Encode this block; incomplete groups stay behind in the baton.  */
  encode_bytes(chunk, data, *len, eb->buf, &eb->buflen, &eb->linelen, TRUE);

  /* Only bother the output stream if there is something to write.  */
  nbytes = chunk->len;
  if (nbytes > 0)
    err = svn_stream_write(eb->output, chunk->data, &nbytes);

  /* The encoded chunk is no longer needed.  */
  svn_pool_clear(eb->scratch_pool);
  return err;
}


/* Close handler for svn_base64_encode(): flush the pending partial
   group (plus the final newline, if the last line is not empty) to the
   output stream, then close that stream unless the write failed.  The
   baton's scratch pool is destroyed in any case.  */
static svn_error_t *
finish_encoding_data(void *baton)
{
  struct encode_baton *eb = baton;
  svn_error_t *err = SVN_NO_ERROR;
  svn_stringbuf_t *tail = svn_stringbuf_create_empty(eb->scratch_pool);
  apr_size_t nbytes;

  /* Produce and write the trailing bytes, if there are any.  */
  encode_partial_group(tail, eb->buf, eb->buflen, eb->linelen, TRUE);
  nbytes = tail->len;
  if (nbytes > 0)
    err = svn_stream_write(eb->output, tail->data, &nbytes);

  /* Pass on the close request, but only after a successful write.  */
  if (err == SVN_NO_ERROR)
    err = svn_stream_close(eb->output);

  svn_pool_destroy(eb->scratch_pool);
  return err;
}


/* Return a writable stream, allocated in POOL, that base64-encodes
   (with line breaks) all data written to it and writes the result to
   OUTPUT.  Closing the returned stream flushes the last, possibly
   partial, group and closes OUTPUT as well. */
svn_stream_t *
svn_base64_encode(svn_stream_t *output, apr_pool_t *pool)
{
  svn_stream_t *encoder;
  struct encode_baton *baton = apr_palloc(pool, sizeof(*baton));

  /* Nothing buffered, nothing written on the current line yet. */
  baton->output = output;
  baton->buflen = 0;
  baton->linelen = 0;
  baton->scratch_pool = svn_pool_create(pool);

  encoder = svn_stream_create(baton, pool);
  svn_stream_set_write(encoder, encode_data);
  svn_stream_set_close(encoder, finish_encoding_data);

  return encoder;
}


/* Return the base64 encoding of STR as a new string allocated in POOL.
   If BREAK_LINES is true, the output is split into lines of
   BASE64_LINELEN chars and a non-empty result ends with a newline. */
const svn_string_t *
svn_base64_encode_string2(const svn_string_t *str,
                          svn_boolean_t break_lines,
                          apr_pool_t *pool)
{
  unsigned char pending[3];     /* bytes of an incomplete group */
  size_t npending = 0;
  size_t column = 0;            /* chars on the current output line */
  svn_stringbuf_t *result = svn_stringbuf_create_empty(pool);

  /* All complete groups first, then the padded remainder. */
  encode_bytes(result, str->data, str->len, pending, &npending, &column,
               break_lines);
  encode_partial_group(result, pending, npending, column, break_lines);

  return svn_stringbuf__morph_into_string(result);
}

/* Same as svn_base64_encode_string2() with line breaking enabled. */
const svn_string_t *
svn_base64_encode_string(const svn_string_t *str, apr_pool_t *pool)
{
  const svn_boolean_t break_lines = TRUE;

  return svn_base64_encode_string2(str, break_lines, pool);
}



/* Base64-encoded input --> binary output */

/* State of a decoding stream created by svn_base64_decode(). */
struct decode_baton {
  svn_stream_t *output;         /* Stream that receives the decoded bytes */
  unsigned char buf[4];         /* Bytes waiting to be decoded (already
                                   translated to their 6-bit values) */
  int buflen;                   /* Number of bytes waiting */
  svn_boolean_t done;           /* True if we already saw an '=' */
  apr_pool_t *scratch_pool;     /* Cleared after each write, destroyed
                                   when the stream is closed */
};


/* Base64-decode one group: combine the four values at IN into the
   three bytes stored at OUT.  The values at IN must already have been
   translated from base64 chars into the range 0..63; their 4 x 6 bits
   are simply concatenated into 3 x 8 bits.  IN and OUT must not
   overlap.  */
static APR_INLINE void
decode_group(const unsigned char *in, char *out)
{
  const int v0 = in[0];
  const int v1 = in[1];
  const int v2 = in[2];
  const int v3 = in[3];

  out[0] = (char)((v0 << 2) | (v1 >> 4));           /* 6 + 2 bits */
  out[1] = (char)(((v1 & 0xf) << 4) | (v2 >> 2));   /* 4 + 4 bits */
  out[2] = (char)(((v2 & 0x3) << 6) | v3);          /* 2 + 6 bits */
}

/* Lookup table for base64 characters; reverse_base64[ch] gives a
   negative value (-1) if ch is not a valid base64 character, or
   otherwise the 6-bit value represented by it; 'A' => 0 etc.  This is
   the inverse of base64tab.  Note that '=' and newline are "invalid"
   as far as this table is concerned.  Each row covers 16 character
   codes. */
static const signed char reverse_base64[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* '+' '/' */
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, /* '0'-'9' */
-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 'A'-'O' */
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 'P'-'Z' */
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 'a'-'o' */
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, /* 'p'-'z' */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/* Similar to decode_group(), but IN holds four raw base64 chars which
   this function translates into their 6-bit values itself before
   packing them into the three bytes at OUT.
   Return FALSE if any of the four chars is not a base64 char (e.g.
   '=' or newline), TRUE otherwise.  OUT[0..2] are written in both
   cases; after a FALSE return their contents are meaningless. */
static APR_INLINE svn_boolean_t
decode_group_directly(const unsigned char *in, char *out)
{
  /* The value an invalid char (-1 in reverse_base64) turns into. */
  const apr_size_t invalid = (unsigned char)(-1);

  /* Translate the base64 chars into values [0..63, 0xff]. */
  const apr_size_t v0 = (unsigned char)reverse_base64[in[0]];
  const apr_size_t v1 = (unsigned char)reverse_base64[in[1]];
  const apr_size_t v2 = (unsigned char)reverse_base64[in[2]];
  const apr_size_t v3 = (unsigned char)reverse_base64[in[3]];

  /* Valid values use the low 6 bits only, so the OR of all four equals
     0xff iff at least one of them is the invalid marker. */
  const apr_size_t merged = v0 | v1 | v2 | v3;

  /* Pack 4x6 bits into 3x8. */
  out[0] = (char)((v0 << 2) | (v1 >> 4));
  out[1] = (char)(((v1 & 0xf) << 4) | (v2 >> 2));
  out[2] = (char)(((v2 & 0x3) << 6) | v3);

  return merged != invalid;
}

/* Base64-decode up to BASE64_LINELEN chars from *DATA and append the
   resulting bytes to STR.  After the function returns, *DATA will
   point to the first char that has not been translated, yet.  Returns
   TRUE if all BASE64_LINELEN chars could be translated, i.e. no
   special char (anything outside the base64 alphabet, such as '=' or
   a newline) has been encountered in between.  If FALSE is returned,
   *DATA points to the start of the 4-char group containing the special
   char and the caller should continue with the slow standard method.

   The code in this function will simply transform the data without
   performing any boundary checks.  Therefore, DATA must have at least
   BASE64_LINELEN left and space for at least another BYTES_PER_LINE
   chars (plus the terminating NUL) must have been pre-allocated in STR
   before calling this function. */
static svn_boolean_t
decode_line(svn_stringbuf_t *str, const char **data)
{
  /* Decode up to BYTES_PER_LINE bytes directly from *DATA into STR->DATA.
     Convert the pointer *value* to 'unsigned char'; do not reinterpret
     the 'const char *' object itself through an incompatible pointer
     type. */
  const unsigned char *p = (const unsigned char *)*data;
  char *out = str->data + str->len;
  char *end = out + BYTES_PER_LINE;

  /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
     a multiple of 4.  Stop translation as soon as we encounter a special
     char.  That group is not consumed: neither P nor OUT is advanced
     past it, although decode_group_directly() has already scribbled
     over OUT[0..2] (which lie within the pre-allocated space). */
  for (; out < end; p += 4, out += 3)
    if (!decode_group_directly(p, out))
      break;

  /* Update string sizes and positions. */
  str->len = out - str->data;
  *out = '\0';
  *data = (const char *)p;

  /* Return FALSE, if the caller should continue the decoding process
     using the slow standard method. */
  return out == end;
}


/* (Continue to) Base64-decode the byte string DATA (of length LEN)
   into STR. INBUF, INBUFLEN, and DONE are used internally; the
   caller shall have room for four bytes in INBUF and initialize
   *INBUFLEN to 0 and *DONE to FALSE.

   INBUF and *INBUFLEN carry the leftover values (already translated
   into the range 0..63) from call to call, and *DONE keeps track of
   whether we've seen an '=' which terminates the encoded data.  Once
   *DONE is set, all further input is ignored.  Chars that are neither
   base64 chars nor '=' (e.g. newlines) are skipped silently. */
static void
decode_bytes(svn_stringbuf_t *str, const char *data, apr_size_t len,
             unsigned char *inbuf, int *inbuflen, svn_boolean_t *done)
{
  const char *p = data;
  char group[3];
  signed char find;
  const char *end = data + len;

  /* Resize the stringbuf to make room for the maximum size of output,
     to avoid repeated resizes later.  The optimizations in
     decode_line rely on no resizes being necessary!

     (*inbuflen+len) is encoded data length
     (*inbuflen+len)/4 is the number of complete 4-bytes sets
     (*inbuflen+len)/4*3 is the number of decoded bytes
     svn_stringbuf_ensure will add an additional byte for the terminating 0.
  */
  svn_stringbuf_ensure(str, str->len + ((*inbuflen + len) / 4) * 3);

  while ( !*done && p < end )
    {
      /* If no data is left in temporary INBUF and there is at least
         one line-sized chunk left to decode, we may use the optimized
         code path.
         The remaining length is compared instead of computing
         P + BASE64_LINELEN, because that pointer could lie more than
         one element past the end of DATA, which is undefined. */
      if ((*inbuflen == 0) && (end - p >= BASE64_LINELEN))
        {
          if (decode_line(str, &p))
            continue;
        }

      /* A special case or decode_line encountered a special char.
         In the latter case P still points into the line, i.e. P < END. */
      if (*p == '=')
        {
          /* We are at the end and have to decode a partial group.
             N pending values (N = 2 or 3) yield N - 1 complete bytes;
             fewer than 2 pending values do not yield any. */
          if (*inbuflen >= 2)
            {
              memset(inbuf + *inbuflen, 0, 4 - *inbuflen);
              decode_group(inbuf, group);
              svn_stringbuf_appendbytes(str, group, *inbuflen - 1);
            }
          *done = TRUE;
        }
      else
        {
          find = reverse_base64[(unsigned char)*p];
          ++p;

          /* Collect valid chars only; everything else is dropped. */
          if (find >= 0)
            inbuf[(*inbuflen)++] = (unsigned char)find;
          if (*inbuflen == 4)
            {
              decode_group(inbuf, group);
              svn_stringbuf_appendbytes(str, group, 3);
              *inbuflen = 0;
            }
        }
    }
}


/* Write handler for svn_base64_decode(): decode the *LEN chars at DATA
   and pass the decoded bytes on to the output stream.  *LEN is left
   unchanged.  Returns the error of the downstream write, if any.  */
static svn_error_t *
decode_data(void *baton, const char *data, apr_size_t *len)
{
  struct decode_baton *db = baton;
  svn_error_t *err = SVN_NO_ERROR;
  svn_stringbuf_t *chunk = svn_stringbuf_create_empty(db->scratch_pool);
  apr_size_t nbytes;

  /* Decode this block; incomplete groups stay behind in the baton.  */
  decode_bytes(chunk, data, *len, db->buf, &db->buflen, &db->done);

  /* Only bother the output stream if there is something to write.  */
  nbytes = chunk->len;
  if (nbytes > 0)
    err = svn_stream_write(db->output, chunk->data, &nbytes);

  /* The decoded chunk is no longer needed.  */
  svn_pool_clear(db->scratch_pool);
  return err;
}


/* Close handler for svn_base64_decode(): close the output stream and
   destroy the baton's scratch pool.  Values of an incomplete group that
   are still pending in the baton are discarded.  Returns the result of
   closing the output stream.  */
static svn_error_t *
finish_decoding_data(void *baton)
{
  struct decode_baton *db = baton;
  svn_error_t *close_err = svn_stream_close(db->output);

  svn_pool_destroy(db->scratch_pool);
  return close_err;
}


/* Return a writable stream, allocated in POOL, that base64-decodes all
   data written to it and writes the result to OUTPUT.  Closing the
   returned stream closes OUTPUT as well. */
svn_stream_t *
svn_base64_decode(svn_stream_t *output, apr_pool_t *pool)
{
  svn_stream_t *decoder;
  struct decode_baton *baton = apr_palloc(pool, sizeof(*baton));

  /* Nothing buffered and no terminating '=' seen yet. */
  baton->output = output;
  baton->buflen = 0;
  baton->done = FALSE;
  baton->scratch_pool = svn_pool_create(pool);

  decoder = svn_stream_create(baton, pool);
  svn_stream_set_write(decoder, decode_data);
  svn_stream_set_close(decoder, finish_decoding_data);

  return decoder;
}


/* Return the base64-decoded contents of STR as a new string allocated
   in POOL.  Decoding stops at the first '='; chars of an incomplete
   trailing group that is not terminated by '=' are dropped. */
const svn_string_t *
svn_base64_decode_string(const svn_string_t *str, apr_pool_t *pool)
{
  unsigned char pending[4];     /* values of an incomplete group */
  int npending = 0;
  svn_boolean_t saw_terminator = FALSE;
  svn_stringbuf_t *result = svn_stringbuf_create_empty(pool);

  decode_bytes(result, str->data, str->len, pending, &npending,
               &saw_terminator);

  return svn_stringbuf__morph_into_string(result);
}


/* Return a base64-encoded representation of CHECKSUM's digest as a
   string buffer allocated in POOL, without a trailing newline.
   If CHECKSUM->kind is not recognized, return NULL.
   ### That 'NULL' claim was in the header file when this was public, but
   doesn't look true in the implementation.

   ### This is now only used as a new implementation of svn_base64_from_md5();
   it would probably be safer to revert that to its old implementation. */
static svn_stringbuf_t *
base64_from_checksum(const svn_checksum_t *checksum, apr_pool_t *pool)
{
  unsigned char pending[3];     /* bytes of an incomplete group */
  size_t npending = 0;
  size_t column = 0;            /* chars on the current output line */
  svn_stringbuf_t *encoded = svn_stringbuf_create_empty(pool);

  /* Encode the digest with line breaking enabled. */
  encode_bytes(encoded, checksum->digest,
               svn_checksum_size(checksum), pending, &npending,
               &column, TRUE);
  encode_partial_group(encoded, pending, npending, column, TRUE);

  /* With line breaking enabled, the encoder ends any non-empty output
     with a newline.  Cut off that last char again. */
  if (encoded->len > 0)
    encoded->data[--encoded->len] = 0;

  return encoded;
}


/* Return the base64 encoding of the MD5 digest DIGEST as a string
   buffer allocated in POOL.  The digest is wrapped into a checksum
   object first; base64_from_checksum() does the actual work. */
svn_stringbuf_t *
svn_base64_from_md5(unsigned char digest[], apr_pool_t *pool)
{
  return base64_from_checksum(svn_checksum__from_digest_md5(digest, pool),
                              pool);
}
OpenPOWER on IntegriCloud