diff options
Diffstat (limited to 'libavformat/id3v2.c')
-rw-r--r-- | libavformat/id3v2.c | 463 |
1 files changed, 391 insertions, 72 deletions
diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c index b8994b2..a8273e2 100644 --- a/libavformat/id3v2.c +++ b/libavformat/id3v2.c @@ -1,24 +1,37 @@ /* - * ID3v2 header parser * Copyright (c) 2003 Fabrice Bellard * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +/** + * @file + * ID3v2 header parser + * + * Specifications available at: + * http://id3.org/Developer_Information + */ + +#include "config.h" + +#if CONFIG_ZLIB +#include <zlib.h> +#endif + #include "libavutil/avstring.h" #include "libavutil/dict.h" #include "libavutil/intreadwrite.h" @@ -42,12 +55,14 @@ const AVMetadataConv ff_id3v2_34_metadata_conv[] = { { "TPUB", "publisher" }, { "TRCK", "track" }, { "TSSE", "encoder" }, + { "USLT", "lyrics" }, { 0 } }; const AVMetadataConv ff_id3v2_4_metadata_conv[] = { - { "TDRL", "date" }, + { "TCMP", "compilation" }, { "TDRC", "date" }, + { "TDRL", "date" }, { "TDEN", "creation_time" }, { "TSOA", "album-sort" }, { "TSOP", "artist-sort" }, @@ -58,6 +73,7 @@ const AVMetadataConv ff_id3v2_4_metadata_conv[] = { static const AVMetadataConv id3v2_2_metadata_conv[] = { { "TAL", "album" }, { "TCO", "genre" }, + { "TCP", "compilation" }, { "TT2", "title" }, { "TEN", "encoded_by" }, { "TP1", "artist" }, @@ -155,16 +171,58 @@ static unsigned int get_size(AVIOContext *s, int len) return v; } +static unsigned int size_to_syncsafe(unsigned int size) +{ + return (((size) & (0x7f << 0)) >> 0) + + (((size) & (0x7f << 8)) >> 1) + + (((size) & (0x7f << 16)) >> 2) + + (((size) & (0x7f << 24)) >> 3); +} + +/* No real verification, only check that the tag consists of + * a combination of capital alpha-numerical characters */ +static int is_tag(const char *buf, unsigned int len) +{ + if (!len) + return 0; + + while (len--) + if ((buf[len] < 'A' || + buf[len] > 'Z') && + (buf[len] < '0' || + buf[len] > '9')) + return 0; + + return 1; +} + +/** + * Return 1 if the tag of length len at the given offset is valid, 0 if not, -1 on error + */ +static int check_tag(AVIOContext *s, int offset, unsigned int len) +{ + char tag[4]; + + if (len > 4 || + avio_seek(s, offset, SEEK_SET) < 0 || + avio_read(s, tag, len) < len) + return -1; + else if (!AV_RB32(tag) || is_tag(tag, len)) + return 1; + + return 0; +} + /** * Free GEOB type extra metadata. */ static void free_geobtag(void *obj) { ID3v2ExtraMetaGEOB *geob = obj; - av_free(geob->mime_type); - av_free(geob->file_name); - av_free(geob->description); - av_free(geob->data); + av_freep(&geob->mime_type); + av_freep(&geob->file_name); + av_freep(&geob->description); + av_freep(&geob->data); av_free(geob); } @@ -258,7 +316,7 @@ static int decode_str(AVFormatContext *s, AVIOContext *pb, int encoding, * Parse a text tag. */ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, - const char *key) + AVDictionary **metadata, const char *key) { uint8_t *dst; int encoding, dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_VAL; @@ -293,7 +351,52 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, av_freep(&dst); if (dst) - av_dict_set(&s->metadata, key, dst, dict_flags); + av_dict_set(metadata, key, dst, dict_flags); +} + +static void read_uslt(AVFormatContext *s, AVIOContext *pb, int taglen, + AVDictionary **metadata) +{ + uint8_t lang[4]; + uint8_t *descriptor = NULL; // 'Content descriptor' + uint8_t *text = NULL; + char *key = NULL; + int encoding; + int ok = 0; + + if (taglen < 1) + goto error; + + encoding = avio_r8(pb); + taglen--; + + if (avio_read(pb, lang, 3) < 3) + goto error; + lang[3] = '\0'; + taglen -= 3; + + if (decode_str(s, pb, encoding, &descriptor, &taglen) < 0) + goto error; + + if (decode_str(s, pb, encoding, &text, &taglen) < 0) + goto error; + + // FFmpeg does not support hierarchical metadata, so concatenate the keys. + key = av_asprintf("lyrics-%s%s%s", descriptor[0] ? (char *)descriptor : "", + descriptor[0] ? "-" : "", + lang); + if (!key) + goto error; + + av_dict_set(metadata, key, text, 0); + + ok = 1; +error: + if (!ok) + av_log(s, AV_LOG_ERROR, "Error reading lyrics, skipped\n"); + av_free(descriptor); + av_free(text); + av_free(key); } /** @@ -313,14 +416,14 @@ static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB)); if (!geob_data) { - av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", + av_log(s, AV_LOG_ERROR, "Failed to alloc %"SIZE_SPECIFIER" bytes\n", sizeof(ID3v2ExtraMetaGEOB)); return; } new_extra = av_mallocz(sizeof(ID3v2ExtraMeta)); if (!new_extra) { - av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", + av_log(s, AV_LOG_ERROR, "Failed to alloc %"SIZE_SPECIFIER" bytes\n", sizeof(ID3v2ExtraMeta)); goto fail; } @@ -496,7 +599,7 @@ static void read_apic(AVFormatContext *s, AVIOContext *pb, int taglen, } apic->buf = av_buffer_alloc(taglen + FF_INPUT_BUFFER_PADDING_SIZE); - if (!apic->buf || avio_read(pb, apic->buf->data, taglen) != taglen) + if (!apic->buf || !taglen || avio_read(pb, apic->buf->data, taglen) != taglen) goto fail; memset(apic->buf->data + taglen, 0, FF_INPUT_BUFFER_PADDING_SIZE); @@ -514,6 +617,106 @@ fail: avio_seek(pb, end, SEEK_SET); } +static void read_chapter(AVFormatContext *s, AVIOContext *pb, int len, const char *ttag, ID3v2ExtraMeta **extra_meta, int isv34) +{ + AVRational time_base = {1, 1000}; + uint32_t start, end; + AVChapter *chapter; + uint8_t *dst = NULL; + int taglen; + char tag[5]; + + if (!s) { + /* We should probably just put the chapter data to extra_meta here + * and do the AVFormatContext-needing part in a separate + * ff_id3v2_parse_apic()-like function. */ + av_log(NULL, AV_LOG_DEBUG, "No AVFormatContext, skipped ID3 chapter data\n"); + return; + } + + if (decode_str(s, pb, 0, &dst, &len) < 0) + return; + if (len < 16) + return; + + start = avio_rb32(pb); + end = avio_rb32(pb); + avio_skip(pb, 8); + + chapter = avpriv_new_chapter(s, s->nb_chapters + 1, time_base, start, end, dst); + if (!chapter) { + av_free(dst); + return; + } + + len -= 16; + while (len > 10) { + if (avio_read(pb, tag, 4) < 4) + goto end; + tag[4] = 0; + taglen = avio_rb32(pb); + avio_skip(pb, 2); + len -= 10; + if (taglen < 0 || taglen > len) + goto end; + if (tag[0] == 'T') + read_ttag(s, pb, taglen, &chapter->metadata, tag); + else + avio_skip(pb, taglen); + len -= taglen; + } + + ff_metadata_conv(&chapter->metadata, NULL, ff_id3v2_34_metadata_conv); + ff_metadata_conv(&chapter->metadata, NULL, ff_id3v2_4_metadata_conv); +end: + av_free(dst); +} + +static void free_priv(void *obj) +{ + ID3v2ExtraMetaPRIV *priv = obj; + av_freep(&priv->owner); + av_freep(&priv->data); + av_freep(&priv); +} + +static void read_priv(AVFormatContext *s, AVIOContext *pb, int taglen, + const char *tag, ID3v2ExtraMeta **extra_meta, int isv34) +{ + ID3v2ExtraMeta *meta; + ID3v2ExtraMetaPRIV *priv; + + meta = av_mallocz(sizeof(*meta)); + priv = av_mallocz(sizeof(*priv)); + + if (!meta || !priv) + goto fail; + + if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &priv->owner, &taglen) < 0) + goto fail; + + priv->data = av_malloc(taglen); + if (!priv->data) + goto fail; + + priv->datasize = taglen; + + if (avio_read(pb, priv->data, priv->datasize) != priv->datasize) + goto fail; + + meta->tag = "PRIV"; + meta->data = priv; + meta->next = *extra_meta; + *extra_meta = meta; + + return; + +fail: + if (priv) + free_priv(priv); + av_freep(&meta); +} + typedef struct ID3v2EMFunc { const char *tag3; const char *tag4; @@ -526,6 +729,8 @@ typedef struct ID3v2EMFunc { static const ID3v2EMFunc id3v2_extra_meta_funcs[] = { { "GEO", "GEOB", read_geobtag, free_geobtag }, { "PIC", "APIC", read_apic, free_apic }, + { "CHAP","CHAP", read_chapter, NULL }, + { "PRIV","PRIV", read_priv, free_priv }, { NULL } }; @@ -538,7 +743,7 @@ static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34) { int i = 0; while (id3v2_extra_meta_funcs[i].tag3) { - if (!memcmp(tag, + if (tag && !memcmp(tag, (isv34 ? id3v2_extra_meta_funcs[i].tag4 : id3v2_extra_meta_funcs[i].tag3), (isv34 ? 4 : 3))) @@ -548,19 +753,25 @@ static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34) return NULL; } -static void id3v2_parse(AVFormatContext *s, int len, uint8_t version, +static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata, + AVFormatContext *s, int len, uint8_t version, uint8_t flags, ID3v2ExtraMeta **extra_meta) { - int isv34, tlen, unsync; + int isv34, unsync; + unsigned tlen; char tag[5]; - int64_t next, end = avio_tell(s->pb) + len; + int64_t next, end = avio_tell(pb) + len; int taghdrlen; const char *reason = NULL; - AVIOContext pb; + AVIOContext pb_local; AVIOContext *pbx; unsigned char *buffer = NULL; int buffer_size = 0; - const ID3v2EMFunc *extra_func; + const ID3v2EMFunc *extra_func = NULL; + unsigned char *uncompressed_buffer = NULL; + av_unused int uncompressed_buffer_size = 0; + + av_log(s, AV_LOG_DEBUG, "id3v2 ver:%d flags:%02X len:%d\n", version, flags, len); switch (version) { case 2: @@ -586,7 +797,7 @@ static void id3v2_parse(AVFormatContext *s, int len, uint8_t version, unsync = flags & 0x80; if (isv34 && flags & 0x40) { /* Extended header present, just skip over it */ - int extlen = get_size(s->pb, 4); + int extlen = get_size(pb, 4); if (version == 4) /* In v2.4 the length includes the length field we just read. */ extlen -= 4; @@ -595,35 +806,63 @@ static void id3v2_parse(AVFormatContext *s, int len, uint8_t version, reason = "invalid extended header length"; goto error; } - avio_skip(s->pb, extlen); + avio_skip(pb, extlen); + len -= extlen + 4; + if (len < 0) { + reason = "extended header too long."; + goto error; + } } while (len >= taghdrlen) { unsigned int tflags = 0; int tunsync = 0; + int tcomp = 0; + int tencr = 0; + unsigned long av_unused dlen; if (isv34) { - avio_read(s->pb, tag, 4); + if (avio_read(pb, tag, 4) < 4) + break; tag[4] = 0; if (version == 3) { - tlen = avio_rb32(s->pb); - } else - tlen = get_size(s->pb, 4); - tflags = avio_rb16(s->pb); + tlen = avio_rb32(pb); + } else { + /* some encoders incorrectly uses v3 sizes instead of syncsafe ones + * so check the next tag to see which one to use */ + tlen = avio_rb32(pb); + if (tlen > 0x7f) { + if (tlen < len) { + int64_t cur = avio_tell(pb); + + if (ffio_ensure_seekback(pb, 2 /* tflags */ + tlen + 4 /* next tag */)) + break; + + if (check_tag(pb, cur + 2 + size_to_syncsafe(tlen), 4) == 1) + tlen = size_to_syncsafe(tlen); + else if (check_tag(pb, cur + 2 + tlen, 4) != 1) + break; + avio_seek(pb, cur, SEEK_SET); + } else + tlen = size_to_syncsafe(tlen); + } + } + tflags = avio_rb16(pb); tunsync = tflags & ID3v2_FLAG_UNSYNCH; } else { - avio_read(s->pb, tag, 3); + if (avio_read(pb, tag, 3) < 3) + break; tag[3] = 0; - tlen = avio_rb24(s->pb); + tlen = avio_rb24(pb); } - if (tlen < 0 || tlen > len - taghdrlen) { - av_log(s, AV_LOG_WARNING, - "Invalid size in frame %s, skipping the rest of tag.\n", - tag); + if (tlen > (1<<28)) break; - } len -= taghdrlen + tlen; - next = avio_tell(s->pb) + tlen; + + if (len < 0) + break; + + next = avio_tell(pb) + tlen; if (!tlen) { if (tag[0]) @@ -633,57 +872,110 @@ static void id3v2_parse(AVFormatContext *s, int len, uint8_t version, } if (tflags & ID3v2_FLAG_DATALEN) { - avio_rb32(s->pb); + if (tlen < 4) + break; + dlen = avio_rb32(pb); tlen -= 4; - } + } else + dlen = tlen; + + tcomp = tflags & ID3v2_FLAG_COMPRESSION; + tencr = tflags & ID3v2_FLAG_ENCRYPTION; + + /* skip encrypted tags and, if no zlib, compressed tags */ + if (tencr || (!CONFIG_ZLIB && tcomp)) { + const char *type; + if (!tcomp) + type = "encrypted"; + else if (!tencr) + type = "compressed"; + else + type = "encrypted and compressed"; - if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) { - av_log(s, AV_LOG_WARNING, - "Skipping encrypted/compressed ID3v2 frame %s.\n", tag); - avio_skip(s->pb, tlen); + av_log(s, AV_LOG_WARNING, "Skipping %s ID3v2 frame %s.\n", type, tag); + avio_skip(pb, tlen); /* check for text tag or supported special meta tag */ } else if (tag[0] == 'T' || + !memcmp(tag, "USLT", 4) || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)))) { - if (unsync || tunsync) { - int64_t end = avio_tell(s->pb) + tlen; - uint8_t *b; + pbx = pb; + + if (unsync || tunsync || tcomp) { av_fast_malloc(&buffer, &buffer_size, tlen); if (!buffer) { av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen); goto seek; } + } + if (unsync || tunsync) { + int64_t end = avio_tell(pb) + tlen; + uint8_t *b; + b = buffer; - while (avio_tell(s->pb) < end && !s->pb->eof_reached) { - *b++ = avio_r8(s->pb); - if (*(b - 1) == 0xff && avio_tell(s->pb) < end - 1 && - !s->pb->eof_reached ) { - uint8_t val = avio_r8(s->pb); - *b++ = val ? val : avio_r8(s->pb); + while (avio_tell(pb) < end && b - buffer < tlen && !pb->eof_reached) { + *b++ = avio_r8(pb); + if (*(b - 1) == 0xff && avio_tell(pb) < end - 1 && + b - buffer < tlen && + !pb->eof_reached ) { + uint8_t val = avio_r8(pb); + *b++ = val ? val : avio_r8(pb); } } - ffio_init_context(&pb, buffer, b - buffer, 0, NULL, NULL, NULL, + ffio_init_context(&pb_local, buffer, b - buffer, 0, NULL, NULL, NULL, NULL); tlen = b - buffer; - pbx = &pb; // read from sync buffer - } else { - pbx = s->pb; // read straight from input + pbx = &pb_local; // read from sync buffer } + +#if CONFIG_ZLIB + if (tcomp) { + int err; + + av_log(s, AV_LOG_DEBUG, "Compresssed frame %s tlen=%d dlen=%ld\n", tag, tlen, dlen); + + av_fast_malloc(&uncompressed_buffer, &uncompressed_buffer_size, dlen); + if (!uncompressed_buffer) { + av_log(s, AV_LOG_ERROR, "Failed to alloc %ld bytes\n", dlen); + goto seek; + } + + if (!(unsync || tunsync)) { + err = avio_read(pb, buffer, tlen); + if (err < 0) { + av_log(s, AV_LOG_ERROR, "Failed to read compressed tag\n"); + goto seek; + } + tlen = err; + } + + err = uncompress(uncompressed_buffer, &dlen, buffer, tlen); + if (err != Z_OK) { + av_log(s, AV_LOG_ERROR, "Failed to uncompress tag: %d\n", err); + goto seek; + } + ffio_init_context(&pb_local, uncompressed_buffer, dlen, 0, NULL, NULL, NULL, NULL); + tlen = dlen; + pbx = &pb_local; // read from sync buffer + } +#endif if (tag[0] == 'T') /* parse text tag */ - read_ttag(s, pbx, tlen, tag); + read_ttag(s, pbx, tlen, metadata, tag); + else if (!memcmp(tag, "USLT", 4)) + read_uslt(s, pbx, tlen, metadata); else /* parse special meta tag */ extra_func->read(s, pbx, tlen, tag, extra_meta, isv34); } else if (!tag[0]) { if (tag[1]) - av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding"); - avio_skip(s->pb, tlen); + av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding\n"); + avio_skip(pb, tlen); break; } /* Skip to end of tag */ seek: - avio_seek(s->pb, next, SEEK_SET); + avio_seek(pb, next, SEEK_SET); } /* Footer preset, always 10 bytes, skip over it */ @@ -694,25 +986,38 @@ error: if (reason) av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason); - avio_seek(s->pb, end, SEEK_SET); + avio_seek(pb, end, SEEK_SET); av_free(buffer); + av_free(uncompressed_buffer); return; } -void ff_id3v2_read(AVFormatContext *s, const char *magic, - ID3v2ExtraMeta **extra_meta) +static void id3v2_read_internal(AVIOContext *pb, AVDictionary **metadata, + AVFormatContext *s, const char *magic, + ID3v2ExtraMeta **extra_meta, int64_t max_search_size) { int len, ret; uint8_t buf[ID3v2_HEADER_SIZE]; int found_header; - int64_t off; + int64_t start, off; + + if (max_search_size && max_search_size < ID3v2_HEADER_SIZE) + return; + start = avio_tell(pb); do { /* save the current offset in case there's nothing to read/skip */ - off = avio_tell(s->pb); - ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE); - if (ret != ID3v2_HEADER_SIZE) + off = avio_tell(pb); + if (max_search_size && off - start >= max_search_size - ID3v2_HEADER_SIZE) { + avio_seek(pb, off, SEEK_SET); break; + } + + ret = avio_read(pb, buf, ID3v2_HEADER_SIZE); + if (ret != ID3v2_HEADER_SIZE) { + avio_seek(pb, off, SEEK_SET); + break; + } found_header = ff_id3v2_match(buf, magic); if (found_header) { /* parse ID3v2 header */ @@ -720,15 +1025,27 @@ void ff_id3v2_read(AVFormatContext *s, const char *magic, ((buf[7] & 0x7f) << 14) | ((buf[8] & 0x7f) << 7) | (buf[9] & 0x7f); - id3v2_parse(s, len, buf[3], buf[5], extra_meta); + id3v2_parse(pb, metadata, s, len, buf[3], buf[5], extra_meta); } else { - avio_seek(s->pb, off, SEEK_SET); + avio_seek(pb, off, SEEK_SET); } } while (found_header); - ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv); - ff_metadata_conv(&s->metadata, NULL, id3v2_2_metadata_conv); - ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv); - merge_date(&s->metadata); + ff_metadata_conv(metadata, NULL, ff_id3v2_34_metadata_conv); + ff_metadata_conv(metadata, NULL, id3v2_2_metadata_conv); + ff_metadata_conv(metadata, NULL, ff_id3v2_4_metadata_conv); + merge_date(metadata); +} + +void ff_id3v2_read_dict(AVIOContext *pb, AVDictionary **metadata, + const char *magic, ID3v2ExtraMeta **extra_meta) +{ + id3v2_read_internal(pb, metadata, NULL, magic, extra_meta, 0); +} + +void ff_id3v2_read(AVFormatContext *s, const char *magic, + ID3v2ExtraMeta **extra_meta, unsigned int max_search_size) +{ + id3v2_read_internal(s->pb, &s->metadata, s, magic, extra_meta, max_search_size); } void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta) @@ -743,6 +1060,8 @@ void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta) av_freep(¤t); current = next; } + + *extra_meta = NULL; } int ff_id3v2_parse_apic(AVFormatContext *s, ID3v2ExtraMeta **extra_meta) |