From 58288c7cb28d72c93ad5309f1cfe7163be8b12fd Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 26 Aug 2021 18:36:00 +0200 Subject: [PATCH] mp4: Remove tracks array. The mp4 structure currently contains an array of 1024 track pointers which are initialized to point to track structures allocated as we encounter tracks. This is wasteful because we only care about audio tracks, and only ever consider the first one. This patch replaces the pointer array with a single track structure embedded within struct mp4. Besides the above-mentioned memory savings, this approach allows us to remove a bunch of identical sanity checks in the atom parsers. The old code maintained the ->audio_track pointer of struct mp4 to tell whether we already saw an mp4a atom and thus already allocated a structure for the corresponding track. We now use a state-based approach with three states instead. The state value determines whether we have to parse the atom. The first state transition takes place when the mp4a atom is encountered, while the second transition occurs at the subsequent trak atom, if any. If an atom parser is called while the state machine is in an unexpected state, we return success rather than an error code to ignore the atom without failing the whole operation. --- mp4.c | 126 +++++++++++++++++++++++++++------------------------------- 1 file changed, 58 insertions(+), 68 deletions(-) diff --git a/mp4.c b/mp4.c index 365dce65..3963f783 100644 --- a/mp4.c +++ b/mp4.c @@ -13,7 +13,23 @@ #include "string.h" #include "mp4.h" +/** + * The three states of the mp4 parser. The parser only loads the audio specific + * values and tables when it is in the second state. + */ +enum audio_track_state { + /** We haven't encountered an mp4a atom so far. */ + ATS_INITIAL, + /** We have seen an mp4a atom but no subsequent trak atom yet. */ + ATS_SEEN_MP4A, + /** A trak atom was seen *after* the mp4a atom. 
*/ + ATS_TRACK_CHANGE, +}; + struct mp4_track { + /* determines which atoms we still need to parse. */ + enum audio_track_state state; + /* mp4a */ uint16_t channel_count; uint16_t sample_rate; @@ -41,8 +57,6 @@ struct mp4_track { uint64_t duration; }; -#define MAX_TRACKS 1024 - struct mp4 { const struct mp4_callback *cb; @@ -56,14 +70,7 @@ struct mp4 { uint32_t udta_size; uint8_t last_atom; - /* incremental track index while reading the file */ - int32_t total_tracks; - /* track data */ - struct mp4_track *track[MAX_TRACKS]; - /* the first audio track found */ - struct mp4_track *audio_track; - - /* metadata */ + struct mp4_track track; struct mp4_metadata meta; }; @@ -207,11 +214,10 @@ static int read_stsz(struct mp4 *f) { int ret; int32_t i; - struct mp4_track *t; + struct mp4_track *t = &f->track; - if (f->total_tracks == 0) - return -1; - t = f->track[f->total_tracks - 1]; + if (t->state != ATS_SEEN_MP4A || t->stsz_table) + return 1; skip_bytes(f, 4); /* version (1), flags (3) */ ret = read_int32(f, &t->stsz_sample_size); if (ret <= 0) @@ -234,13 +240,10 @@ static int read_stts(struct mp4 *f) { int ret; int32_t i; - struct mp4_track *t; + struct mp4_track *t = &f->track; - if (f->total_tracks == 0) - return -1; - t = f->track[f->total_tracks - 1]; - if (t->stts_entry_count) - return 0; + if (t->state != ATS_SEEN_MP4A || t->stts_sample_count) + return 1; skip_bytes(f, 4); /* version (1), flags (3) */ ret = read_int32(f, &t->stts_entry_count); if (ret <= 0) @@ -260,12 +263,12 @@ static int read_stsc(struct mp4 *f) { int ret; int32_t i; - struct mp4_track *t; - - if (f->total_tracks == 0) - return -1; - t = f->track[f->total_tracks - 1]; + struct mp4_track *t = &f->track; + if (t->state != ATS_SEEN_MP4A) + return 1; + if (t->stsc_first_chunk || t->stsc_samples_per_chunk) + return 1; skip_bytes(f, 4); /* version (1), flags (3) */ ret = read_int32(f, &t->stsc_entry_count); if (ret <= 0) @@ -289,12 +292,10 @@ static int read_stco(struct mp4 *f) { int ret; int32_t i; 
- struct mp4_track *t; - - if (f->total_tracks == 0) - return -1; - t = f->track[f->total_tracks - 1]; + struct mp4_track *t = &f->track; + if (t->state != ATS_SEEN_MP4A || t->stco_chunk_offset) + return 1; skip_bytes(f, 4); /* version (1), flags (3) */ ret = read_int32(f, &t->stco_entry_count); if (ret <= 0) @@ -313,11 +314,9 @@ static int read_stsd(struct mp4 *f) { int ret; uint32_t i, entry_count; - struct mp4_track *t; - if (f->total_tracks == 0) - return -1; - t = f->track[f->total_tracks - 1]; + if (f->track.state != ATS_INITIAL) + return 1; skip_bytes(f, 4); /* version (1), flags (3) */ ret = read_int32(f, &entry_count); if (ret <= 0) @@ -330,15 +329,15 @@ static int read_stsd(struct mp4 *f) if (ret <= 0) return ret; skip += size; - if (!f->audio_track && atom_type == ATOM_MP4A) { - f->audio_track = t; + if (atom_type == ATOM_MP4A) { + f->track.state = ATS_SEEN_MP4A; /* reserved (6), data reference index (2), reserved (8) */ skip_bytes(f, 16); - ret = read_int16(f, &t->channel_count); + ret = read_int16(f, &f->track.channel_count); if (ret <= 0) return ret; skip_bytes(f, 6); - ret = read_int16(f, &t->sample_rate); + ret = read_int16(f, &f->track.sample_rate); if (ret <= 0) return ret; } @@ -412,12 +411,10 @@ static int read_mdhd(struct mp4 *f) { int ret; uint32_t version; - struct mp4_track *t; - - if (f->total_tracks == 0) - return -1; - t = f->track[f->total_tracks - 1]; + struct mp4_track *t = &f->track; + if (t->state != ATS_INITIAL) + return 1; ret = read_int32(f, &version); if (ret <= 0) return ret; @@ -544,13 +541,11 @@ static int parse_sub_atoms(struct mp4 *f, uint64_t total_size, bool meta_only) if (size == 0) return -1; dest = get_position(f) + size - header_size; - if (atom_type == ATOM_TRAK) { - if (f->total_tracks >= MAX_TRACKS) - return -1; - f->total_tracks++; - f->track[f->total_tracks - 1] = para_calloc( - sizeof(struct mp4_track)); - } else if (atom_type == ATOM_UDTA) { + if (atom_type == ATOM_TRAK && f->track.state == ATS_SEEN_MP4A) { + 
f->track.state = ATS_TRACK_CHANGE; + continue; + } + if (atom_type == ATOM_UDTA) { f->udta_offset = get_position(f) - header_size; f->udta_size = size; } @@ -602,7 +597,7 @@ static int open_file(const struct mp4_callback *cb, bool meta_only, struct mp4 * goto fail; } ret = -E_MP4_TRACK; - if (!f->audio_track) + if (f->track.channel_count == 0) goto fail; *result = f; return 1; @@ -621,16 +616,11 @@ void mp4_close(struct mp4 *f) { int32_t i; - for (i = 0; i < f->total_tracks; i++) { - if (f->track[i]) { - free(f->track[i]->stsz_table); - free(f->track[i]->stts_sample_count); - free(f->track[i]->stsc_first_chunk); - free(f->track[i]->stsc_samples_per_chunk); - free(f->track[i]->stco_chunk_offset); - free(f->track[i]); - } - } + free(f->track.stsz_table); + free(f->track.stts_sample_count); + free(f->track.stsc_first_chunk); + free(f->track.stsc_samples_per_chunk); + free(f->track.stco_chunk_offset); for (i = 0; i < f->meta.count; i++) { free(f->meta.tags[i].item); free(f->meta.tags[i].value); @@ -642,7 +632,7 @@ void mp4_close(struct mp4 *f) static int32_t chunk_of_sample(const struct mp4 *f, int32_t sample, int32_t *chunk) { - const struct mp4_track *t = f->audio_track; + const struct mp4_track *t = &f->track; uint32_t *fc = t->stsc_first_chunk, *spc = t->stsc_samples_per_chunk; int32_t chunk1, chunk1samples, n, total, i; @@ -667,7 +657,7 @@ static int32_t chunk_of_sample(const struct mp4 *f, int32_t sample, */ uint64_t mp4_get_duration(const struct mp4 *f) { - const struct mp4_track *t = f->audio_track; + const struct mp4_track *t = &f->track; if (t->time_scale == 0) return 0; @@ -676,7 +666,7 @@ uint64_t mp4_get_duration(const struct mp4 *f) int mp4_set_sample_position(struct mp4 *f, int32_t sample) { - const struct mp4_track *t = f->audio_track; + const struct mp4_track *t = &f->track; int32_t offset, chunk, chunk_sample; uint32_t n, srs; /* sample range size */ @@ -701,7 +691,7 @@ int mp4_set_sample_position(struct mp4 *f, int32_t sample) int32_t 
mp4_get_sample_size(const struct mp4 *f, int sample) { - const struct mp4_track *t = f->audio_track; + const struct mp4_track *t = &f->track; if (t->stsz_sample_size != 0) return t->stsz_sample_size; @@ -710,17 +700,17 @@ int32_t mp4_get_sample_size(const struct mp4 *f, int sample) uint32_t mp4_get_sample_rate(const struct mp4 *f) { - return f->audio_track->sample_rate; + return f->track.sample_rate; } uint32_t mp4_get_channel_count(const struct mp4 *f) { - return f->audio_track->channel_count; + return f->track.channel_count; } int32_t mp4_num_samples(const struct mp4 *f) { - const struct mp4_track *t = f->audio_track; + const struct mp4_track *t = &f->track; int32_t i; int32_t total = 0; -- 2.39.5