#include "string.h"
#include "mp4.h"
+/**
+ * The three states of the mp4 parser. The parser only loads the audio specific
+ * values and tables when it is in the second state.
+ *
+ * The state lives in struct mp4_track. In the code visible here it only ever
+ * advances: to ATS_SEEN_MP4A when an mp4a atom is recognized, and from there
+ * to ATS_TRACK_CHANGE when the next trak atom shows up. An atom reader which
+ * finds the parser in a state it is not meant for returns 1 before reading
+ * anything.
+ */
+enum audio_track_state {
+ /**
+ * We haven't encountered an mp4a atom so far. The reader which detects
+ * the mp4a atom only does its work in this state.
+ */
+ ATS_INITIAL,
+ /**
+ * We have seen an mp4a atom but no subsequent trak atom yet. The stsz,
+ * stts, stsc and stco data of the track is only read in this state.
+ */
+ ATS_SEEN_MP4A,
+ /**
+ * A trak atom was seen *after* the mp4a atom. None of the state-guarded
+ * readers visible here does anything once this state is reached.
+ */
+ ATS_TRACK_CHANGE,
+};
+
/*
 * Parser state and parameters of the one audio track that is kept per file;
 * struct mp4 embeds a single instance of this structure.
 */
struct mp4_track {
+ /* determines which atoms we still need to parse. */
+ enum audio_track_state state;
+
 /* mp4a */
 /*
  * Both 16 bit values below are read when the mp4a atom is recognized. A
  * channel count of zero after parsing is treated as "no audio track".
  */
 uint16_t channel_count;
 uint16_t sample_rate;
 uint64_t duration;
};
-#define MAX_TRACKS 1024
-
/*
 * Per-file parser context. It holds the callbacks, bookkeeping for the udta
 * atom, the single embedded audio track and the metadata tags.
 */
struct mp4 {
 const struct mp4_callback *cb;
 uint32_t udta_size; /* size of the udta atom, recorded when it is encountered */
 uint8_t last_atom;
- /* incremental track index while reading the file */
- int32_t total_tracks;
- /* track data */
- struct mp4_track *track[MAX_TRACKS];
- /* the first audio track found */
- struct mp4_track *audio_track;
-
- /* metadata */
+ struct mp4_track track; /* the one audio track; embedded, hence never allocated separately */
 struct mp4_metadata meta; /* tags; each has an item and a value string */
};
{
int ret;
int32_t i;
- struct mp4_track *t;
+ struct mp4_track *t = &f->track;
- if (f->total_tracks == 0)
- return -1;
- t = f->track[f->total_tracks - 1];
+ if (t->state != ATS_SEEN_MP4A || t->stsz_table)
+ return 1;
skip_bytes(f, 4); /* version (1), flags (3) */
ret = read_int32(f, &t->stsz_sample_size);
if (ret <= 0)
{
int ret;
int32_t i;
- struct mp4_track *t;
+ struct mp4_track *t = &f->track;
- if (f->total_tracks == 0)
- return -1;
- t = f->track[f->total_tracks - 1];
- if (t->stts_entry_count)
- return 0;
+ if (t->state != ATS_SEEN_MP4A || t->stts_sample_count)
+ return 1;
skip_bytes(f, 4); /* version (1), flags (3) */
ret = read_int32(f, &t->stts_entry_count);
if (ret <= 0)
{
int ret;
int32_t i;
- struct mp4_track *t;
-
- if (f->total_tracks == 0)
- return -1;
- t = f->track[f->total_tracks - 1];
+ struct mp4_track *t = &f->track;
+ if (t->state != ATS_SEEN_MP4A)
+ return 1;
+ if (t->stsc_first_chunk || t->stsc_samples_per_chunk)
+ return 1;
skip_bytes(f, 4); /* version (1), flags (3) */
ret = read_int32(f, &t->stsc_entry_count);
if (ret <= 0)
{
int ret;
int32_t i;
- struct mp4_track *t;
-
- if (f->total_tracks == 0)
- return -1;
- t = f->track[f->total_tracks - 1];
+ struct mp4_track *t = &f->track;
+ if (t->state != ATS_SEEN_MP4A || t->stco_chunk_offset)
+ return 1;
skip_bytes(f, 4); /* version (1), flags (3) */
ret = read_int32(f, &t->stco_entry_count);
if (ret <= 0)
{
int ret;
uint32_t i, entry_count;
- struct mp4_track *t;
- if (f->total_tracks == 0)
- return -1;
- t = f->track[f->total_tracks - 1];
+ if (f->track.state != ATS_INITIAL)
+ return 1;
skip_bytes(f, 4); /* version (1), flags (3) */
ret = read_int32(f, &entry_count);
if (ret <= 0)
if (ret <= 0)
return ret;
skip += size;
- if (!f->audio_track && atom_type == ATOM_MP4A) {
- f->audio_track = t;
+ if (atom_type == ATOM_MP4A) {
+ f->track.state = ATS_SEEN_MP4A;
/* reserved (6), data reference index (2), reserved (8) */
skip_bytes(f, 16);
- ret = read_int16(f, &t->channel_count);
+ ret = read_int16(f, &f->track.channel_count);
if (ret <= 0)
return ret;
skip_bytes(f, 6);
- ret = read_int16(f, &t->sample_rate);
+ ret = read_int16(f, &f->track.sample_rate);
if (ret <= 0)
return ret;
}
{
int ret;
uint32_t version;
- struct mp4_track *t;
-
- if (f->total_tracks == 0)
- return -1;
- t = f->track[f->total_tracks - 1];
+ struct mp4_track *t = &f->track;
+ if (t->state != ATS_INITIAL)
+ return 1;
ret = read_int32(f, &version);
if (ret <= 0)
return ret;
if (size == 0)
return -1;
dest = get_position(f) + size - header_size;
- if (atom_type == ATOM_TRAK) {
- if (f->total_tracks >= MAX_TRACKS)
- return -1;
- f->total_tracks++;
- f->track[f->total_tracks - 1] = para_calloc(
- sizeof(struct mp4_track));
- } else if (atom_type == ATOM_UDTA) {
+ if (atom_type == ATOM_TRAK && f->track.state == ATS_SEEN_MP4A) {
+ f->track.state = ATS_TRACK_CHANGE;
+ continue;
+ }
+ if (atom_type == ATOM_UDTA) {
f->udta_offset = get_position(f) - header_size;
f->udta_size = size;
}
goto fail;
}
ret = -E_MP4_TRACK;
- if (!f->audio_track)
+ if (f->track.channel_count == 0)
goto fail;
*result = f;
return 1;
{
int32_t i;
- for (i = 0; i < f->total_tracks; i++) {
- if (f->track[i]) {
- free(f->track[i]->stsz_table);
- free(f->track[i]->stts_sample_count);
- free(f->track[i]->stsc_first_chunk);
- free(f->track[i]->stsc_samples_per_chunk);
- free(f->track[i]->stco_chunk_offset);
- free(f->track[i]);
- }
- }
+ free(f->track.stsz_table);
+ free(f->track.stts_sample_count);
+ free(f->track.stsc_first_chunk);
+ free(f->track.stsc_samples_per_chunk);
+ free(f->track.stco_chunk_offset);
for (i = 0; i < f->meta.count; i++) {
free(f->meta.tags[i].item);
free(f->meta.tags[i].value);
static int32_t chunk_of_sample(const struct mp4 *f, int32_t sample,
int32_t *chunk)
{
- const struct mp4_track *t = f->audio_track;
+ const struct mp4_track *t = &f->track;
uint32_t *fc = t->stsc_first_chunk, *spc = t->stsc_samples_per_chunk;
int32_t chunk1, chunk1samples, n, total, i;
*/
uint64_t mp4_get_duration(const struct mp4 *f)
{
- const struct mp4_track *t = f->audio_track;
+ const struct mp4_track *t = &f->track;
if (t->time_scale == 0)
return 0;
int mp4_set_sample_position(struct mp4 *f, int32_t sample)
{
- const struct mp4_track *t = f->audio_track;
+ const struct mp4_track *t = &f->track;
int32_t offset, chunk, chunk_sample;
uint32_t n, srs; /* sample range size */
int32_t mp4_get_sample_size(const struct mp4 *f, int sample)
{
- const struct mp4_track *t = f->audio_track;
+ const struct mp4_track *t = &f->track;
if (t->stsz_sample_size != 0)
return t->stsz_sample_size;
/**
 * Get the sample rate recorded for the audio track.
 *
 * The value is stored as a 16 bit quantity in struct mp4_track and is widened
 * to 32 bits on return.
 */
uint32_t mp4_get_sample_rate(const struct mp4 *f)
{
- return f->audio_track->sample_rate;
+ return f->track.sample_rate;
}
/**
 * Get the channel count recorded for the audio track.
 *
 * The value is stored as a 16 bit quantity in struct mp4_track and is widened
 * to 32 bits on return.
 */
uint32_t mp4_get_channel_count(const struct mp4 *f)
{
- return f->audio_track->channel_count;
+ return f->track.channel_count;
}
int32_t mp4_num_samples(const struct mp4 *f)
{
- const struct mp4_track *t = f->audio_track;
+ const struct mp4_track *t = &f->track;
int32_t i;
int32_t total = 0;