// NOTE(review): this chunk is a non-contiguous, machine-extracted sample of a
// larger FFmpegReader.cpp. The leading integers on each code line are original
// file line numbers fused into the text by the extraction tool, and many
// intermediate lines are elided — code is left byte-identical below.
//
// Compile-time feature switches and HW-decode size limits.
32 #define ENABLE_VAAPI 0
// Max frame dimensions accepted by the hardware-decode path — presumably tied
// to the supported GPU surface limits; TODO confirm against target hardware.
35 #define MAX_SUPPORTED_WIDTH 1950
36 #define MAX_SUPPORTED_HEIGHT 1100
39 #include "libavutil/hwcontext_vaapi.h"
// Local mirror of FFmpeg's internal VAAPIDecodeContext layout, used further
// down to reach va_config from pCodecCtx->priv_data. NOTE(review): this
// duplicates a private FFmpeg struct and is fragile across FFmpeg versions —
// verify the layout matches the linked FFmpeg release.
41 typedef struct VAAPIDecodeContext {
43 VAEntrypoint va_entrypoint;
45 VAContextID va_context;
47 #if FF_API_STRUCT_VAAPI_CONTEXT
50 struct vaapi_context *old_context;
51 AVBufferRef *device_ref;
55 AVHWDeviceContext *device;
56 AVVAAPIDeviceContext *hwctx;
58 AVHWFramesContext *frames;
59 AVVAAPIFramesContext *hwfc;
61 enum AVPixelFormat surface_format;
// NOTE(review): the #endif trailer comments below may not match their opening
// #if directives — intervening lines are elided in this extraction.
64 #endif // ENABLE_VAAPI
65 #endif // USE_HW_ACCEL
// FFmpegReader constructor fragment (signature elided above this chunk).
// Member-initializer list: seek state, stream indices, codec/format contexts
// all start NULL/-1; NO_PTS_OFFSET(-99999) is the "no PTS seen yet" sentinel.
80 : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
81 path(
path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
82 seek_audio_frame_found(0), seek_video_frame_found(0),
83 last_seek_max_frame(-1), seek_stagnant_count(0),
84 is_duration_known(false), largest_frame_processed(0),
85 current_video_frame(0), packet(NULL), duration_strategy(duration_strategy),
86 audio_pts(0), video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
87 pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
// PTS-tracking seconds all start at the sentinel until real timestamps arrive.
95 pts_offset_seconds = NO_PTS_OFFSET;
96 video_pts_seconds = NO_PTS_OFFSET;
97 audio_pts_seconds = NO_PTS_OFFSET;
// Size both caches from the stream's frame geometry and audio parameters.
102 working_cache.SetMaxBytesFromInfo(init_working_cache_frames, info.width, info.height, info.sample_rate, info.channels);
103 final_cache.SetMaxBytesFromInfo(init_final_cache_frames, info.width, info.height, info.sample_rate, info.channels);
// Optional immediate Open/inspect pass — body elided in this extraction.
106 if (inspect_reader) {
// Orphan fragment (original line 128): presumably part of a nearly-equal
// comparison helper; surrounding context elided — TODO confirm.
128 if (abs(diff) <= amount)
// Callback for AVCodecContext::get_format: scan the decoder's offered pixel
// formats and pick the first hardware format supported on this platform
// (interior switch bodies elided in this extraction).
139 static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx,
const enum AVPixelFormat *pix_fmts)
141 const enum AVPixelFormat *p;
// pix_fmts is an AV_PIX_FMT_NONE-terminated list, per FFmpeg's get_format API.
146 for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
148 #if defined(__linux__)
150 case AV_PIX_FMT_VAAPI:
157 case AV_PIX_FMT_VDPAU:
// Windows-only formats (enclosing #elif presumably elided).
167 case AV_PIX_FMT_DXVA2_VLD:
174 case AV_PIX_FMT_D3D11:
182 #if defined(__APPLE__)
184 case AV_PIX_FMT_VIDEOTOOLBOX:
193 case AV_PIX_FMT_CUDA:
// No acceptable HW format found — caller falls back to software decode.
213 return AV_PIX_FMT_NONE;
// Whitelist of codec IDs eligible for hardware decode (switch header and
// return statements elided in this extraction).
216 int FFmpegReader::IsHardwareDecodeSupported(
int codecid)
220 case AV_CODEC_ID_H264:
221 case AV_CODEC_ID_MPEG2VIDEO:
222 case AV_CODEC_ID_VC1:
223 case AV_CODEC_ID_WMV1:
224 case AV_CODEC_ID_WMV2:
225 case AV_CODEC_ID_WMV3:
234 #endif // USE_HW_ACCEL
// FFmpegReader::Open(), part 1 (video): open the container, locate streams,
// and open the video decoder — retrying without HW accel when the HW path
// fails or the frame size exceeds device constraints. Heavily elided fragment.
240 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
// Open the input container and probe stream info; failures presumably throw
// (error paths elided).
250 if (avformat_open_input(&pFormatCtx,
path.c_str(), NULL, NULL) != 0)
254 if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
261 packet_status.
reset(
true);
// Pick the first video and first audio stream only.
264 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
266 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
273 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
280 if (videoStream == -1 && audioStream == -1)
284 if (videoStream != -1) {
289 pStream = pFormatCtx->streams[videoStream];
295 const AVCodec *pCodec = avcodec_find_decoder(codecId);
296 AVDictionary *
opts = NULL;
// retry_decode_open: 2 = first attempt (try HW), 1 = retry in software,
// 0 = done. Drives the do/while at the bottom of this section.
297 int retry_decode_open = 2;
302 if (
hw_de_on && (retry_decode_open==2)) {
304 hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
307 retry_decode_open = 0;
312 if (pCodec == NULL) {
313 throw InvalidCodec(
"A valid video codec could not be found for this file.",
path);
// Allow experimental codecs (e.g. some FFmpeg-native decoders).
317 av_dict_set(&
opts,
"strict",
"experimental", 0);
321 int i_decoder_hw = 0;
323 char *adapter_ptr = NULL;
// Install the HW pixel-format negotiation callback defined above.
329 pCodecCtx->get_format = get_hw_dec_format;
// Map a small adapter index to a DRM render node on Linux
// (renderD128, renderD129, ...).
331 if (adapter_num < 3 && adapter_num >=0) {
332 #if defined(__linux__)
333 snprintf(adapter,
sizeof(adapter),
"/dev/dri/renderD%d", adapter_num+128);
334 adapter_ptr = adapter;
// Per-platform mapping of the user-selected decoder index to an FFmpeg
// hwdevice type (case labels elided; last entry appears to be the default).
336 switch (i_decoder_hw) {
338 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
341 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
344 hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
347 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
350 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
354 #elif defined(_WIN32)
357 switch (i_decoder_hw) {
359 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
362 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
365 hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
368 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
371 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
374 #elif defined(__APPLE__)
377 switch (i_decoder_hw) {
379 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
382 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
385 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
// On Linux also require write access to the render node before trying it.
395 #if defined(__linux__)
396 if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
397 #elif defined(_WIN32)
398 if( adapter_ptr != NULL ) {
399 #elif defined(__APPLE__)
400 if( adapter_ptr != NULL ) {
409 hw_device_ctx = NULL;
// Create the HW device and attach a new reference to the codec context.
411 if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
412 const char* hw_name = av_hwdevice_get_type_name(hw_de_av_device_type);
413 std::string hw_msg =
"HW decode active: ";
414 hw_msg += (hw_name ? hw_name :
"unknown");
416 if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
452 #endif // USE_HW_ACCEL
// Frame-threading is disabled, presumably for seek determinism — TODO confirm.
459 pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
463 int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &
opts);
464 if (avcodec_return < 0) {
465 std::stringstream avcodec_error_msg;
466 avcodec_error_msg <<
"A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
// VAAPI path: query hwframe size constraints via the (private-layout)
// VAAPIDecodeContext mirror and fall back to software when out of range.
472 AVHWFramesConstraints *constraints = NULL;
473 void *hwconfig = NULL;
474 hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);
478 ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
479 constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
480 #endif // ENABLE_VAAPI
482 if (pCodecCtx->coded_width < constraints->min_width ||
483 pCodecCtx->coded_height < constraints->min_height ||
484 pCodecCtx->coded_width > constraints->max_width ||
485 pCodecCtx->coded_height > constraints->max_height) {
// Out of constraint range: drop the HW device and retry in software.
488 retry_decode_open = 1;
491 av_buffer_unref(&hw_device_ctx);
492 hw_device_ctx = NULL;
497 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Min width :", constraints->min_width,
"Min Height :", constraints->min_height,
"MaxWidth :", constraints->max_width,
"MaxHeight :", constraints->max_height,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
498 retry_decode_open = 0;
500 av_hwframe_constraints_free(&constraints);
// Non-VAAPI path: enforce the compile-time MAX_SUPPORTED_* limits instead.
513 if (pCodecCtx->coded_width < 0 ||
514 pCodecCtx->coded_height < 0 ||
515 pCodecCtx->coded_width > max_w ||
516 pCodecCtx->coded_height > max_h ) {
517 ZmqLogger::Instance()->
AppendDebugMethod(
"DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
519 retry_decode_open = 1;
522 av_buffer_unref(&hw_device_ctx);
523 hw_device_ctx = NULL;
527 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
528 retry_decode_open = 0;
536 retry_decode_open = 0;
537 #endif // USE_HW_ACCEL
// Loop back (retry_decode_open == 1) to reopen the codec without HW accel.
538 }
while (retry_decode_open);
// FFmpegReader::Open(), part 2 (audio + metadata): open the audio decoder,
// disable audio when its parameters are invalid, then import container
// metadata and per-stream side data (rotation, spherical). Elided fragment.
547 if (audioStream != -1) {
552 aStream = pFormatCtx->streams[audioStream];
558 const AVCodec *aCodec = avcodec_find_decoder(codecId);
564 bool audio_opened =
false;
565 if (aCodec != NULL) {
567 AVDictionary *
opts = NULL;
568 av_dict_set(&
opts,
"strict",
"experimental", 0);
571 audio_opened = (avcodec_open2(aCodecCtx, aCodec, &
opts) >= 0);
// Sanity-check decoded audio parameters; condition lines 583-586 elided —
// presumably also checks sample_rate/channels. TODO confirm.
582 const bool invalid_audio_info =
587 (aCodecCtx->sample_fmt == AV_SAMPLE_FMT_NONE);
588 if (invalid_audio_info) {
590 "FFmpegReader::Open (Disable invalid audio stream)",
595 "sample_fmt",
static_cast<int>(aCodecCtx ? aCodecCtx->sample_fmt : AV_SAMPLE_FMT_NONE));
601 if (avcodec_is_open(aCodecCtx)) {
602 avcodec_flush_buffers(aCodecCtx);
610 "FFmpegReader::Open (Audio codec unavailable; disabling audio)" below is a
612 "FFmpegReader::Open (Audio codec unavailable; disabling audio)",
613 "audioStream", audioStream);
// Fallback handling for bogus FPS / timebase (bodies elided).
629 "FFmpegReader::Open (Invalid FPS detected; applying fallback)",
637 "FFmpegReader::Open (Invalid video_timebase detected; applying fallback)",
// Copy all container-level metadata tags into info.metadata.
644 AVDictionaryEntry *tag = NULL;
645 while ((tag = av_dict_get(pFormatCtx->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
646 QString str_key = tag->key;
647 QString str_value = tag->value;
648 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
// Walk video-stream side data for display-matrix rotation and spherical
// projection info.
652 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
653 AVStream* st = pFormatCtx->streams[i];
654 if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
656 for (
int j = 0; j < st->nb_side_data; j++) {
657 AVPacketSideData *sd = &st->side_data[j];
// Display matrix is a 3x3 of int32; negate per FFmpeg convention to get the
// clockwise rotation applied on display.
660 if (sd->type == AV_PKT_DATA_DISPLAYMATRIX &&
661 sd->size >= 9 *
sizeof(int32_t) &&
664 double rotation = -av_display_rotation_get(
665 reinterpret_cast<int32_t *
>(sd->data));
666 if (std::isnan(rotation)) rotation = 0;
670 else if (sd->type == AV_PKT_DATA_SPHERICAL) {
675 const AVSphericalMapping* map =
676 reinterpret_cast<const AVSphericalMapping*
>(sd->data);
679 const char* proj_name = av_spherical_projection_name(map->projection);
// Spherical yaw/pitch/roll are 16.16 fixed-point degrees (hence / 65536).
685 auto to_deg = [](int32_t v){
686 return (
double)v / 65536.0;
688 info.
metadata[
"spherical_yaw"] = std::to_string(to_deg(map->yaw));
689 info.
metadata[
"spherical_pitch"] = std::to_string(to_deg(map->pitch));
690 info.
metadata[
"spherical_roll"] = std::to_string(to_deg(map->roll));
// Mark "no previous audio packet" before decoding starts.
698 previous_packet_location.
frame = -1;
// FFmpegReader::Close() fragment: drain/free the pending packet, flush both
// codecs, release HW device + swscale contexts, close the container, and
// reset all per-open state.
732 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
738 AVPacket *recent_packet = packet;
// Bounded drain loop (body elided) — caps cleanup attempts at 128.
743 int max_attempts = 128;
748 "attempts", attempts);
760 RemoveAVPacket(recent_packet);
765 if(avcodec_is_open(pCodecCtx)) {
766 avcodec_flush_buffers(pCodecCtx);
// Release the hardware decode device reference, if any.
772 av_buffer_unref(&hw_device_ctx);
773 hw_device_ctx = NULL;
776 #endif // USE_HW_ACCEL
777 if (img_convert_ctx) {
778 sws_freeContext(img_convert_ctx);
779 img_convert_ctx =
nullptr;
// Cached RGB frame cleanup (body elided).
781 if (pFrameRGB_cached) {
788 if(avcodec_is_open(aCodecCtx)) {
789 avcodec_flush_buffers(aCodecCtx);
801 working_cache.
Clear();
804 avformat_close_input(&pFormatCtx);
805 av_freep(&pFormatCtx);
// Reset bookkeeping so a subsequent Open() starts clean.
812 largest_frame_processed = 0;
813 seek_audio_frame_found = 0;
814 seek_video_frame_found = 0;
815 current_video_frame = 0;
816 last_video_frame.reset();
817 last_final_video_frame.reset();
// True when the "video" stream is really an attached picture (album art),
// e.g. cover art embedded in an audio file.
821 bool FFmpegReader::HasAlbumArt() {
825 return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
826 && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
// Choose a duration (seconds) from the candidate sources according to
// duration_strategy: prefer-video, prefer-audio, longest-of-all, or the
// format/inferred fallbacks. Case labels and some returns are elided.
829 double FFmpegReader::PickDurationSeconds()
const {
// A candidate is usable only when strictly positive.
830 auto has_value = [](
double value) {
return value > 0.0; };
832 switch (duration_strategy) {
// Strategy: video first, then audio, then container duration.
834 if (has_value(video_stream_duration_seconds))
835 return video_stream_duration_seconds;
836 if (has_value(audio_stream_duration_seconds))
837 return audio_stream_duration_seconds;
838 if (has_value(format_duration_seconds))
839 return format_duration_seconds;
// Strategy: audio first.
842 if (has_value(audio_stream_duration_seconds))
843 return audio_stream_duration_seconds;
844 if (has_value(video_stream_duration_seconds))
845 return video_stream_duration_seconds;
846 if (has_value(format_duration_seconds))
847 return format_duration_seconds;
// Strategy: take the longest of all known durations.
852 double longest = 0.0;
853 if (has_value(video_stream_duration_seconds))
854 longest = std::max(longest, video_stream_duration_seconds);
855 if (has_value(audio_stream_duration_seconds))
856 longest = std::max(longest, audio_stream_duration_seconds);
857 if (has_value(format_duration_seconds))
858 longest = std::max(longest, format_duration_seconds);
859 if (has_value(longest))
// Final fallbacks: container duration, then a duration inferred elsewhere.
865 if (has_value(format_duration_seconds))
866 return format_duration_seconds;
867 if (has_value(inferred_duration_seconds))
868 return inferred_duration_seconds;
// Convert the chosen duration into a whole-frame count and set info.duration;
// marks the duration unknown when no valid duration/FPS is available.
873 void FFmpegReader::ApplyDurationStrategy() {
875 const double chosen_seconds = PickDurationSeconds();
877 if (chosen_seconds <= 0.0 || fps_value <= 0.0) {
880 is_duration_known =
false;
// Round seconds*fps to the nearest whole frame.
884 const int64_t frames =
static_cast<int64_t
>(std::llround(chosen_seconds * fps_value));
888 is_duration_known =
false;
// Store the frame-quantized duration (frames / fps), not the raw seconds.
893 info.
duration =
static_cast<float>(
static_cast<double>(frames) / fps_value);
894 is_duration_known =
true;
// Populate info.* from the opened audio stream: channels, file size, stream/
// container durations, then re-run the duration strategy and copy stream
// metadata tags. Elided fragment.
897 void FFmpegReader::UpdateAudioInfo() {
898 const int codec_channels =
907 if (codec_channels > 0 &&
// Helper: keep the largest duration seen for a given target variable.
920 auto record_duration = [](
double &target,
double seconds) {
922 target = std::max(target, seconds);
927 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
958 if (aStream->duration > 0) {
// Container duration is in AV_TIME_BASE units; convert to seconds.
961 if (pFormatCtx->duration > 0) {
963 record_duration(format_duration_seconds,
static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
993 ApplyDurationStrategy();
// Import audio-stream metadata tags.
996 AVDictionaryEntry *tag = NULL;
997 while ((tag = av_dict_get(aStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
998 QString str_key = tag->key;
999 QString str_value = tag->value;
1000 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
// Populate info.* from the opened video stream: frame rate, aspect ratio,
// interlacing, durations (with GIF / no-duration fallbacks), then metadata.
// Elided fragment.
1004 void FFmpegReader::UpdateVideoInfo() {
1010 auto record_duration = [](
double &target,
double seconds) {
1012 target = std::max(target, seconds);
1017 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
// Let FFmpeg pick the best frame-rate estimate for the stream.
1024 AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
1036 if (pStream->sample_aspect_ratio.num != 0) {
// Detect interlacing once, from the stream's field order.
1059 if (!check_interlace) {
1060 check_interlace =
true;
1062 switch(field_order) {
1063 case AV_FIELD_PROGRESSIVE:
// Unknown field order: leave detection open for a later pass.
1076 case AV_FIELD_UNKNOWN:
1078 check_interlace =
false;
1093 if (pFormatCtx->duration >= 0) {
1095 record_duration(format_duration_seconds,
static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
// No duration anywhere: assume 1 hour (e.g. live/endless streams) so the
// reader still exposes a finite timeline.
1105 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1106 pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
1108 record_duration(video_stream_duration_seconds, 60 * 60 * 1);
// GIFs often report no duration; same 1-hour placeholder.
1112 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1113 pFormatCtx && pFormatCtx->iformat && strcmp(pFormatCtx->iformat->name,
"gif") == 0) {
1114 record_duration(video_stream_duration_seconds, 60 * 60 * 1);
1118 ApplyDurationStrategy();
// Import video-stream metadata tags.
1121 AVDictionaryEntry *tag = NULL;
1122 while ((tag = av_dict_get(pStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
1123 QString str_key = tag->key;
1124 QString str_value = tag->value;
1125 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
// Tail of GetIsDurationKnown() followed by GetFrame() fragments: validate
// state, clamp the request, and either read forward (small positive delta)
// or seek first. Elided fragment.
1130 return this->is_duration_known;
// Reset seek-stagnation tracking (presumably at the start of GetFrame).
1134 last_seek_max_frame = -1;
1135 seek_stagnant_count = 0;
1138 throw ReaderClosed(
"The FFmpegReader is closed. Call Open() before calling this method.",
path);
// Frame numbers are 1-based; clamp anything below.
1141 if (requested_frame < 1)
1142 requested_frame = 1;
1147 throw InvalidFile(
"Could not detect the duration of the video or audio stream.",
path);
1163 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
// Small forward distances (1..20 frames) are cheaper to decode through than
// to seek; otherwise seek close to the target first.
1177 int64_t diff = requested_frame - last_frame;
1178 if (diff >= 1 && diff <= 20) {
1180 frame = ReadStream(requested_frame);
1185 Seek(requested_frame);
1194 frame = ReadStream(requested_frame);
// Decode packets until the requested frame is final in the cache (or EOF),
// dispatching video/audio packets, dropping packets from disabled streams,
// and guarding against stalls with a no-progress counter. Elided fragment.
1202 std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
1204 bool check_seek =
false;
1205 int packet_error = -1;
// Progress tracking: snapshot counters so a stalled demux/decode loop can be
// detected below.
1206 int64_t no_progress_count = 0;
1207 int64_t prev_packets_read = packet_status.
packets_read();
1210 double prev_video_pts_seconds = video_pts_seconds;
1220 CheckWorkingFrames(requested_frame);
1225 if (is_cache_found) {
// hold_packet lets a previously-read packet be re-processed (e.g. after
// decoder EAGAIN) instead of reading a new one.
1229 if (!hold_packet || !packet) {
1231 packet_error = GetNextPacket();
1232 if (packet_error < 0 && !packet) {
1243 check_seek = CheckSeek();
// Dispatch to the matching processor. The || tails of these conditions are
// elided — presumably they also fire on EOF to flush decoders. TODO confirm.
1255 if ((
info.
has_video && packet && packet->stream_index == videoStream) ||
1259 ProcessVideoPacket(requested_frame);
1262 if ((
info.
has_audio && packet && packet->stream_index == audioStream) ||
1266 ProcessAudioPacket(requested_frame);
// Packets for streams the reader has disabled are counted and discarded.
1271 if ((!
info.
has_video && packet && packet->stream_index == videoStream) ||
1272 (!
info.
has_audio && packet && packet->stream_index == audioStream)) {
1274 if (packet->stream_index == videoStream) {
1276 }
else if (packet->stream_index == audioStream) {
1282 RemoveAVPacket(packet);
1292 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ReadStream (force EOF)",
"packets_read", packet_status.
packets_read(),
"packets_decoded", packet_status.
packets_decoded(),
"packets_eof", packet_status.
packets_eof,
"video_eof", packet_status.
video_eof,
"audio_eof", packet_status.
audio_eof,
"end_of_file", packet_status.
end_of_file);
// Stall guard: if neither packet counters nor video PTS advanced for ~2000
// iterations, bail out (remaining condition elided).
1305 const bool has_progress =
1309 (video_pts_seconds != prev_video_pts_seconds);
1312 no_progress_count = 0;
1314 no_progress_count++;
1315 if (no_progress_count >= 2000
1320 "requested_frame", requested_frame,
1321 "no_progress_count", no_progress_count,
1335 prev_video_pts_seconds = video_pts_seconds;
1343 "largest_frame_processed", largest_frame_processed,
1344 "Working Cache Count", working_cache.
Count());
1353 CheckWorkingFrames(requested_frame);
// Fallback frame synthesis at/after EOF: reuse the last decoded image when
// available, otherwise emit silence-only frames.
1369 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1372 if (!frame->has_image_data) {
1377 frame->AddAudioSilence(samples_in_frame);
1383 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1384 if (last_final_video_frame && last_final_video_frame->has_image_data
1385 && last_final_video_frame->number <= requested_frame) {
1386 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
1387 }
else if (last_video_frame && last_video_frame->has_image_data
1388 && last_video_frame->number <= requested_frame) {
1389 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
1393 f->AddAudioSilence(samples_in_frame);
// Read the next packet from the container into the member `packet`, freeing
// any previous one. Returns av_read_frame's result (< 0 on EOF/error).
// NOTE(review): `new AVPacket()` pairs with RemoveAVPacket elsewhere —
// presumably that helper deletes it; av_packet_alloc/free would be the
// conventional FFmpeg pairing. TODO confirm ownership.
1401 int FFmpegReader::GetNextPacket() {
1402 int found_packet = 0;
1403 AVPacket *next_packet;
1404 next_packet =
new AVPacket();
1405 found_packet = av_read_frame(pFormatCtx, next_packet);
1409 RemoveAVPacket(packet);
1412 if (found_packet >= 0) {
1414 packet = next_packet;
// Per-stream accounting (bodies elided).
1417 if (packet->stream_index == videoStream) {
1419 }
else if (packet->stream_index == audioStream) {
1428 return found_packet;
// Send the current packet to the video decoder and drain decoded frames,
// handling the send/receive error states and (when HW decode is on) copying
// the surface back to system memory. Elided fragment spanning both the
// modern send/receive API and the legacy avcodec_decode_video2 path.
1432 bool FFmpegReader::GetAVFrame() {
1433 int frameFinished = 0;
1439 int send_packet_err = 0;
1440 int64_t send_packet_pts = 0;
// A NULL packet is the FFmpeg flush/drain signal and is always sent.
1441 if ((packet && packet->stream_index == videoStream) || !packet) {
1442 send_packet_err = avcodec_send_packet(pCodecCtx, packet);
1444 if (packet && send_packet_err >= 0) {
1445 send_packet_pts = GetPacketPTS();
1446 hold_packet =
false;
1455 #endif // USE_HW_ACCEL
// Log (but do not throw on) send-side failures; EAGAIN presumably sets
// hold_packet so the same packet is resent after draining. TODO confirm.
1456 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1457 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) +
"])",
"send_packet_err", send_packet_err,
"send_packet_pts", send_packet_pts);
1458 if (send_packet_err == AVERROR(EAGAIN)) {
1460 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()",
"send_packet_pts", send_packet_pts);
1462 if (send_packet_err == AVERROR(EINVAL)) {
1463 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush",
"send_packet_pts", send_packet_pts);
1465 if (send_packet_err == AVERROR(ENOMEM)) {
1466 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors",
"send_packet_pts", send_packet_pts);
// Receive side: next_frame2 is the decoder output target (a separate HW
// surface frame when HW decode is active, else aliased to next_frame).
1473 int receive_frame_err = 0;
1474 AVFrame *next_frame2;
1480 #endif // USE_HW_ACCEL
1482 next_frame2 = next_frame;
1485 while (receive_frame_err >= 0) {
1486 receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
1488 if (receive_frame_err != 0) {
1489 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [\" + av_err2string(receive_frame_err) + \"])",
"receive_frame_err", receive_frame_err,
"send_packet_pts", send_packet_pts);
1491 if (receive_frame_err == AVERROR_EOF) {
1493 "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
"send_packet_pts", send_packet_pts);
1494 avcodec_flush_buffers(pCodecCtx);
1497 if (receive_frame_err == AVERROR(EINVAL)) {
1499 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
"send_packet_pts", send_packet_pts);
1500 avcodec_flush_buffers(pCodecCtx);
1502 if (receive_frame_err == AVERROR(EAGAIN)) {
1504 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
"send_packet_pts", send_packet_pts);
1506 if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1508 "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
"send_packet_pts", send_packet_pts);
// HW surface -> system memory: transfer pixel data then copy frame props.
1519 if (next_frame2->format == hw_de_av_pix_fmt) {
1520 next_frame->format = AV_PIX_FMT_YUV420P;
1521 if ((err = av_hwframe_transfer_data(next_frame,next_frame2,0)) < 0) {
1524 if ((err = av_frame_copy_props(next_frame,next_frame2)) < 0) {
1530 #endif // USE_HW_ACCEL
1532 next_frame = next_frame2;
// Deep-copy the decoded picture into the member pFrame for later conversion.
1544 av_image_copy(pFrame->data, pFrame->linesize, (
const uint8_t**)next_frame->data, next_frame->linesize,
// Prefer the frame's own PTS; fall back to the packet DTS when absent.
1551 if (next_frame->pts != AV_NOPTS_VALUE) {
1554 video_pts = next_frame->pts;
1555 }
else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
1557 video_pts = next_frame->pkt_dts;
1561 "FFmpegReader::GetAVFrame (Successful frame received)",
"video_pts", video_pts,
"send_packet_pts", send_packet_pts);
1570 #endif // USE_HW_ACCEL
// Legacy (pre-FFmpeg 3.2) decode path.
1572 avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1578 if (frameFinished) {
1582 av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt,
info.
width,
1585 #endif // IS_FFMPEG_3_2
1591 return frameFinished;
// After a Seek(), decide whether decoding has reached (or overshot) the seek
// target. Overshoot triggers a re-seek further back, with a quadratically
// growing offset and a stagnation guard to avoid infinite re-seek loops.
1595 bool FFmpegReader::CheckSeek() {
// Give up re-seeking after 5 attempts; treat 2 identical results as stuck.
1598 const int64_t kSeekRetryMax = 5;
1599 const int kSeekStagnantMax = 2;
// Nothing decoded yet on the stream we are seeking by — keep waiting.
1603 if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
1611 int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
// Track whether successive checks make progress toward the target.
1613 if (max_seeked_frame == last_seek_max_frame) {
1614 seek_stagnant_count++;
1616 last_seek_max_frame = max_seeked_frame;
1617 seek_stagnant_count = 0;
// Landed past the target: seek again, further back each retry.
1621 if (max_seeked_frame >= seeking_frame) {
1624 "is_video_seek", is_video_seek,
1625 "max_seeked_frame", max_seeked_frame,
1626 "seeking_frame", seeking_frame,
1627 "seeking_pts", seeking_pts,
1628 "seek_video_frame_found", seek_video_frame_found,
1629 "seek_audio_frame_found", seek_audio_frame_found);
// Back off by 10 * seek_count^2 frames; when stagnant, jump straight to the
// maximum back-off.
1632 if (seek_count < kSeekRetryMax) {
1633 Seek(seeking_frame - (10 * seek_count * seek_count));
1634 }
else if (seek_stagnant_count >= kSeekStagnantMax) {
1636 Seek(seeking_frame - (10 * kSeekRetryMax * kSeekRetryMax));
1639 Seek(seeking_frame - (10 * seek_count * seek_count));
1644 "is_video_seek", is_video_seek,
1645 "packet->pts", GetPacketPTS(),
1646 "seeking_pts", seeking_pts,
1647 "seeking_frame", seeking_frame,
1648 "seek_video_frame_found", seek_video_frame_found,
1649 "seek_audio_frame_found", seek_audio_frame_found);
// Decode one video packet, scale/convert it to RGBA (optionally downscaled
// to preview/display limits), and add the resulting Frame to working_cache.
// Elided fragment.
1663 void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
1666 int frame_finished = GetAVFrame();
// No complete frame produced: release the staging frame and bail.
1669 if (!frame_finished) {
1672 RemoveAVFrame(pFrame);
1678 int64_t current_frame = ConvertVideoPTStoFrame(video_pts);
// First decoded frame after a seek records where the seek actually landed.
1681 if (!seek_video_frame_found && is_seeking)
1682 seek_video_frame_found = current_frame;
1688 working_cache.
Add(CreateFrame(requested_frame));
// Reuse a cached RGB staging frame to avoid per-packet allocation.
1700 AVFrame *pFrameRGB = pFrameRGB_cached;
1703 if (pFrameRGB ==
nullptr)
1705 pFrameRGB_cached = pFrameRGB;
1708 uint8_t *buffer =
nullptr;
// Compute the output size cap from timeline scale factors / preview size
// (surrounding branches elided).
1729 max_width = std::max(
float(max_width), max_width * max_scale_x);
1730 max_height = std::max(
float(max_height), max_height * max_scale_y);
1736 QSize width_size(max_width * max_scale_x,
1739 max_height * max_scale_y);
1741 if (width_size.width() >= max_width && width_size.height() >= max_height) {
1742 max_width = std::max(max_width, width_size.width());
1743 max_height = std::max(max_height, width_size.height());
1745 max_width = std::max(max_width, height_size.width());
1746 max_height = std::max(max_height, height_size.height());
1753 float preview_ratio = 1.0;
1760 max_width =
info.
width * max_scale_x * preview_ratio;
1761 max_height =
info.
height * max_scale_y * preview_ratio;
// Downscale while preserving aspect ratio: fit whichever dimension binds.
1769 int original_height = height;
1770 if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
1772 float ratio = float(width) / float(height);
1773 int possible_width = round(max_height * ratio);
1774 int possible_height = round(max_width / ratio);
1776 if (possible_width <= max_width) {
1778 width = possible_width;
1779 height = max_height;
1783 height = possible_height;
// RGBA output buffer: 4 bytes/pixel plus slack, rounded up to a 32-byte
// alignment for the SIMD paths inside sws_scale.
1788 const int bytes_per_pixel = 4;
1789 int raw_buffer_size = (width * height * bytes_per_pixel) + 128;
1792 constexpr
size_t ALIGNMENT = 32;
1793 int buffer_size = ((raw_buffer_size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT;
1794 buffer = (
unsigned char*) aligned_malloc(buffer_size, ALIGNMENT);
// Bilinear by default; bicubic presumably for higher-quality mode (condition
// elided).
1799 int scale_mode = SWS_FAST_BILINEAR;
1801 scale_mode = SWS_BICUBIC;
1803 img_convert_ctx = sws_getCachedContext(img_convert_ctx,
info.
width,
info.
height,
AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx), width, height,
PIX_FMT_RGBA, scale_mode, NULL, NULL, NULL);
1804 if (!img_convert_ctx)
1808 sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
1809 original_height, pFrameRGB->data, pFrameRGB->linesize);
// Attach the converted image to the output Frame (premultiplied vs straight
// alpha chosen by an elided condition).
1812 std::shared_ptr<Frame> f = CreateFrame(current_frame);
1817 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
1820 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
1824 working_cache.
Add(f);
1827 last_video_frame = f;
1833 RemoveAVFrame(pFrame);
1839 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ProcessVideoPacket (After)",
"requested_frame", requested_frame,
"current_frame", current_frame,
"f->number", f->number,
"video_pts_seconds", video_pts_seconds);
// Decode one audio packet, convert the samples to planar float (FLTP), and
// scatter them across the Frame objects they overlap, per channel. Elided
// fragment covering both the send/receive API and legacy decode_audio4.
1843 void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
// Map the packet PTS to a (frame, sample offset) location in the timeline.
1846 if (packet && packet->pts != AV_NOPTS_VALUE) {
1848 location = GetAudioPTSLocation(packet->pts);
// First audio after a seek records the actual landing frame.
1851 if (!seek_audio_frame_found && is_seeking)
1852 seek_audio_frame_found = location.
frame;
1859 working_cache.
Add(CreateFrame(requested_frame));
1863 "requested_frame", requested_frame,
1864 "target_frame", location.
frame,
1868 int frame_finished = 0;
1872 int packet_samples = 0;
1876 int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
1877 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1881 int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
1882 if (receive_frame_err >= 0) {
1885 if (receive_frame_err == AVERROR_EOF) {
1889 if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
1891 avcodec_flush_buffers(aCodecCtx);
1893 if (receive_frame_err != 0) {
// Legacy decode path (pre send/receive API).
1898 int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
1901 if (frame_finished) {
1907 audio_pts = audio_frame->pts;
// Re-derive the timeline location from the decoded frame's PTS.
1910 location = GetAudioPTSLocation(audio_pts);
1913 int plane_size = -1;
1919 data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
// Total interleaved sample count across all channels.
1923 packet_samples = audio_frame->nb_samples * nb_channels;
// Per-channel sample count for frame bookkeeping.
1932 int pts_remaining_samples = packet_samples /
info.
channels;
1935 if (pts_remaining_samples == 0) {
1937 "packet_samples", packet_samples,
1939 "pts_remaining_samples", pts_remaining_samples);
// Advance previous_packet_location across the frames this packet spans.
1943 while (pts_remaining_samples) {
1948 int samples = samples_per_frame - previous_packet_location.
sample_start;
1949 if (samples > pts_remaining_samples)
1950 samples = pts_remaining_samples;
1953 pts_remaining_samples -= samples;
1955 if (pts_remaining_samples > 0) {
1957 previous_packet_location.
frame++;
1966 "packet_samples", packet_samples,
// Convert to planar float via the resampler (setup lines elided).
1974 audio_converted->nb_samples = audio_frame->nb_samples;
1975 av_samples_alloc(audio_converted->data, audio_converted->linesize,
info.
channels, audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
1991 av_opt_set_int(avr,
"out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
2000 audio_converted->data,
2001 audio_converted->linesize[0],
2002 audio_converted->nb_samples,
2004 audio_frame->linesize[0],
2005 audio_frame->nb_samples);
// Scatter each channel's samples into the Frames they overlap, starting at
// location.frame / location.sample_start.
2008 int64_t starting_frame_number = -1;
2009 for (
int channel_filter = 0; channel_filter <
info.
channels; channel_filter++) {
2011 starting_frame_number = location.
frame;
2012 int channel_buffer_size = nb_samples;
2013 auto *channel_buffer = (
float *) (audio_converted->data[channel_filter]);
2017 int remaining_samples = channel_buffer_size;
2018 while (remaining_samples > 0) {
2023 int samples = std::fmin(samples_per_frame - start, remaining_samples);
2026 std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
2029 f->AddAudio(
true, channel_filter, start, channel_buffer, samples, 1.0f);
2033 "frame", starting_frame_number,
2036 "channel", channel_filter,
2037 "samples_per_frame", samples_per_frame);
2040 working_cache.
Add(f);
2043 remaining_samples -= samples;
2046 if (remaining_samples > 0)
2047 channel_buffer += samples;
2050 starting_frame_number++;
// av_samples_alloc made one contiguous allocation; freeing data[0] frees all
// planes.
2058 av_free(audio_converted->data[0]);
2067 "requested_frame", requested_frame,
2068 "starting_frame", location.
frame,
2069 "end_frame", starting_frame_number - 1,
2070 "audio_pts_seconds", audio_pts_seconds);
// Seek this reader to a specific frame number: clamp the request, reset all
// decode/cache bookkeeping, compute a stream-relative seek target, and flush
// both codec buffers.  NOTE(review): this chunk is a partial paste — several
// interior lines (debug logging bodies, the actual av_seek_frame call, and
// the fallback seek paths) are not visible here; comments on those paths are
// hedged and should be confirmed against the full file.
2076 void FFmpegReader::Seek(int64_t requested_frame) {
// Frame numbers are 1-based; clamp anything smaller.
2078 if (requested_frame < 1)
2079 requested_frame = 1;
// Early-out branch: request is beyond everything we've processed AND the
// demuxer already hit end-of-file (branch body not fully visible here).
2082 if (requested_frame > largest_frame_processed && packet_status.
end_of_file) {
2089 "requested_frame", requested_frame,
2090 "seek_count", seek_count,
2091 "last_frame", last_frame);
// Discard partially-assembled frames; they belong to the pre-seek position.
2094 working_cache.
Clear();
// Invalidate the most-recent-PTS trackers so readiness checks restart fresh.
2098 video_pts_seconds = NO_PTS_OFFSET;
2100 audio_pts_seconds = NO_PTS_OFFSET;
2101 hold_packet =
false;
2103 current_video_frame = 0;
2104 largest_frame_processed = 0;
2105 last_final_video_frame.reset();
// Reset packet bookkeeping (argument false — presumably "don't keep counts";
// confirm against packet_status.reset()'s declaration).
2110 packet_status.
reset(
false);
// Seek slightly *before* the requested frame so the decoder can
// resynchronize on a keyframe ahead of the target.
2116 int buffer_amount = 12;
// Requests near the start of the stream are handled specially (seek back to
// frame 1; interior of this branch is not fully visible in this chunk).
2117 if (requested_frame - buffer_amount < 20) {
2131 if (seek_count == 1) {
2134 seeking_pts = ConvertFrameToVideoPTS(1);
2136 seek_audio_frame_found = 0;
2137 seek_video_frame_found = 0;
2141 bool seek_worked =
false;
2142 int64_t seek_target = 0;
// Compute the seek target in the video stream's timebase (the enclosing
// if/else choosing video vs. audio seek is not visible here).
2146 seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
2151 is_video_seek =
true;
// Fallback: compute the target in the audio stream's timebase instead.
2158 seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
2163 is_video_seek =
false;
// Flush buffered packets from both decoders so stale frames are not
// emitted after repositioning the demuxer.
2172 avcodec_flush_buffers(aCodecCtx);
2176 avcodec_flush_buffers(pCodecCtx);
// Forget the previous audio packet location; -1 marks "no previous packet".
2179 previous_packet_location.
frame = -1;
// First seek attempt: remember where we are heading so incoming packets can
// be matched against the target pts/frame.
2184 if (seek_count == 1) {
2186 seeking_pts = seek_target;
2187 seeking_frame = requested_frame;
2189 seek_audio_frame_found = 0;
2190 seek_video_frame_found = 0;
// Return the best-effort presentation timestamp of the current packet:
// prefer pts, fall back to dts when pts is missing.  The branch that returns
// current_pts on success is not visible in this chunk (internal lines
// 2223-2227 missing) — confirm against the full file.
2218 int64_t FFmpegReader::GetPacketPTS() {
2220 int64_t current_pts = packet->pts;
// Some containers only stamp dts; use it as a stand-in for a missing pts.
2221 if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
2222 current_pts = packet->dts;
// Neither pts nor dts was usable for this packet.
2228 return AV_NOPTS_VALUE;
// Determine the pts offset (in seconds) that maps raw stream timestamps to
// zero-based media time.  Reads packets until a plausible starting pts has
// been seen for each present stream, then picks one offset.  NOTE(review):
// interior lines are missing from this chunk; exact offset computations and
// the stream-presence checks are hedged inferences.
2233 void FFmpegReader::UpdatePTSOffset() {
// Already computed — nothing to do (early-return body not visible here).
2234 if (pts_offset_seconds != NO_PTS_OFFSET) {
2238 pts_offset_seconds = 0.0;
2239 double video_pts_offset_seconds = 0.0;
2240 double audio_pts_offset_seconds = 0.0;
// Streams that don't exist are treated as already "found" (the conditions
// setting these true immediately are partially visible at 2245/2250).
2242 bool has_video_pts =
false;
2245 has_video_pts =
true;
2247 bool has_audio_pts =
false;
2250 has_audio_pts =
true;
// Scan packets until an initial pts is known for both (present) streams.
2254 while (!has_video_pts || !has_audio_pts) {
// Stop scanning on demuxer error / end-of-file.
2256 if (GetNextPacket() < 0)
2261 int64_t pts = GetPacketPTS();
2264 if (!has_video_pts && packet->stream_index == videoStream) {
// Sanity-limit: only accept a starting offset within +/-10 seconds;
// anything larger is presumably a bogus timestamp.
2270 if (std::abs(video_pts_offset_seconds) <= 10.0) {
2271 has_video_pts =
true;
2274 else if (!has_audio_pts && packet->stream_index == audioStream) {
2280 if (std::abs(audio_pts_offset_seconds) <= 10.0) {
2281 has_audio_pts =
true;
// Choose which stream's offset to use (the guarding conditions for these
// two assignments are not visible in this chunk).
2291 pts_offset_seconds = video_pts_offset_seconds;
2293 pts_offset_seconds = audio_pts_offset_seconds;
2294 }
else if (has_video_pts && has_audio_pts) {
// Both streams found: prefer the video stream's offset.
2296 pts_offset_seconds = video_pts_offset_seconds;
// Convert a raw video pts into a 1-based frame number using the video
// timebase, the global pts offset, and the detected fps.  The initializers
// of video_timebase_value and fps_value are truncated in this chunk.
2301 int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
// Remember the last frame we emitted so duplicates can be detected below.
2303 int64_t previous_video_frame = current_video_frame;
2305 const double video_timebase_value =
// pts * timebase = seconds in the stream; add the offset to zero-base it.
2311 double video_seconds = (double(pts) * video_timebase_value) + pts_offset_seconds;
// +1 because frame numbers are 1-based.
2314 int64_t frame = round(video_seconds * fps_value) + 1;
// First frame seen after a reset: adopt it as the current position.
2317 if (current_video_frame == 0)
2318 current_video_frame = frame;
// Same frame number computed twice (duplicate-pts handling; the branch
// interior is not visible here).
2322 if (frame == previous_video_frame) {
2327 current_video_frame++;
// Inverse of ConvertVideoPTStoFrame: map a 1-based frame number to a pts in
// the video stream's timebase (initializers truncated in this chunk).
2336 int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2338 const double video_timebase_value =
// (frame-1)/fps = zero-based seconds; re-apply the global pts offset.
2344 double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
// Divide by the timebase to get back into stream pts units.
2347 int64_t video_pts = round(seconds / video_timebase_value);
// Map a 1-based frame number to a pts in the *audio* stream's timebase —
// mirrors ConvertFrameToVideoPTS (initializers truncated in this chunk).
2354 int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2356 const double audio_timebase_value =
// (frame-1)/fps = zero-based seconds; re-apply the global pts offset.
2362 double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
// Divide by the audio timebase to get stream pts units.
2365 int64_t audio_pts = round(seconds / audio_timebase_value);
// Translate an audio pts into an AudioLocation (target frame number plus a
// sample offset within that frame), with gap-detection that snaps small
// discontinuities back onto the previous packet's location.  NOTE(review):
// interior lines are missing; the construction of `location` itself is not
// visible in this chunk.
2372 AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
2373 const double audio_timebase_value =
// pts * timebase = stream seconds; re-apply the global offset.
2380 double audio_seconds = (double(pts) * audio_timebase_value) + pts_offset_seconds;
// Fractional 1-based frame position this pts lands on.
2383 double frame = (audio_seconds * fps_value) + 1;
2386 int64_t whole_frame = int64_t(frame);
// Fraction into the frame, converted to a starting sample index below.
2389 double sample_start_percentage = frame - double(whole_frame);
2395 int sample_start = round(
double(samples_per_frame) * sample_start_percentage);
// Clamp to valid ranges (clamp bodies not visible in this chunk).
2398 if (whole_frame < 1)
2400 if (sample_start < 0)
// Gap detection: if the previous packet's location is valid and this one is
// "near" it (within one frame's worth of samples), reuse the previous
// location so tiny pts jitter doesn't scatter audio across frames.
2407 if (previous_packet_location.
frame != -1) {
2408 if (location.
is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
2409 int64_t orig_frame = location.
frame;
// Snap back onto the previous packet's frame.
2414 location.
frame = previous_packet_location.
frame;
2417 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
"Source Frame", orig_frame,
"Source Audio Sample", orig_start,
"Target Frame", location.
frame,
"Target Audio Sample", location.
sample_start,
"pts", pts);
// Remember this location for the next packet's gap check.
2426 previous_packet_location = location;
// Return the working-cache frame for `requested_frame`, creating and caching
// a new one if absent.  NOTE(review): interior lines are missing — the lock
// acquisition and the Frame construction between the two GetFrame calls are
// not visible in this chunk (the double lookup presumably brackets a lock).
2433 std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
// First (presumably unlocked) cache probe.
2435 std::shared_ptr<Frame> output = working_cache.
GetFrame(requested_frame);
// Second probe — likely re-checked under a lock to avoid duplicate creation.
2439 output = working_cache.
GetFrame(requested_frame);
2440 if(output)
return output;
// Cache the newly constructed frame (construction lines not visible here).
2448 working_cache.
Add(output);
// Track the high-water mark of frames we've started processing.
2451 if (requested_frame > largest_frame_processed)
2452 largest_frame_processed = requested_frame;
// Decide whether `requested_frame` is "seek trash": a frame at or before the
// newest frame found while seeking, which may have been only partially
// decoded and should be discarded rather than finalized.
2459 bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2462 bool seek_trash =
false;
// Newest frame discovered during the seek, across audio and video.
2463 int64_t max_seeked_frame = seek_audio_frame_found;
2464 if (seek_video_frame_found > max_seeked_frame) {
2465 max_seeked_frame = seek_video_frame_found;
// Partial if either present stream has seek progress that already reached
// (or passed) the requested frame.
2467 if ((
info.
has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2468 (
info.
has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
// Walk the working cache and promote frames that are "ready" (their video
// and audio pts have both been passed by the decoders, or EOF was reached)
// into the final cache; discard frames marked as seek trash.  NOTE(review):
// interior lines are missing throughout — the promotion into final_cache and
// several logging calls are not visible in this chunk.
2476 void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
// Serialize against GetFrame; recursive so re-entry from the same thread is safe.
2479 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
// Snapshot the working frames so we can Remove() while iterating the copy.
2482 std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.
GetFrames();
2483 std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;
2486 for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
2489 std::shared_ptr<Frame> f = *working_itr;
// Skip null entries and frames beyond the caller's request.
2492 if (!f || f->number > requested_frame) {
// Where this frame sits on the media timeline, in seconds.
2498 double frame_pts_seconds = (double(f->number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
// Most recent decode progress across both streams.
2499 double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);
2502 bool is_video_ready =
false;
2503 bool is_audio_ready =
false;
2504 double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
// Video is ready once decoding has passed this frame's pts, or decoding is
// far (>1.5s) beyond it (additional conditions not visible in this chunk).
2505 if ((frame_pts_seconds <= video_pts_seconds)
2506 || (recent_pts_diff > 1.5)
2510 is_video_ready =
true;
2512 "frame_number", f->number,
2513 "frame_pts_seconds", frame_pts_seconds,
2514 "video_pts_seconds", video_pts_seconds,
2515 "recent_pts_diff", recent_pts_diff);
// Image backfill, in priority order: the previous frame in this pass, then
// the last finalized video frame, then the last decoded video frame.
2520 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2521 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2525 if (!f->has_image_data
2526 && last_final_video_frame
2527 && last_final_video_frame->has_image_data
2528 && last_final_video_frame->number <= f->number) {
2529 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2533 if (!f->has_image_data
2535 && last_video_frame->has_image_data
2536 && last_video_frame->number <= f->number) {
2537 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
// Last resort: no earlier image exists — fill with solid black.
2541 if (!f->has_image_data) {
2543 "FFmpegReader::CheckWorkingFrames (no previous image found; using black frame)",
2544 "frame_number", f->number);
2545 f->AddColor(
"#000000");
// Audio is ready once the audio decoder is comfortably (>1s) past this
// frame, or overall decoding is far beyond it.
2550 double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
2551 if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
2552 || (recent_pts_diff > 1.5)
2557 is_audio_ready =
true;
2559 "frame_number", f->number,
2560 "frame_pts_seconds", frame_pts_seconds,
2561 "audio_pts_seconds", audio_pts_seconds,
2562 "audio_pts_diff", audio_pts_diff,
2563 "recent_pts_diff", recent_pts_diff);
// A frame partially built during a seek must be thrown away.
2565 bool is_seek_trash = IsPartialFrame(f->number);
2573 "frame_number", f->number,
2574 "is_video_ready", is_video_ready,
2575 "is_audio_ready", is_audio_ready,
// Finalize when fully ready, when EOF forces flushing, or discard trash.
2581 if ((!packet_status.
end_of_file && is_video_ready && is_audio_ready) || packet_status.
end_of_file || is_seek_trash) {
2584 "requested_frame", requested_frame,
2585 "f->number", f->number,
2586 "is_seek_trash", is_seek_trash,
2587 "Working Cache Count", working_cache.
Count(),
2591 if (!is_seek_trash) {
// Remember the newest finalized frame that actually carries an image, for
// future backfill (see above).
2594 if (f->has_image_data) {
2595 last_final_video_frame = f;
// Promote (promotion into final_cache not visible here), then evict.
2599 working_cache.
Remove(f->number);
2602 last_frame = f->number;
// Seek trash: evict without finalizing.
2605 working_cache.
Remove(f->number);
// Release the snapshot's memory eagerly.
2612 working_frames.clear();
2613 working_frames.shrink_to_fit();
// Estimate the stream's real frame rate by counting video packets that land
// in each of the first few one-second buckets.  Used when the container's
// declared fps is suspect.  NOTE(review): interior lines are missing — the
// loop header, the video_seconds computation, and what happens with the
// final estimates are not visible in this chunk.
2617 void FFmpegReader::CheckFPS() {
// One counter per whole second of media time (first 3 seconds).
2625 int frames_per_second[3] = {0,0,0};
2626 int max_fps_index =
sizeof(frames_per_second) /
sizeof(frames_per_second[0]);
2629 int all_frames_detected = 0;
2630 int starting_frames_detected = 0;
// Stop scanning on demuxer error / end-of-file.
2635 if (GetNextPacket() < 0)
// Only video packets contribute to the fps estimate.
2640 if (packet->stream_index == videoStream) {
// Whole-second bucket this packet's timestamp falls into.
2643 fps_index = int(video_seconds);
2646 if (fps_index >= 0 && fps_index < max_fps_index) {
2648 starting_frames_detected++;
2649 frames_per_second[fps_index]++;
2653 all_frames_detected++;
// Default assumption if no usable samples were collected.
2658 float avg_fps = 30.0;
// Average frames per elapsed second over the buckets actually observed.
2659 if (starting_frames_detected > 0 && fps_index > 0) {
2660 avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
// Implausibly low estimate — handled specially (branch body not visible).
2664 if (avg_fps < 8.0) {
// Fall back to a duration-based estimate when any frames were seen at all.
2673 if (all_frames_detected > 0) {
// Free the pixel buffer owned by an AVFrame (interior lines — e.g. freeing
// the AVFrame struct itself — are not visible in this chunk).
2687 void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
// av_freep nulls the pointer after freeing, guarding against double-free.
2691 av_freep(&remove_frame->data[0]);
// Release an AVPacket: the packet's own buffers are presumably unreferenced
// on the missing interior lines (e.g. av_packet_unref) before the struct,
// which was heap-allocated with new, is deleted here — confirm against the
// full file.
2699 void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
2704 delete remove_packet;
2719 root[
"type"] =
"FFmpegReader";
2720 root[
"path"] =
path;
2721 switch (duration_strategy) {
2723 root[
"duration_strategy"] =
"VideoPreferred";
2726 root[
"duration_strategy"] =
"AudioPreferred";
2730 root[
"duration_strategy"] =
"LongestStream";
2747 catch (
const std::exception& e) {
2749 throw InvalidJSON(
"JSON is invalid (missing keys or invalid data types)");
2760 if (!root[
"path"].isNull())
2761 path = root[
"path"].asString();
2762 if (!root[
"duration_strategy"].isNull()) {
2763 const std::string strategy = root[
"duration_strategy"].asString();
2764 if (strategy ==
"VideoPreferred") {
2766 }
else if (strategy ==
"AudioPreferred") {