Rename ffmpeg-4.2-fit to ffmpeg-4-fit

2025-03-09 15:49:59 +00:00 · 2021-03-02 17:48:40 +08:00 · 2021-03-02 17:48:40 +08:00 · 27712fdda7
commit 27712fdda7
parent b19074721c
720 changed files with 14 additions and 14 deletions
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/Makefile
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/Makefile
@ -0,0 +1,60 @@
+# subsystems
+OBJS-$(CONFIG_FFT)                      += aarch64/fft_init_aarch64.o
+OBJS-$(CONFIG_FMTCONVERT)               += aarch64/fmtconvert_init.o
+OBJS-$(CONFIG_H264CHROMA)               += aarch64/h264chroma_init_aarch64.o
+OBJS-$(CONFIG_H264DSP)                  += aarch64/h264dsp_init_aarch64.o
+OBJS-$(CONFIG_H264PRED)                 += aarch64/h264pred_init.o
+OBJS-$(CONFIG_H264QPEL)                 += aarch64/h264qpel_init_aarch64.o
+OBJS-$(CONFIG_HPELDSP)                  += aarch64/hpeldsp_init_aarch64.o
+OBJS-$(CONFIG_MPEGAUDIODSP)             += aarch64/mpegaudiodsp_init.o
+OBJS-$(CONFIG_NEON_CLOBBER_TEST)        += aarch64/neontest.o
+OBJS-$(CONFIG_VIDEODSP)                 += aarch64/videodsp_init.o
+OBJS-$(CONFIG_VP8DSP)                   += aarch64/vp8dsp_init_aarch64.o
+
+# decoders/encoders
+OBJS-$(CONFIG_AAC_DECODER)              += aarch64/aacpsdsp_init_aarch64.o \
+                                           aarch64/sbrdsp_init_aarch64.o
+OBJS-$(CONFIG_DCA_DECODER)              += aarch64/synth_filter_init.o
+OBJS-$(CONFIG_OPUS_DECODER)             += aarch64/opusdsp_init.o
+OBJS-$(CONFIG_RV40_DECODER)             += aarch64/rv40dsp_init_aarch64.o
+OBJS-$(CONFIG_VC1DSP)                   += aarch64/vc1dsp_init_aarch64.o
+OBJS-$(CONFIG_VORBIS_DECODER)           += aarch64/vorbisdsp_init.o
+OBJS-$(CONFIG_VP9_DECODER)              += aarch64/vp9dsp_init_10bpp_aarch64.o \
+                                           aarch64/vp9dsp_init_12bpp_aarch64.o \
+                                           aarch64/vp9dsp_init_aarch64.o
+
+# ARMv8 optimizations
+
+# subsystems
+ARMV8-OBJS-$(CONFIG_VIDEODSP)           += aarch64/videodsp.o
+
+# NEON optimizations
+
+# subsystems
+NEON-OBJS-$(CONFIG_AAC_DECODER)         += aarch64/sbrdsp_neon.o
+NEON-OBJS-$(CONFIG_FFT)                 += aarch64/fft_neon.o
+NEON-OBJS-$(CONFIG_FMTCONVERT)          += aarch64/fmtconvert_neon.o
+NEON-OBJS-$(CONFIG_H264CHROMA)          += aarch64/h264cmc_neon.o
+NEON-OBJS-$(CONFIG_H264DSP)             += aarch64/h264dsp_neon.o              \
+                                           aarch64/h264idct_neon.o
+NEON-OBJS-$(CONFIG_H264PRED)            += aarch64/h264pred_neon.o
+NEON-OBJS-$(CONFIG_H264QPEL)            += aarch64/h264qpel_neon.o             \
+                                           aarch64/hpeldsp_neon.o
+NEON-OBJS-$(CONFIG_HPELDSP)             += aarch64/hpeldsp_neon.o
+NEON-OBJS-$(CONFIG_IDCTDSP)             += aarch64/idctdsp_init_aarch64.o      \
+                                           aarch64/simple_idct_neon.o
+NEON-OBJS-$(CONFIG_MDCT)                += aarch64/mdct_neon.o
+NEON-OBJS-$(CONFIG_MPEGAUDIODSP)        += aarch64/mpegaudiodsp_neon.o
+NEON-OBJS-$(CONFIG_VP8DSP)              += aarch64/vp8dsp_neon.o
+
+# decoders/encoders
+NEON-OBJS-$(CONFIG_AAC_DECODER)         += aarch64/aacpsdsp_neon.o
+NEON-OBJS-$(CONFIG_DCA_DECODER)         += aarch64/synth_filter_neon.o
+NEON-OBJS-$(CONFIG_OPUS_DECODER)        += aarch64/opusdsp_neon.o
+NEON-OBJS-$(CONFIG_VORBIS_DECODER)      += aarch64/vorbisdsp_neon.o
+NEON-OBJS-$(CONFIG_VP9_DECODER)         += aarch64/vp9itxfm_16bpp_neon.o       \
+                                           aarch64/vp9itxfm_neon.o             \
+                                           aarch64/vp9lpf_16bpp_neon.o         \
+                                           aarch64/vp9lpf_neon.o               \
+                                           aarch64/vp9mc_16bpp_neon.o          \
+                                           aarch64/vp9mc_neon.o
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/aacpsdsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/aacpsdsp_init_aarch64.c
@ -0,0 +1,48 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/aacpsdsp.h"
+
+void ff_ps_add_squares_neon(float *dst, const float (*src)[2], int n);
+void ff_ps_mul_pair_single_neon(float (*dst)[2], float (*src0)[2],
+                                float *src1, int n);
+void ff_ps_hybrid_analysis_neon(float (*out)[2], float (*in)[2],
+                                const float (*filter)[8][2],
+                                ptrdiff_t stride, int n);
+void ff_ps_stereo_interpolate_neon(float (*l)[2], float (*r)[2],
+                                   float h[2][4], float h_step[2][4],
+                                   int len);
+void ff_ps_stereo_interpolate_ipdopd_neon(float (*l)[2], float (*r)[2],
+                                          float h[2][4], float h_step[2][4],
+                                          int len);
+
+av_cold void ff_psdsp_init_aarch64(PSDSPContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        s->add_squares           = ff_ps_add_squares_neon;
+        s->mul_pair_single       = ff_ps_mul_pair_single_neon;
+        s->hybrid_analysis       = ff_ps_hybrid_analysis_neon;
+        s->stereo_interpolate[0] = ff_ps_stereo_interpolate_neon;
+        s->stereo_interpolate[1] = ff_ps_stereo_interpolate_ipdopd_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/asm-offsets.h
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/asm-offsets.h
@ -0,0 +1,25 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_ASM_OFFSETS_H
+#define AVCODEC_AARCH64_ASM_OFFSETS_H
+
+/* FFTContext */
+#define IMDCT_HALF                      0x48
+
+#endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/cabac.h
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/cabac.h
@ -0,0 +1,104 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_CABAC_H
+#define AVCODEC_AARCH64_CABAC_H
+
+#include "config.h"
+#if HAVE_INLINE_ASM
+
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavcodec/cabac.h"
+
+#define get_cabac_inline get_cabac_inline_aarch64
+static av_always_inline int get_cabac_inline_aarch64(CABACContext *c,
+                                                     uint8_t *const state)
+{
+    int bit;
+    void *reg_a, *reg_b, *reg_c, *tmp;
+
+    __asm__ volatile(
+        "ldrb       %w[bit]       , [%[state]]                  \n\t"
+        "add        %[r_b]        , %[tables]   , %[lps_off]    \n\t"
+        "mov        %w[tmp]       , %w[range]                   \n\t"
+        "and        %w[range]     , %w[range]   , #0xC0         \n\t"
+        "lsl        %w[r_c]       , %w[range]   , #1            \n\t"
+        "add        %[r_b]        , %[r_b]      , %w[bit], UXTW \n\t"
+        "ldrb       %w[range]     , [%[r_b], %w[r_c], SXTW]     \n\t"
+        "sub        %w[r_c]       , %w[tmp]     , %w[range]     \n\t"
+        "lsl        %w[tmp]       , %w[r_c]     , #17           \n\t"
+        "cmp        %w[tmp]       , %w[low]                     \n\t"
+        "csel       %w[tmp]       , %w[tmp]     , wzr      , cc \n\t"
+        "csel       %w[range]     , %w[r_c]     , %w[range], gt \n\t"
+        "cinv       %w[bit]       , %w[bit]     , cc            \n\t"
+        "sub        %w[low]       , %w[low]     , %w[tmp]       \n\t"
+        "add        %[r_b]        , %[tables]   , %[norm_off]   \n\t"
+        "add        %[r_a]        , %[tables]   , %[mlps_off]   \n\t"
+        "ldrb       %w[tmp]       , [%[r_b], %w[range], SXTW]   \n\t"
+        "ldrb       %w[r_a]       , [%[r_a], %w[bit], SXTW]     \n\t"
+        "lsl        %w[low]       , %w[low]     , %w[tmp]       \n\t"
+        "lsl        %w[range]     , %w[range]   , %w[tmp]       \n\t"
+        "uxth       %w[r_c]       , %w[low]                     \n\t"
+        "strb       %w[r_a]       , [%[state]]                  \n\t"
+        "cbnz       %w[r_c]       , 2f                          \n\t"
+        "ldr        %[r_c]        , [%[c], %[byte]]             \n\t"
+        "ldr        %[r_a]        , [%[c], %[end]]              \n\t"
+        "ldrh       %w[tmp]       , [%[r_c]]                    \n\t"
+        "cmp        %[r_c]        , %[r_a]                      \n\t"
+        "b.ge       1f                                          \n\t"
+        "add        %[r_a]        , %[r_c]      , #2            \n\t"
+        "str        %[r_a]        , [%[c], %[byte]]             \n\t"
+        "1:                                                     \n\t"
+        "sub        %w[r_c]       , %w[low]     , #1            \n\t"
+        "eor        %w[r_c]       , %w[r_c]     , %w[low]       \n\t"
+        "rev        %w[tmp]       , %w[tmp]                     \n\t"
+        "lsr        %w[r_c]       , %w[r_c]     , #15           \n\t"
+        "lsr        %w[tmp]       , %w[tmp]     , #15           \n\t"
+        "ldrb       %w[r_c]       , [%[r_b], %w[r_c], SXTW]     \n\t"
+        "mov        %w[r_b]       , #0xFFFF                     \n\t"
+        "mov        %w[r_a]       , #7                          \n\t"
+        "sub        %w[tmp]       , %w[tmp]     , %w[r_b]       \n\t"
+        "sub        %w[r_c]       , %w[r_a]     , %w[r_c]       \n\t"
+        "lsl        %w[tmp]       , %w[tmp]     , %w[r_c]       \n\t"
+        "add        %w[low]       , %w[low]     , %w[tmp]       \n\t"
+        "2:                                                     \n\t"
+        :    [bit]"=&r"(bit),
+             [low]"+&r"(c->low),
+           [range]"+&r"(c->range),
+             [r_a]"=&r"(reg_a),
+             [r_b]"=&r"(reg_b),
+             [r_c]"=&r"(reg_c),
+             [tmp]"=&r"(tmp)
+        :        [c]"r"(c),
+             [state]"r"(state),
+            [tables]"r"(ff_h264_cabac_tables),
+              [byte]"i"(offsetof(CABACContext, bytestream)),
+               [end]"i"(offsetof(CABACContext, bytestream_end)),
+          [norm_off]"I"(H264_NORM_SHIFT_OFFSET),
+           [lps_off]"I"(H264_LPS_RANGE_OFFSET),
+          [mlps_off]"I"(H264_MLPS_STATE_OFFSET + 128)
+        : "memory", "cc"
+        );
+
+    return bit & 1;
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVCODEC_AARCH64_CABAC_H */
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/fft_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/fft_init_aarch64.c
@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+
+#include "libavcodec/fft.h"
+
+void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
+
+void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+
+av_cold void ff_fft_init_aarch64(FFTContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        s->fft_permute  = ff_fft_permute_neon;
+        s->fft_calc     = ff_fft_calc_neon;
+#if CONFIG_MDCT
+        s->imdct_calc   = ff_imdct_calc_neon;
+        s->imdct_half   = ff_imdct_half_neon;
+        s->mdct_calc    = ff_mdct_calc_neon;
+        s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
+#endif
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/fmtconvert_init.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/fmtconvert_init.c
@ -0,0 +1,43 @@
+/*
+ * ARM optimized Format Conversion Utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fmtconvert.h"
+
+void ff_int32_to_float_fmul_array8_neon(FmtConvertContext *c, float *dst,
+                                        const int32_t *src, const float *mul,
+                                        int len);
+void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src,
+                                        float mul, int len);
+
+av_cold void ff_fmt_convert_init_aarch64(FmtConvertContext *c,
+                                         AVCodecContext *avctx)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_neon;
+        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/h264chroma_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/h264chroma_init_aarch64.c
@ -0,0 +1,59 @@
+/*
+ * ARM NEON optimised H.264 chroma functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/h264chroma.h"
+
+#include "config.h"
+
+void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+
+void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+
+av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth)
+{
+    const int high_bit_depth = bit_depth > 8;
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags) && !high_bit_depth) {
+        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
+        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
+        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
+
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
+        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/h264dsp_init_aarch64.c
@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/h264dsp.h"
+
+void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                     int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                     int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                           int beta);
+void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                           int beta);
+void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                       int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                       int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                          int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                             int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                             int alpha, int beta);
+void ff_h264_h_loop_filter_chroma422_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                                int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                                   int alpha, int beta);
+
+void ff_weight_h264_pixels_16_neon(uint8_t *dst, ptrdiff_t stride, int height,
+                                   int log2_den, int weight, int offset);
+void ff_weight_h264_pixels_8_neon(uint8_t *dst, ptrdiff_t stride, int height,
+                                  int log2_den, int weight, int offset);
+void ff_weight_h264_pixels_4_neon(uint8_t *dst, ptrdiff_t stride, int height,
+                                  int log2_den, int weight, int offset);
+
+void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                     int height, int log2_den, int weightd,
+                                     int weights, int offset);
+void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                    int height, int log2_den, int weightd,
+                                    int weights, int offset);
+void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                    int height, int log2_den, int weightd,
+                                    int weights, int offset);
+
+void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
+                             int16_t *block, int stride,
+                             const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
+                                  int16_t *block, int stride,
+                                  const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
+                            int16_t *block, int stride,
+                            const uint8_t nnzc[6*8]);
+
+void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
+                             int16_t *block, int stride,
+                             const uint8_t nnzc[6*8]);
+
+av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
+                                     const int chroma_format_idc)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags) && bit_depth == 8) {
+        c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
+        c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
+        c->h264_v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
+        c->h264_h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
+
+        c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
+        c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
+
+        if (chroma_format_idc <= 1) {
+            c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+            c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
+            c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
+        } else {
+            c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
+            c->h264_h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon;
+            c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon;
+            c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_intra_neon;
+        }
+
+        c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
+        c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
+        c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
+
+        c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
+        c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
+        c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
+
+        c->h264_idct_add        = ff_h264_idct_add_neon;
+        c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
+        c->h264_idct_add16      = ff_h264_idct_add16_neon;
+        c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+        if (chroma_format_idc <= 1)
+            c->h264_idct_add8   = ff_h264_idct_add8_neon;
+        c->h264_idct8_add       = ff_h264_idct8_add_neon;
+        c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
+        c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/h264pred_init.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/h264pred_init.c
@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264pred.h"
+
+void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+
+void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
+
+static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
+                                        const int bit_depth,
+                                        const int chroma_format_idc)
+{
+    const int high_depth = bit_depth > 8;
+
+    if (high_depth)
+        return;
+
+    if (chroma_format_idc <= 1) {
+        h->pred8x8[VERT_PRED8x8     ] = ff_pred8x8_vert_neon;
+        h->pred8x8[HOR_PRED8x8      ] = ff_pred8x8_hor_neon;
+        if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
+            h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
+        h->pred8x8[DC_128_PRED8x8   ] = ff_pred8x8_128_dc_neon;
+        if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 &&
+            codec_id != AV_CODEC_ID_VP8) {
+            h->pred8x8[DC_PRED8x8     ] = ff_pred8x8_dc_neon;
+            h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon;
+            h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
+        }
+    }
+
+    h->pred16x16[DC_PRED8x8     ] = ff_pred16x16_dc_neon;
+    h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vert_neon;
+    h->pred16x16[HOR_PRED8x8    ] = ff_pred16x16_hor_neon;
+    h->pred16x16[LEFT_DC_PRED8x8] = ff_pred16x16_left_dc_neon;
+    h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_neon;
+    h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_neon;
+    if (codec_id != AV_CODEC_ID_SVQ3 && codec_id != AV_CODEC_ID_RV40 &&
+        codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
+        h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
+}
+
+av_cold void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
+                                       int bit_depth, const int chroma_format_idc)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
+        h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/h264qpel_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/h264qpel_init_aarch64.c
@ -0,0 +1,172 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/h264qpel.h"
+
+void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel16_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+
+void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_h264_qpel8_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+
+void ff_avg_h264_qpel16_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel16_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+
+void ff_avg_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_h264_qpel8_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+
+av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
+{
+    const int high_bit_depth = bit_depth > 8;
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags) && !high_bit_depth) {
+        c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
+        c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
+        c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon;
+        c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon;
+        c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon;
+        c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon;
+        c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon;
+        c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon;
+        c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon;
+        c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon;
+        c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon;
+        c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon;
+        c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon;
+        c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon;
+        c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon;
+        c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;
+
+        c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
+        c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
+        c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
+        c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
+        c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
+        c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
+        c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
+        c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
+        c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
+        c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
+        c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
+        c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
+        c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
+        c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
+        c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
+        c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;
+
+        c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 1] = ff_avg_h264_qpel16_mc10_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 2] = ff_avg_h264_qpel16_mc20_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 3] = ff_avg_h264_qpel16_mc30_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 4] = ff_avg_h264_qpel16_mc01_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 5] = ff_avg_h264_qpel16_mc11_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 6] = ff_avg_h264_qpel16_mc21_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 7] = ff_avg_h264_qpel16_mc31_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 8] = ff_avg_h264_qpel16_mc02_neon;
+        c->avg_h264_qpel_pixels_tab[0][ 9] = ff_avg_h264_qpel16_mc12_neon;
+        c->avg_h264_qpel_pixels_tab[0][10] = ff_avg_h264_qpel16_mc22_neon;
+        c->avg_h264_qpel_pixels_tab[0][11] = ff_avg_h264_qpel16_mc32_neon;
+        c->avg_h264_qpel_pixels_tab[0][12] = ff_avg_h264_qpel16_mc03_neon;
+        c->avg_h264_qpel_pixels_tab[0][13] = ff_avg_h264_qpel16_mc13_neon;
+        c->avg_h264_qpel_pixels_tab[0][14] = ff_avg_h264_qpel16_mc23_neon;
+        c->avg_h264_qpel_pixels_tab[0][15] = ff_avg_h264_qpel16_mc33_neon;
+
+        c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 1] = ff_avg_h264_qpel8_mc10_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 2] = ff_avg_h264_qpel8_mc20_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 3] = ff_avg_h264_qpel8_mc30_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 4] = ff_avg_h264_qpel8_mc01_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 5] = ff_avg_h264_qpel8_mc11_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 6] = ff_avg_h264_qpel8_mc21_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 7] = ff_avg_h264_qpel8_mc31_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 8] = ff_avg_h264_qpel8_mc02_neon;
+        c->avg_h264_qpel_pixels_tab[1][ 9] = ff_avg_h264_qpel8_mc12_neon;
+        c->avg_h264_qpel_pixels_tab[1][10] = ff_avg_h264_qpel8_mc22_neon;
+        c->avg_h264_qpel_pixels_tab[1][11] = ff_avg_h264_qpel8_mc32_neon;
+        c->avg_h264_qpel_pixels_tab[1][12] = ff_avg_h264_qpel8_mc03_neon;
+        c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
+        c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
+        c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/hpeldsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/hpeldsp_init_aarch64.c
@ -0,0 +1,123 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/hpeldsp.h"
+
+void     ff_put_pixels16_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void      ff_put_pixels8_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void   ff_put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void   ff_put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+
+void  ff_put_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void  ff_put_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void   ff_put_pixels8_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void   ff_put_pixels8_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void  ff_put_pixels8_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+
+void     ff_avg_pixels16_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_avg_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_avg_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void ff_avg_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void      ff_avg_pixels8_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void   ff_avg_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void   ff_avg_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void  ff_avg_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+
+void  ff_avg_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void  ff_avg_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
+
+av_cold void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
+        c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
+        c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
+        c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
+        c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
+        c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
+        c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
+        c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
+
+        c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
+        c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
+        c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
+        c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
+        c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
+        c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
+        c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
+        c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
+
+        c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
+        c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
+        c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
+        c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
+        c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
+        c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
+        c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
+        c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;
+
+        c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
+        c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
+        c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
+        c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/idct.h
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/idct.h
@ -0,0 +1,28 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_IDCT_H
+#define AVCODEC_AARCH64_IDCT_H
+
+#include <stdint.h>
+
+void ff_simple_idct_neon(int16_t *data);
+void ff_simple_idct_put_neon(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
+void ff_simple_idct_add_neon(uint8_t *dest, ptrdiff_t line_size, int16_t *data);
+
+#endif /* AVCODEC_AARCH64_IDCT_H */
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/idctdsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/idctdsp_init_aarch64.c
@ -0,0 +1,41 @@
+/*
+ * ARM-NEON-optimized IDCT functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2017 Matthieu Bouron <matthieu.bouron@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idct.h"
+
+av_cold void ff_idctdsp_init_aarch64(IDCTDSPContext *c, AVCodecContext *avctx,
+                                     unsigned high_bit_depth)
+{
+    if (!avctx->lowres && !high_bit_depth) {
+        if (avctx->idct_algo == FF_IDCT_AUTO ||
+            avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+            avctx->idct_algo == FF_IDCT_SIMPLENEON) {
+            c->idct_put  = ff_simple_idct_put_neon;
+            c->idct_add  = ff_simple_idct_add_neon;
+            c->idct      = ff_simple_idct_neon;
+            c->perm_type = FF_IDCT_PERM_PARTTRANS;
+        }
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/mpegaudiodsp_init.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/mpegaudiodsp_init.c
@ -0,0 +1,40 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/mpegaudiodsp.h"
+#include "config.h"
+
+void ff_mpadsp_apply_window_fixed_neon(int32_t *synth_buf, int32_t *window,
+                                       int *dither, int16_t *samples, ptrdiff_t incr);
+void ff_mpadsp_apply_window_float_neon(float *synth_buf, float *window,
+                                       int *dither, float *samples, ptrdiff_t incr);
+
+av_cold void ff_mpadsp_init_aarch64(MPADSPContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        s->apply_window_fixed = ff_mpadsp_apply_window_fixed_neon;
+        s->apply_window_float = ff_mpadsp_apply_window_float_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/opusdsp_init.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/opusdsp_init.c
@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/opusdsp.h"
+
+void ff_opus_postfilter_neon(float *data, int period, float *gains, int len);
+float ff_opus_deemphasis_neon(float *out, float *in, float coeff, int len);
+
+av_cold void ff_opus_dsp_init_aarch64(OpusDSP *ctx)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        ctx->postfilter = ff_opus_postfilter_neon;
+        ctx->deemphasis = ff_opus_deemphasis_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/rv40dsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/rv40dsp_init_aarch64.c
@ -0,0 +1,48 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/rv34dsp.h"
+
+#include "config.h"
+
+void ff_put_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+void ff_put_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+
+void ff_avg_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+void ff_avg_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                 int h, int x, int y);
+
+av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon;
+        c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon;
+        c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_neon;
+        c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/sbrdsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/sbrdsp_init_aarch64.c
@ -0,0 +1,70 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/sbrdsp.h"
+
+void ff_sbr_sum64x5_neon(float *z);
+float ff_sbr_sum_square_neon(float (*x)[2], int n);
+void ff_sbr_neg_odd_64_neon(float *x);
+void ff_sbr_qmf_pre_shuffle_neon(float *z);
+void ff_sbr_qmf_post_shuffle_neon(float W[32][2], const float *z);
+void ff_sbr_qmf_deint_neg_neon(float *v, const float *src);
+void ff_sbr_qmf_deint_bfly_neon(float *v, const float *src0, const float *src1);
+void ff_sbr_hf_g_filt_neon(float (*Y)[2], const float (*X_high)[40][2],
+                           const float *g_filt, int m_max, intptr_t ixh);
+void ff_sbr_hf_gen_neon(float (*X_high)[2], const float (*X_low)[2],
+                        const float alpha0[2], const float alpha1[2],
+                        float bw, int start, int end);
+void ff_sbr_autocorrelate_neon(const float x[40][2], float phi[3][2][2]);
+void ff_sbr_hf_apply_noise_0_neon(float Y[64][2], const float *s_m,
+                                  const float *q_filt, int noise,
+                                  int kx, int m_max);
+void ff_sbr_hf_apply_noise_1_neon(float Y[64][2], const float *s_m,
+                                  const float *q_filt, int noise,
+                                  int kx, int m_max);
+void ff_sbr_hf_apply_noise_2_neon(float Y[64][2], const float *s_m,
+                                  const float *q_filt, int noise,
+                                  int kx, int m_max);
+void ff_sbr_hf_apply_noise_3_neon(float Y[64][2], const float *s_m,
+                                  const float *q_filt, int noise,
+                                  int kx, int m_max);
+
+av_cold void ff_sbrdsp_init_aarch64(SBRDSPContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        s->sum64x5 = ff_sbr_sum64x5_neon;
+        s->sum_square = ff_sbr_sum_square_neon;
+        s->neg_odd_64 = ff_sbr_neg_odd_64_neon;
+        s->qmf_pre_shuffle = ff_sbr_qmf_pre_shuffle_neon;
+        s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_neon;
+        s->qmf_deint_neg = ff_sbr_qmf_deint_neg_neon;
+        s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_neon;
+        s->hf_g_filt = ff_sbr_hf_g_filt_neon;
+        s->hf_gen = ff_sbr_hf_gen_neon;
+        s->autocorrelate = ff_sbr_autocorrelate_neon;
+        s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_neon;
+        s->hf_apply_noise[1] = ff_sbr_hf_apply_noise_1_neon;
+        s->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_neon;
+        s->hf_apply_noise[3] = ff_sbr_hf_apply_noise_3_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/synth_filter_init.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/synth_filter_init.c
@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/synth_filter.h"
+
+#include "asm-offsets.h"
+
+#if HAVE_NEON || HAVE_VFP
+AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
+#endif
+
+void ff_synth_filter_float_neon(FFTContext *imdct,
+                                float *synth_buf_ptr, int *synth_buf_offset,
+                                float synth_buf2[32], const float window[512],
+                                float out[32], const float in[32],
+                                float scale);
+
+av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
+        s->synth_filter_float = ff_synth_filter_float_neon;
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vc1dsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vc1dsp_init_aarch64.c
@ -0,0 +1,47 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/vc1dsp.h"
+
+#include "config.h"
+
+void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                int h, int x, int y);
+void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                int h, int x, int y);
+void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+                                int h, int x, int y);
+
+av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
+        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon;
+        dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = ff_put_vc1_chroma_mc4_neon;
+        dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = ff_avg_vc1_chroma_mc4_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/videodsp_init.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/videodsp_init.c
@ -0,0 +1,32 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/videodsp.h"
+
+void ff_prefetch_aarch64(uint8_t *mem, ptrdiff_t stride, int h);
+
+av_cold void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_armv8(cpu_flags))
+        ctx->prefetch = ff_prefetch_aarch64;
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vorbisdsp_init.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vorbisdsp_init.c
@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/vorbisdsp.h"
+
+void ff_vorbis_inverse_coupling_neon(float *mag, float *ang,
+                                     intptr_t blocksize);
+
+av_cold void ff_vorbisdsp_init_aarch64(VorbisDSPContext *c)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
+    }
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp8dsp.h
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp8dsp.h
@ -0,0 +1,75 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_VP8DSP_H
+#define AVCODEC_AARCH64_VP8DSP_H
+
+#include "libavcodec/vp8dsp.h"
+
+#define VP8_LF_Y(hv, inner, opt)                                             \
+    void ff_vp8_##hv##_loop_filter16##inner##_##opt(uint8_t *dst,            \
+                                                    ptrdiff_t stride,        \
+                                                    int flim_E, int flim_I,  \
+                                                    int hev_thresh)
+
+#define VP8_LF_UV(hv, inner, opt)                                            \
+    void ff_vp8_##hv##_loop_filter8uv##inner##_##opt(uint8_t *dstU,          \
+                                                     uint8_t *dstV,          \
+                                                     ptrdiff_t stride,       \
+                                                     int flim_E, int flim_I, \
+                                                     int hev_thresh)
+
+#define VP8_LF_SIMPLE(hv, opt)                                          \
+    void ff_vp8_##hv##_loop_filter16_simple_##opt(uint8_t *dst,         \
+                                                  ptrdiff_t stride,     \
+                                                  int flim)
+
+#define VP8_LF_HV(inner, opt)                   \
+    VP8_LF_Y(h,  inner, opt);                   \
+    VP8_LF_Y(v,  inner, opt);                   \
+    VP8_LF_UV(h, inner, opt);                   \
+    VP8_LF_UV(v, inner, opt)
+
+#define VP8_LF(opt)                             \
+    VP8_LF_HV(,       opt);                     \
+    VP8_LF_HV(_inner, opt);                     \
+    VP8_LF_SIMPLE(h, opt);                      \
+    VP8_LF_SIMPLE(v, opt)
+
+#define VP8_MC(n, opt)                                                  \
+    void ff_put_vp8_##n##_##opt(uint8_t *dst, ptrdiff_t dststride,      \
+                                uint8_t *src, ptrdiff_t srcstride,      \
+                                int h, int x, int y)
+
+#define VP8_EPEL(w, opt)                        \
+    VP8_MC(pixels ## w, opt);                   \
+    VP8_MC(epel ## w ## _h4, opt);              \
+    VP8_MC(epel ## w ## _h6, opt);              \
+    VP8_MC(epel ## w ## _v4, opt);              \
+    VP8_MC(epel ## w ## _h4v4, opt);            \
+    VP8_MC(epel ## w ## _h6v4, opt);            \
+    VP8_MC(epel ## w ## _v6, opt);              \
+    VP8_MC(epel ## w ## _h4v6, opt);            \
+    VP8_MC(epel ## w ## _h6v6, opt)
+
+#define VP8_BILIN(w, opt)                       \
+    VP8_MC(bilin ## w ## _h, opt);              \
+    VP8_MC(bilin ## w ## _v, opt);              \
+    VP8_MC(bilin ## w ## _hv, opt)
+
+#endif /* AVCODEC_AARCH64_VP8DSP_H */
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp8dsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp8dsp_init_aarch64.c
@ -0,0 +1,124 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/vp8dsp.h"
+#include "vp8dsp.h"
+
+void ff_vp8_luma_dc_wht_neon(int16_t block[4][4][16], int16_t dc[16]);
+
+void ff_vp8_idct_add_neon(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add_neon(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4uv_neon(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+
+VP8_LF(neon);
+
+VP8_EPEL(16, neon);
+VP8_EPEL(8,  neon);
+VP8_EPEL(4,  neon);
+
+VP8_BILIN(16, neon);
+VP8_BILIN(8,  neon);
+VP8_BILIN(4,  neon);
+
+av_cold void ff_vp78dsp_init_aarch64(VP8DSPContext *dsp)
+{
+    if (!have_neon(av_get_cpu_flags()))
+        return;
+    dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon;
+    dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_neon;
+    dsp->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_neon;
+    dsp->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_neon;
+
+    dsp->put_vp8_epel_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon;
+    dsp->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_neon;
+    dsp->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_neon;
+    dsp->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_neon;
+    dsp->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_neon;
+    dsp->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_neon;
+    dsp->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_neon;
+    dsp->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_neon;
+    dsp->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_neon;
+
+    dsp->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_neon;
+    dsp->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_neon;
+    dsp->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_neon;
+    dsp->put_vp8_epel_pixels_tab[2][1][1] = ff_put_vp8_epel4_h4v4_neon;
+    dsp->put_vp8_epel_pixels_tab[2][1][2] = ff_put_vp8_epel4_h6v4_neon;
+    dsp->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_neon;
+    dsp->put_vp8_epel_pixels_tab[2][2][1] = ff_put_vp8_epel4_h4v6_neon;
+    dsp->put_vp8_epel_pixels_tab[2][2][2] = ff_put_vp8_epel4_h6v6_neon;
+
+    dsp->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon;
+    dsp->put_vp8_bilinear_pixels_tab[0][0][1] = ff_put_vp8_bilin16_h_neon;
+    dsp->put_vp8_bilinear_pixels_tab[0][0][2] = ff_put_vp8_bilin16_h_neon;
+    dsp->put_vp8_bilinear_pixels_tab[0][1][0] = ff_put_vp8_bilin16_v_neon;
+    dsp->put_vp8_bilinear_pixels_tab[0][1][1] = ff_put_vp8_bilin16_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[0][1][2] = ff_put_vp8_bilin16_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[0][2][0] = ff_put_vp8_bilin16_v_neon;
+    dsp->put_vp8_bilinear_pixels_tab[0][2][1] = ff_put_vp8_bilin16_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[0][2][2] = ff_put_vp8_bilin16_hv_neon;
+
+    dsp->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon;
+    dsp->put_vp8_bilinear_pixels_tab[1][0][1] = ff_put_vp8_bilin8_h_neon;
+    dsp->put_vp8_bilinear_pixels_tab[1][0][2] = ff_put_vp8_bilin8_h_neon;
+    dsp->put_vp8_bilinear_pixels_tab[1][1][0] = ff_put_vp8_bilin8_v_neon;
+    dsp->put_vp8_bilinear_pixels_tab[1][1][1] = ff_put_vp8_bilin8_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[1][1][2] = ff_put_vp8_bilin8_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[1][2][0] = ff_put_vp8_bilin8_v_neon;
+    dsp->put_vp8_bilinear_pixels_tab[1][2][1] = ff_put_vp8_bilin8_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[1][2][2] = ff_put_vp8_bilin8_hv_neon;
+
+    dsp->put_vp8_bilinear_pixels_tab[2][0][1] = ff_put_vp8_bilin4_h_neon;
+    dsp->put_vp8_bilinear_pixels_tab[2][0][2] = ff_put_vp8_bilin4_h_neon;
+    dsp->put_vp8_bilinear_pixels_tab[2][1][0] = ff_put_vp8_bilin4_v_neon;
+    dsp->put_vp8_bilinear_pixels_tab[2][1][1] = ff_put_vp8_bilin4_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[2][1][2] = ff_put_vp8_bilin4_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[2][2][0] = ff_put_vp8_bilin4_v_neon;
+    dsp->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilin4_hv_neon;
+    dsp->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilin4_hv_neon;
+}
+
+av_cold void ff_vp8dsp_init_aarch64(VP8DSPContext *dsp)
+{
+    if (!have_neon(av_get_cpu_flags()))
+        return;
+    dsp->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_neon;
+
+    dsp->vp8_idct_add       = ff_vp8_idct_add_neon;
+    dsp->vp8_idct_dc_add    = ff_vp8_idct_dc_add_neon;
+    dsp->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_neon;
+    dsp->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_neon;
+
+    dsp->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16_neon;
+    dsp->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16_neon;
+    dsp->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_neon;
+    dsp->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_neon;
+
+    dsp->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_neon;
+    dsp->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_neon;
+    dsp->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_neon;
+    dsp->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_neon;
+
+    dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_neon;
+    dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_neon;
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init.h
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init.h
@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_VP9DSP_INIT_H
+#define AVCODEC_AARCH64_VP9DSP_INIT_H
+
+#include "libavcodec/vp9dsp.h"
+
+void ff_vp9dsp_init_10bpp_aarch64(VP9DSPContext *dsp);
+void ff_vp9dsp_init_12bpp_aarch64(VP9DSPContext *dsp);
+
+#endif /* AVCODEC_AARCH64_VP9DSP_INIT_H */
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init_10bpp_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init_10bpp_aarch64.c
@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BPP 10
+#define INIT_FUNC ff_vp9dsp_init_10bpp_aarch64
+#include "vp9dsp_init_16bpp_aarch64_template.c"
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init_12bpp_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init_12bpp_aarch64.c
@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BPP 12
+#define INIT_FUNC ff_vp9dsp_init_12bpp_aarch64
+#include "vp9dsp_init_16bpp_aarch64_template.c"
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init_16bpp_aarch64_template.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init_16bpp_aarch64_template.c
@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2017 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/aarch64/cpu.h"
+#include "vp9dsp_init.h"
+
+#define declare_fpel(type, sz, suffix)                                          \
+void ff_vp9_##type##sz##suffix##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                      const uint8_t *src, ptrdiff_t src_stride, \
+                                      int h, int mx, int my)
+
+#define decl_mc_func(op, filter, dir, sz, bpp)                                                   \
+void ff_vp9_##op##_##filter##sz##_##dir##_##bpp##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                                       const uint8_t *src, ptrdiff_t src_stride, \
+                                                       int h, int mx, int my)
+
+#define define_8tap_2d_fn(op, filter, sz, bpp)                                      \
+static void op##_##filter##sz##_hv_##bpp##_neon(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                const uint8_t *src,                 \
+                                                ptrdiff_t src_stride,               \
+                                                int h, int mx, int my)              \
+{                                                                                   \
+    LOCAL_ALIGNED_16(uint8_t, temp, [((1 + (sz < 64)) * sz + 8) * sz * 2]);         \
+    /* We only need h + 7 lines, but the horizontal filter assumes an               \
+     * even number of rows, so filter h + 8 lines here. */                          \
+    ff_vp9_put_##filter##sz##_h_##bpp##_neon(temp, 2 * sz,                          \
+                                             src - 3 * src_stride, src_stride,      \
+                                             h + 8, mx, 0);                         \
+    ff_vp9_##op##_##filter##sz##_v_##bpp##_neon(dst, dst_stride,                    \
+                                                temp + 3 * 2 * sz, 2 * sz,          \
+                                                h, 0, my);                          \
+}
+
+#define decl_filter_funcs(op, dir, sz, bpp)  \
+    decl_mc_func(op, regular, dir, sz, bpp); \
+    decl_mc_func(op, sharp,   dir, sz, bpp); \
+    decl_mc_func(op, smooth,  dir, sz, bpp)
+
+#define decl_mc_funcs(sz, bpp)           \
+    decl_filter_funcs(put, h,  sz, bpp); \
+    decl_filter_funcs(avg, h,  sz, bpp); \
+    decl_filter_funcs(put, v,  sz, bpp); \
+    decl_filter_funcs(avg, v,  sz, bpp); \
+    decl_filter_funcs(put, hv, sz, bpp); \
+    decl_filter_funcs(avg, hv, sz, bpp)
+
+#define ff_vp9_copy32_neon  ff_vp9_copy32_aarch64
+#define ff_vp9_copy64_neon  ff_vp9_copy64_aarch64
+#define ff_vp9_copy128_neon ff_vp9_copy128_aarch64
+
+declare_fpel(copy, 128, );
+declare_fpel(copy, 64,  );
+declare_fpel(copy, 32,  );
+declare_fpel(copy, 16,  );
+declare_fpel(copy, 8,   );
+declare_fpel(avg, 64, _16);
+declare_fpel(avg, 32, _16);
+declare_fpel(avg, 16, _16);
+declare_fpel(avg, 8,  _16);
+declare_fpel(avg, 4,  _16);
+
+decl_mc_funcs(64, BPP);
+decl_mc_funcs(32, BPP);
+decl_mc_funcs(16, BPP);
+decl_mc_funcs(8, BPP);
+decl_mc_funcs(4, BPP);
+
+#define define_8tap_2d_funcs(sz, bpp)        \
+    define_8tap_2d_fn(put, regular, sz, bpp) \
+    define_8tap_2d_fn(put, sharp,   sz, bpp) \
+    define_8tap_2d_fn(put, smooth,  sz, bpp) \
+    define_8tap_2d_fn(avg, regular, sz, bpp) \
+    define_8tap_2d_fn(avg, sharp,   sz, bpp) \
+    define_8tap_2d_fn(avg, smooth,  sz, bpp)
+
+define_8tap_2d_funcs(64, BPP)
+define_8tap_2d_funcs(32, BPP)
+define_8tap_2d_funcs(16, BPP)
+define_8tap_2d_funcs(8,  BPP)
+define_8tap_2d_funcs(4,  BPP)
+
+static av_cold void vp9dsp_mc_init_aarch64(VP9DSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+#define init_fpel(idx1, idx2, sz, type, suffix)      \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##suffix
+
+#define init_copy(idx, sz, suffix) \
+    init_fpel(idx, 0, sz, copy, suffix)
+
+#define init_avg(idx, sz, suffix) \
+    init_fpel(idx, 1, sz, avg,  suffix)
+
+#define init_copy_avg(idx, sz1, sz2) \
+    init_copy(idx, sz2, _neon);      \
+    init_avg (idx, sz1, _16_neon)
+
+    if (have_armv8(cpu_flags)) {
+        init_copy(0, 128, _aarch64);
+        init_copy(1, 64,  _aarch64);
+        init_copy(2, 32,  _aarch64);
+    }
+
+    if (have_neon(cpu_flags)) {
+#define init_mc_func(idx1, idx2, op, filter, fname, dir, mx, my, sz, pfx, bpp) \
+    dsp->mc[idx1][filter][idx2][mx][my] = pfx##op##_##fname##sz##_##dir##_##bpp##_neon
+
+#define init_mc_funcs(idx, dir, mx, my, sz, pfx, bpp)                                   \
+    init_mc_func(idx, 0, put, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 0, put, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 0, put, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx, bpp); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx, bpp)
+
+#define init_mc_funcs_dirs(idx, sz, bpp)            \
+    init_mc_funcs(idx, v,  0, 1, sz, ff_vp9_, bpp); \
+    init_mc_funcs(idx, h,  1, 0, sz, ff_vp9_, bpp); \
+    init_mc_funcs(idx, hv, 1, 1, sz,        , bpp)
+
+
+        init_avg(0, 64, _16_neon);
+        init_avg(1, 32, _16_neon);
+        init_avg(2, 16, _16_neon);
+        init_copy_avg(3, 8, 16);
+        init_copy_avg(4, 4, 8);
+
+        init_mc_funcs_dirs(0, 64, BPP);
+        init_mc_funcs_dirs(1, 32, BPP);
+        init_mc_funcs_dirs(2, 16, BPP);
+        init_mc_funcs_dirs(3, 8,  BPP);
+        init_mc_funcs_dirs(4, 4,  BPP);
+    }
+}
+
+#define define_itxfm2(type_a, type_b, sz, bpp)                                     \
+void ff_vp9_##type_a##_##type_b##_##sz##x##sz##_add_##bpp##_neon(uint8_t *_dst,    \
+                                                                 ptrdiff_t stride, \
+                                                                 int16_t *_block, int eob)
+#define define_itxfm(type_a, type_b, sz, bpp) define_itxfm2(type_a, type_b, sz, bpp)
+
+#define define_itxfm_funcs(sz, bpp)      \
+    define_itxfm(idct,  idct,  sz, bpp); \
+    define_itxfm(iadst, idct,  sz, bpp); \
+    define_itxfm(idct,  iadst, sz, bpp); \
+    define_itxfm(iadst, iadst, sz, bpp)
+
+define_itxfm_funcs(4,  BPP);
+define_itxfm_funcs(8,  BPP);
+define_itxfm_funcs(16, BPP);
+define_itxfm(idct, idct, 32, BPP);
+define_itxfm(iwht, iwht, 4,  BPP);
+
+
+static av_cold void vp9dsp_itxfm_init_aarch64(VP9DSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+#define init_itxfm2(tx, sz, bpp)                                               \
+    dsp->itxfm_add[tx][DCT_DCT]   = ff_vp9_idct_idct_##sz##_add_##bpp##_neon;  \
+    dsp->itxfm_add[tx][DCT_ADST]  = ff_vp9_iadst_idct_##sz##_add_##bpp##_neon; \
+    dsp->itxfm_add[tx][ADST_DCT]  = ff_vp9_idct_iadst_##sz##_add_##bpp##_neon; \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_iadst_iadst_##sz##_add_##bpp##_neon
+#define init_itxfm(tx, sz, bpp) init_itxfm2(tx, sz, bpp)
+
+#define init_idct2(tx, nm, bpp)     \
+    dsp->itxfm_add[tx][DCT_DCT]   = \
+    dsp->itxfm_add[tx][ADST_DCT]  = \
+    dsp->itxfm_add[tx][DCT_ADST]  = \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_##nm##_add_##bpp##_neon
+#define init_idct(tx, nm, bpp) init_idct2(tx, nm, bpp)
+
+        init_itxfm(TX_4X4,   4x4,   BPP);
+        init_itxfm(TX_8X8,   8x8,   BPP);
+        init_itxfm(TX_16X16, 16x16, BPP);
+        init_idct(TX_32X32, idct_idct_32x32, BPP);
+        init_idct(4,        iwht_iwht_4x4,   BPP);
+    }
+}
+
+#define define_loop_filter(dir, wd, size, bpp) \
+void ff_vp9_loop_filter_##dir##_##wd##_##size##_##bpp##_neon(uint8_t *dst, ptrdiff_t stride, int E, int I, int H)
+
+#define define_loop_filters(wd, size, bpp) \
+    define_loop_filter(h, wd, size, bpp);  \
+    define_loop_filter(v, wd, size, bpp)
+
+define_loop_filters(4,  8,  BPP);
+define_loop_filters(8,  8,  BPP);
+define_loop_filters(16, 8,  BPP);
+
+define_loop_filters(16, 16, BPP);
+
+define_loop_filters(44, 16, BPP);
+define_loop_filters(48, 16, BPP);
+define_loop_filters(84, 16, BPP);
+define_loop_filters(88, 16, BPP);
+
+static av_cold void vp9dsp_loopfilter_init_aarch64(VP9DSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+#define init_lpf_func_8(idx1, idx2, dir, wd, bpp) \
+    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_8_##bpp##_neon
+
+#define init_lpf_func_16(idx, dir, bpp) \
+    dsp->loop_filter_16[idx] = ff_vp9_loop_filter_##dir##_16_16_##bpp##_neon
+
+#define init_lpf_func_mix2(idx1, idx2, idx3, dir, wd, bpp) \
+    dsp->loop_filter_mix2[idx1][idx2][idx3] = ff_vp9_loop_filter_##dir##_##wd##_16_##bpp##_neon
+
+#define init_lpf_funcs_8_wd(idx, wd, bpp) \
+    init_lpf_func_8(idx, 0, h, wd, bpp);  \
+    init_lpf_func_8(idx, 1, v, wd, bpp)
+
+#define init_lpf_funcs_16(bpp)   \
+    init_lpf_func_16(0, h, bpp); \
+    init_lpf_func_16(1, v, bpp)
+
+#define init_lpf_funcs_mix2_wd(idx1, idx2, wd, bpp) \
+    init_lpf_func_mix2(idx1, idx2, 0, h, wd, bpp);  \
+    init_lpf_func_mix2(idx1, idx2, 1, v, wd, bpp)
+
+#define init_lpf_funcs_8(bpp)        \
+    init_lpf_funcs_8_wd(0, 4,  bpp); \
+    init_lpf_funcs_8_wd(1, 8,  bpp); \
+    init_lpf_funcs_8_wd(2, 16, bpp)
+
+#define init_lpf_funcs_mix2(bpp)           \
+    init_lpf_funcs_mix2_wd(0, 0, 44, bpp); \
+    init_lpf_funcs_mix2_wd(0, 1, 48, bpp); \
+    init_lpf_funcs_mix2_wd(1, 0, 84, bpp); \
+    init_lpf_funcs_mix2_wd(1, 1, 88, bpp)
+
+        init_lpf_funcs_8(BPP);
+        init_lpf_funcs_16(BPP);
+        init_lpf_funcs_mix2(BPP);
+    }
+}
+
+av_cold void INIT_FUNC(VP9DSPContext *dsp)
+{
+    vp9dsp_mc_init_aarch64(dsp);
+    vp9dsp_loopfilter_init_aarch64(dsp);
+    vp9dsp_itxfm_init_aarch64(dsp);
+}
--- a/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init_aarch64.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavcodec/aarch64/vp9dsp_init_aarch64.c
@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp_init.h"
+
+#define declare_fpel(type, sz)                                          \
+void ff_vp9_##type##sz##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                              const uint8_t *src, ptrdiff_t src_stride, \
+                              int h, int mx, int my)
+
+#define declare_copy_avg(sz) \
+    declare_fpel(copy, sz);  \
+    declare_fpel(avg , sz)
+
+#define decl_mc_func(op, filter, dir, sz)                                                \
+void ff_vp9_##op##_##filter##sz##_##dir##_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                               const uint8_t *src, ptrdiff_t src_stride, \
+                                               int h, int mx, int my)
+
+#define define_8tap_2d_fn(op, filter, sz)                                         \
+static void op##_##filter##sz##_hv_neon(uint8_t *dst, ptrdiff_t dst_stride,       \
+                                        const uint8_t *src, ptrdiff_t src_stride, \
+                                        int h, int mx, int my)                    \
+{                                                                                 \
+    LOCAL_ALIGNED_16(uint8_t, temp, [((1 + (sz < 64)) * sz + 8) * sz]);           \
+    /* We only need h + 7 lines, but the horizontal filter assumes an             \
+     * even number of rows, so filter h + 8 lines here. */                        \
+    ff_vp9_put_##filter##sz##_h_neon(temp, sz,                                    \
+                                     src - 3 * src_stride, src_stride,            \
+                                     h + 8, mx, 0);                               \
+    ff_vp9_##op##_##filter##sz##_v_neon(dst, dst_stride,                          \
+                                        temp + 3 * sz, sz,                        \
+                                        h, 0, my);                                \
+}
+
+#define decl_filter_funcs(op, dir, sz)  \
+    decl_mc_func(op, regular, dir, sz); \
+    decl_mc_func(op, sharp,   dir, sz); \
+    decl_mc_func(op, smooth,  dir, sz)
+
+#define decl_mc_funcs(sz)           \
+    decl_filter_funcs(put, h,  sz); \
+    decl_filter_funcs(avg, h,  sz); \
+    decl_filter_funcs(put, v,  sz); \
+    decl_filter_funcs(avg, v,  sz); \
+    decl_filter_funcs(put, hv, sz); \
+    decl_filter_funcs(avg, hv, sz)
+
+#define ff_vp9_copy32_neon ff_vp9_copy32_aarch64
+#define ff_vp9_copy64_neon ff_vp9_copy64_aarch64
+
+declare_copy_avg(64);
+declare_copy_avg(32);
+declare_copy_avg(16);
+declare_copy_avg(8);
+declare_copy_avg(4);
+
+decl_mc_funcs(64);
+decl_mc_funcs(32);
+decl_mc_funcs(16);
+decl_mc_funcs(8);
+decl_mc_funcs(4);
+
+#define define_8tap_2d_funcs(sz)        \
+    define_8tap_2d_fn(put, regular, sz) \
+    define_8tap_2d_fn(put, sharp,   sz) \
+    define_8tap_2d_fn(put, smooth,  sz) \
+    define_8tap_2d_fn(avg, regular, sz) \
+    define_8tap_2d_fn(avg, sharp,   sz) \
+    define_8tap_2d_fn(avg, smooth,  sz)
+
+define_8tap_2d_funcs(64)
+define_8tap_2d_funcs(32)
+define_8tap_2d_funcs(16)
+define_8tap_2d_funcs(8)
+define_8tap_2d_funcs(4)
+
+static av_cold void vp9dsp_mc_init_aarch64(VP9DSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+#define init_fpel(idx1, idx2, sz, type, suffix)      \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##suffix
+
+#define init_copy(idx, sz, suffix) \
+    init_fpel(idx, 0, sz, copy, suffix)
+
+#define init_avg(idx, sz, suffix) \
+    init_fpel(idx, 1, sz, avg,  suffix)
+
+#define init_copy_avg(idx, sz) \
+    init_copy(idx, sz, _neon); \
+    init_avg (idx, sz, _neon)
+
+    if (have_armv8(cpu_flags)) {
+        init_copy(0, 64, _aarch64);
+        init_copy(1, 32, _aarch64);
+    }
+
+    if (have_neon(cpu_flags)) {
+#define init_mc_func(idx1, idx2, op, filter, fname, dir, mx, my, sz, pfx) \
+    dsp->mc[idx1][filter][idx2][mx][my] = pfx##op##_##fname##sz##_##dir##_neon
+
+#define init_mc_funcs(idx, dir, mx, my, sz, pfx)                                   \
+    init_mc_func(idx, 0, put, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 0, put, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 0, put, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_REGULAR, regular, dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_SHARP,   sharp,   dir, mx, my, sz, pfx); \
+    init_mc_func(idx, 1, avg, FILTER_8TAP_SMOOTH,  smooth,  dir, mx, my, sz, pfx)
+
+#define init_mc_funcs_dirs(idx, sz)            \
+    init_mc_funcs(idx, h,  1, 0, sz, ff_vp9_); \
+    init_mc_funcs(idx, v,  0, 1, sz, ff_vp9_); \
+    init_mc_funcs(idx, hv, 1, 1, sz,)
+
+        init_avg(0, 64, _neon);
+        init_avg(1, 32, _neon);
+        init_copy_avg(2, 16);
+        init_copy_avg(3, 8);
+        init_copy_avg(4, 4);
+
+        init_mc_funcs_dirs(0, 64);
+        init_mc_funcs_dirs(1, 32);
+        init_mc_funcs_dirs(2, 16);
+        init_mc_funcs_dirs(3, 8);
+        init_mc_funcs_dirs(4, 4);
+    }
+}
+
+#define define_itxfm(type_a, type_b, sz)                                   \
+void ff_vp9_##type_a##_##type_b##_##sz##x##sz##_add_neon(uint8_t *_dst,    \
+                                                         ptrdiff_t stride, \
+                                                         int16_t *_block, int eob)
+
+#define define_itxfm_funcs(sz)      \
+    define_itxfm(idct,  idct,  sz); \
+    define_itxfm(iadst, idct,  sz); \
+    define_itxfm(idct,  iadst, sz); \
+    define_itxfm(iadst, iadst, sz)
+
+define_itxfm_funcs(4);
+define_itxfm_funcs(8);
+define_itxfm_funcs(16);
+define_itxfm(idct, idct, 32);
+define_itxfm(iwht, iwht, 4);
+
+
+static av_cold void vp9dsp_itxfm_init_aarch64(VP9DSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+#define init_itxfm(tx, sz)                                             \
+    dsp->itxfm_add[tx][DCT_DCT]   = ff_vp9_idct_idct_##sz##_add_neon;  \
+    dsp->itxfm_add[tx][DCT_ADST]  = ff_vp9_iadst_idct_##sz##_add_neon; \
+    dsp->itxfm_add[tx][ADST_DCT]  = ff_vp9_idct_iadst_##sz##_add_neon; \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_iadst_iadst_##sz##_add_neon
+
+#define init_idct(tx, nm)           \
+    dsp->itxfm_add[tx][DCT_DCT]   = \
+    dsp->itxfm_add[tx][ADST_DCT]  = \
+    dsp->itxfm_add[tx][DCT_ADST]  = \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_vp9_##nm##_add_neon
+
+        init_itxfm(TX_4X4, 4x4);
+        init_itxfm(TX_8X8, 8x8);
+        init_itxfm(TX_16X16, 16x16);
+        init_idct(TX_32X32, idct_idct_32x32);
+        init_idct(4, iwht_iwht_4x4);
+    }
+}
+
+#define define_loop_filter(dir, wd, len) \
+void ff_vp9_loop_filter_##dir##_##wd##_##len##_neon(uint8_t *dst, ptrdiff_t stride, int E, int I, int H)
+
+#define define_loop_filters(wd, len) \
+    define_loop_filter(h, wd, len);  \
+    define_loop_filter(v, wd, len)
+
+define_loop_filters(4, 8);
+define_loop_filters(8, 8);
+define_loop_filters(16, 8);
+
+define_loop_filters(16, 16);
+
+define_loop_filters(44, 16);
+define_loop_filters(48, 16);
+define_loop_filters(84, 16);
+define_loop_filters(88, 16);
+
+static av_cold void vp9dsp_loopfilter_init_aarch64(VP9DSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
+        dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_neon;
+        dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_neon;
+        dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_neon;
+        dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_neon;
+        dsp->loop_filter_8[2][1] = ff_vp9_loop_filter_v_16_8_neon;
+        dsp->loop_filter_8[2][0] = ff_vp9_loop_filter_h_16_8_neon;
+
+        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_neon;
+        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_neon;
+
+        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_neon;
+        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_neon;
+        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_neon;
+        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_neon;
+        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_neon;
+        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_neon;
+        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_neon;
+        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_neon;
+    }
+}
+
+av_cold void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp)
+{
+    if (bpp == 10) {
+        ff_vp9dsp_init_10bpp_aarch64(dsp);
+        return;
+    } else if (bpp == 12) {
+        ff_vp9dsp_init_12bpp_aarch64(dsp);
+        return;
+    } else if (bpp != 8)
+        return;
+
+    vp9dsp_mc_init_aarch64(dsp);
+    vp9dsp_loopfilter_init_aarch64(dsp);
+    vp9dsp_itxfm_init_aarch64(dsp);
+}