mirror of
https://github.com/ossrs/srs.git
synced 2025-03-09 15:49:59 +00:00
parent
634a14bfa6
commit
4308f238c0
690 changed files with 182077 additions and 1 deletions
154
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/asm.h
vendored
Normal file
154
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/asm.h
vendored
Normal file
|
@ -0,0 +1,154 @@
|
|||
/*
|
||||
* copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_ASM_H
|
||||
#define AVUTIL_X86_ASM_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
|
||||
typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
|
||||
typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
|
||||
|
||||
#if ARCH_X86_64
|
||||
# define FF_OPSIZE "q"
|
||||
# define FF_REG_a "rax"
|
||||
# define FF_REG_b "rbx"
|
||||
# define FF_REG_c "rcx"
|
||||
# define FF_REG_d "rdx"
|
||||
# define FF_REG_D "rdi"
|
||||
# define FF_REG_S "rsi"
|
||||
# define FF_PTR_SIZE "8"
|
||||
typedef int64_t x86_reg;
|
||||
|
||||
/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */
|
||||
# define FF_REG_sp "rsp"
|
||||
# define FF_REG_BP "rbp"
|
||||
# define FF_REGBP rbp
|
||||
# define FF_REGa rax
|
||||
# define FF_REGb rbx
|
||||
# define FF_REGc rcx
|
||||
# define FF_REGd rdx
|
||||
# define FF_REGSP rsp
|
||||
|
||||
#elif ARCH_X86_32
|
||||
|
||||
# define FF_OPSIZE "l"
|
||||
# define FF_REG_a "eax"
|
||||
# define FF_REG_b "ebx"
|
||||
# define FF_REG_c "ecx"
|
||||
# define FF_REG_d "edx"
|
||||
# define FF_REG_D "edi"
|
||||
# define FF_REG_S "esi"
|
||||
# define FF_PTR_SIZE "4"
|
||||
typedef int32_t x86_reg;
|
||||
|
||||
# define FF_REG_sp "esp"
|
||||
# define FF_REG_BP "ebp"
|
||||
# define FF_REGBP ebp
|
||||
# define FF_REGa eax
|
||||
# define FF_REGb ebx
|
||||
# define FF_REGc ecx
|
||||
# define FF_REGd edx
|
||||
# define FF_REGSP esp
|
||||
#else
|
||||
typedef int x86_reg;
|
||||
#endif
|
||||
|
||||
#define HAVE_7REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE && HAVE_EBP_AVAILABLE))
|
||||
#define HAVE_6REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE || HAVE_EBP_AVAILABLE))
|
||||
|
||||
#if ARCH_X86_64 && defined(PIC)
|
||||
# define BROKEN_RELOCATIONS 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If gcc is not set to support sse (-msse) it will not accept xmm registers
|
||||
* in the clobber list for inline asm. XMM_CLOBBERS takes a list of xmm
|
||||
* registers to be marked as clobbered and evaluates to nothing if they are
|
||||
* not supported, or to the list itself if they are supported. Since a clobber
|
||||
* list may not be empty, XMM_CLOBBERS_ONLY should be used if the xmm
|
||||
* registers are the only in the clobber list.
|
||||
* For example a list with "eax" and "xmm0" as clobbers should become:
|
||||
* : XMM_CLOBBERS("xmm0",) "eax"
|
||||
* and a list with only "xmm0" should become:
|
||||
* XMM_CLOBBERS_ONLY("xmm0")
|
||||
*/
|
||||
#if HAVE_XMM_CLOBBERS
|
||||
# define XMM_CLOBBERS(...) __VA_ARGS__
|
||||
# define XMM_CLOBBERS_ONLY(...) : __VA_ARGS__
|
||||
#else
|
||||
# define XMM_CLOBBERS(...)
|
||||
# define XMM_CLOBBERS_ONLY(...)
|
||||
#endif
|
||||
|
||||
/* Use to export labels from asm. */
|
||||
#define LABEL_MANGLE(a) EXTERN_PREFIX #a
|
||||
|
||||
// Use rip-relative addressing if compiling PIC code on x86-64.
|
||||
#if ARCH_X86_64 && defined(PIC)
|
||||
# define LOCAL_MANGLE(a) #a "(%%rip)"
|
||||
#else
|
||||
# define LOCAL_MANGLE(a) #a
|
||||
#endif
|
||||
|
||||
#if HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS
|
||||
# define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
|
||||
# define NAMED_CONSTRAINTS_ADD(...)
|
||||
# define NAMED_CONSTRAINTS(...)
|
||||
# define NAMED_CONSTRAINTS_ARRAY_ADD(...)
|
||||
# define NAMED_CONSTRAINTS_ARRAY(...)
|
||||
#else
|
||||
/* When direct symbol references are used in code passed to a compiler that does not support them
|
||||
* then these references need to be converted to named asm constraints instead.
|
||||
* Instead of returning a direct symbol MANGLE now returns a named constraint for that specific symbol.
|
||||
* In order for this to work there must also be a corresponding entry in the asm-interface. To add this
|
||||
* entry use the macro NAMED_CONSTRAINTS() and pass in a list of each symbol reference used in the
|
||||
* corresponding block of code. (e.g. NAMED_CONSTRAINTS(var1,var2,var3) where var1 is the first symbol etc. ).
|
||||
* If there are already existing constraints then use NAMED_CONSTRAINTS_ADD to add to the existing constraint list.
|
||||
*/
|
||||
# define MANGLE(a) "%["#a"]"
|
||||
// Intel/MSVC does not correctly expand va-args so we need a rather ugly hack in order to get it to work
|
||||
# define FE_0(P,X) P(X)
|
||||
# define FE_1(P,X,X1) P(X), FE_0(P,X1)
|
||||
# define FE_2(P,X,X1,X2) P(X), FE_1(P,X1,X2)
|
||||
# define FE_3(P,X,X1,X2,X3) P(X), FE_2(P,X1,X2,X3)
|
||||
# define FE_4(P,X,X1,X2,X3,X4) P(X), FE_3(P,X1,X2,X3,X4)
|
||||
# define FE_5(P,X,X1,X2,X3,X4,X5) P(X), FE_4(P,X1,X2,X3,X4,X5)
|
||||
# define FE_6(P,X,X1,X2,X3,X4,X5,X6) P(X), FE_5(P,X1,X2,X3,X4,X5,X6)
|
||||
# define FE_7(P,X,X1,X2,X3,X4,X5,X6,X7) P(X), FE_6(P,X1,X2,X3,X4,X5,X6,X7)
|
||||
# define FE_8(P,X,X1,X2,X3,X4,X5,X6,X7,X8) P(X), FE_7(P,X1,X2,X3,X4,X5,X6,X7,X8)
|
||||
# define FE_9(P,X,X1,X2,X3,X4,X5,X6,X7,X8,X9) P(X), FE_8(P,X1,X2,X3,X4,X5,X6,X7,X8,X9)
|
||||
# define GET_FE_IMPL(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
|
||||
# define GET_FE(A) GET_FE_IMPL A
|
||||
# define GET_FE_GLUE(x, y) x y
|
||||
# define FOR_EACH_VA(P,...) GET_FE_GLUE(GET_FE((__VA_ARGS__,FE_9,FE_8,FE_7,FE_6,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)), (P,__VA_ARGS__))
|
||||
# define NAME_CONSTRAINT(x) [x] "m"(x)
|
||||
// Parameters are a list of each symbol reference required
|
||||
# define NAMED_CONSTRAINTS_ADD(...) , FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
|
||||
// Same but without comma for when there are no previously defined constraints
|
||||
# define NAMED_CONSTRAINTS(...) FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
|
||||
// Same as above NAMED_CONSTRAINTS except used for passing arrays/pointers instead of normal variables
|
||||
# define NAME_CONSTRAINT_ARRAY(x) [x] "m"(*x)
|
||||
# define NAMED_CONSTRAINTS_ARRAY_ADD(...) , FOR_EACH_VA(NAME_CONSTRAINT_ARRAY,__VA_ARGS__)
|
||||
# define NAMED_CONSTRAINTS_ARRAY(...) FOR_EACH_VA(NAME_CONSTRAINT_ARRAY,__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#endif /* AVUTIL_X86_ASM_H */
|
87
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/bswap.h
vendored
Normal file
87
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/bswap.h
vendored
Normal file
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* byte swapping routines
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_BSWAP_H
|
||||
#define AVUTIL_X86_BSWAP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#define av_bswap16 av_bswap16
|
||||
static av_always_inline av_const uint16_t av_bswap16(uint16_t x)
|
||||
{
|
||||
return _rotr16(x, 8);
|
||||
}
|
||||
|
||||
#define av_bswap32 av_bswap32
|
||||
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
|
||||
{
|
||||
return _byteswap_ulong(x);
|
||||
}
|
||||
|
||||
#if ARCH_X86_64
|
||||
#define av_bswap64 av_bswap64
|
||||
static inline uint64_t av_const av_bswap64(uint64_t x)
|
||||
{
|
||||
return _byteswap_uint64(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#elif HAVE_INLINE_ASM
|
||||
|
||||
#if AV_GCC_VERSION_AT_MOST(4,0)
|
||||
#define av_bswap16 av_bswap16
|
||||
static av_always_inline av_const unsigned av_bswap16(unsigned x)
|
||||
{
|
||||
__asm__("rorw $8, %w0" : "+r"(x));
|
||||
return x;
|
||||
}
|
||||
#endif /* AV_GCC_VERSION_AT_MOST(4,0) */
|
||||
|
||||
#if AV_GCC_VERSION_AT_MOST(4,4) || defined(__INTEL_COMPILER)
|
||||
#define av_bswap32 av_bswap32
|
||||
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
|
||||
{
|
||||
__asm__("bswap %0" : "+r" (x));
|
||||
return x;
|
||||
}
|
||||
|
||||
#if ARCH_X86_64
|
||||
#define av_bswap64 av_bswap64
|
||||
static inline uint64_t av_const av_bswap64(uint64_t x)
|
||||
{
|
||||
__asm__("bswap %0": "=r" (x) : "0" (x));
|
||||
return x;
|
||||
}
|
||||
#endif
|
||||
#endif /* AV_GCC_VERSION_AT_MOST(4,4) */
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
#endif /* AVUTIL_X86_BSWAP_H */
|
272
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/cpu.c
vendored
Normal file
272
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/cpu.c
vendored
Normal file
|
@ -0,0 +1,272 @@
|
|||
/*
|
||||
* CPU detection code, extracted from mmx.h
|
||||
* (c)1997-99 by H. Dietz and R. Fisher
|
||||
* Converted to C and improved by Fabrice Bellard.
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
|
||||
#if HAVE_X86ASM
|
||||
|
||||
#define cpuid(index, eax, ebx, ecx, edx) \
|
||||
ff_cpu_cpuid(index, &eax, &ebx, &ecx, &edx)
|
||||
|
||||
#define xgetbv(index, eax, edx) \
|
||||
ff_cpu_xgetbv(index, &eax, &edx)
|
||||
|
||||
#elif HAVE_INLINE_ASM
|
||||
|
||||
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
|
||||
#define cpuid(index, eax, ebx, ecx, edx) \
|
||||
__asm__ volatile ( \
|
||||
"mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \
|
||||
"cpuid \n\t" \
|
||||
"xchg %%"FF_REG_b", %%"FF_REG_S \
|
||||
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
|
||||
: "0" (index), "2"(0))
|
||||
|
||||
#define xgetbv(index, eax, edx) \
|
||||
__asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
|
||||
|
||||
#define get_eflags(x) \
|
||||
__asm__ volatile ("pushfl \n" \
|
||||
"pop %0 \n" \
|
||||
: "=r"(x))
|
||||
|
||||
#define set_eflags(x) \
|
||||
__asm__ volatile ("push %0 \n" \
|
||||
"popfl \n" \
|
||||
:: "r"(x))
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#if ARCH_X86_64
|
||||
|
||||
#define cpuid_test() 1
|
||||
|
||||
#elif HAVE_X86ASM
|
||||
|
||||
#define cpuid_test ff_cpu_cpuid_test
|
||||
|
||||
#elif HAVE_INLINE_ASM
|
||||
|
||||
static int cpuid_test(void)
|
||||
{
|
||||
x86_reg a, c;
|
||||
|
||||
/* Check if CPUID is supported by attempting to toggle the ID bit in
|
||||
* the EFLAGS register. */
|
||||
get_eflags(a);
|
||||
set_eflags(a ^ 0x200000);
|
||||
get_eflags(c);
|
||||
|
||||
return a != c;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Function to test if multimedia instructions are supported... */
|
||||
int ff_get_cpu_flags_x86(void)
|
||||
{
|
||||
int rval = 0;
|
||||
|
||||
#ifdef cpuid
|
||||
|
||||
int eax, ebx, ecx, edx;
|
||||
int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
|
||||
int family = 0, model = 0;
|
||||
union { int i[3]; char c[12]; } vendor;
|
||||
int xcr0_lo = 0, xcr0_hi = 0;
|
||||
|
||||
if (!cpuid_test())
|
||||
return 0; /* CPUID not supported */
|
||||
|
||||
cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
|
||||
|
||||
if (max_std_level >= 1) {
|
||||
cpuid(1, eax, ebx, ecx, std_caps);
|
||||
family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
|
||||
model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
|
||||
if (std_caps & (1 << 15))
|
||||
rval |= AV_CPU_FLAG_CMOV;
|
||||
if (std_caps & (1 << 23))
|
||||
rval |= AV_CPU_FLAG_MMX;
|
||||
if (std_caps & (1 << 25))
|
||||
rval |= AV_CPU_FLAG_MMXEXT;
|
||||
#if HAVE_SSE
|
||||
if (std_caps & (1 << 25))
|
||||
rval |= AV_CPU_FLAG_SSE;
|
||||
if (std_caps & (1 << 26))
|
||||
rval |= AV_CPU_FLAG_SSE2;
|
||||
if (ecx & 1)
|
||||
rval |= AV_CPU_FLAG_SSE3;
|
||||
if (ecx & 0x00000200 )
|
||||
rval |= AV_CPU_FLAG_SSSE3;
|
||||
if (ecx & 0x00080000 )
|
||||
rval |= AV_CPU_FLAG_SSE4;
|
||||
if (ecx & 0x00100000 )
|
||||
rval |= AV_CPU_FLAG_SSE42;
|
||||
if (ecx & 0x02000000 )
|
||||
rval |= AV_CPU_FLAG_AESNI;
|
||||
#if HAVE_AVX
|
||||
/* Check OXSAVE and AVX bits */
|
||||
if ((ecx & 0x18000000) == 0x18000000) {
|
||||
/* Check for OS support */
|
||||
xgetbv(0, xcr0_lo, xcr0_hi);
|
||||
if ((xcr0_lo & 0x6) == 0x6) {
|
||||
rval |= AV_CPU_FLAG_AVX;
|
||||
if (ecx & 0x00001000)
|
||||
rval |= AV_CPU_FLAG_FMA3;
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_AVX */
|
||||
#endif /* HAVE_SSE */
|
||||
}
|
||||
if (max_std_level >= 7) {
|
||||
cpuid(7, eax, ebx, ecx, edx);
|
||||
#if HAVE_AVX2
|
||||
if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020))
|
||||
rval |= AV_CPU_FLAG_AVX2;
|
||||
#if HAVE_AVX512 /* F, CD, BW, DQ, VL */
|
||||
if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
|
||||
if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
|
||||
rval |= AV_CPU_FLAG_AVX512;
|
||||
|
||||
}
|
||||
#endif /* HAVE_AVX512 */
|
||||
#endif /* HAVE_AVX2 */
|
||||
/* BMI1/2 don't need OS support */
|
||||
if (ebx & 0x00000008) {
|
||||
rval |= AV_CPU_FLAG_BMI1;
|
||||
if (ebx & 0x00000100)
|
||||
rval |= AV_CPU_FLAG_BMI2;
|
||||
}
|
||||
}
|
||||
|
||||
cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
|
||||
|
||||
if (max_ext_level >= 0x80000001) {
|
||||
cpuid(0x80000001, eax, ebx, ecx, ext_caps);
|
||||
if (ext_caps & (1U << 31))
|
||||
rval |= AV_CPU_FLAG_3DNOW;
|
||||
if (ext_caps & (1 << 30))
|
||||
rval |= AV_CPU_FLAG_3DNOWEXT;
|
||||
if (ext_caps & (1 << 23))
|
||||
rval |= AV_CPU_FLAG_MMX;
|
||||
if (ext_caps & (1 << 22))
|
||||
rval |= AV_CPU_FLAG_MMXEXT;
|
||||
|
||||
if (!strncmp(vendor.c, "AuthenticAMD", 12)) {
|
||||
/* Allow for selectively disabling SSE2 functions on AMD processors
|
||||
with SSE2 support but not SSE4a. This includes Athlon64, some
|
||||
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
|
||||
than SSE2 often enough to utilize this special-case flag.
|
||||
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
|
||||
so that SSE2 is used unless explicitly disabled by checking
|
||||
AV_CPU_FLAG_SSE2SLOW. */
|
||||
if (rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040))
|
||||
rval |= AV_CPU_FLAG_SSE2SLOW;
|
||||
|
||||
/* Similar to the above but for AVX functions on AMD processors.
|
||||
This is necessary only for functions using YMM registers on Bulldozer
|
||||
and Jaguar based CPUs as they lack 256-bit execution units. SSE/AVX
|
||||
functions using XMM registers are always faster on them.
|
||||
AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is
|
||||
used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW. */
|
||||
if ((family == 0x15 || family == 0x16) && (rval & AV_CPU_FLAG_AVX))
|
||||
rval |= AV_CPU_FLAG_AVXSLOW;
|
||||
}
|
||||
|
||||
/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
|
||||
* used unless the OS has AVX support. */
|
||||
if (rval & AV_CPU_FLAG_AVX) {
|
||||
if (ecx & 0x00000800)
|
||||
rval |= AV_CPU_FLAG_XOP;
|
||||
if (ecx & 0x00010000)
|
||||
rval |= AV_CPU_FLAG_FMA4;
|
||||
}
|
||||
}
|
||||
|
||||
if (!strncmp(vendor.c, "GenuineIntel", 12)) {
|
||||
if (family == 6 && (model == 9 || model == 13 || model == 14)) {
|
||||
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
|
||||
* 6/14 (core1 "yonah") theoretically support sse2, but it's
|
||||
* usually slower than mmx, so let's just pretend they don't.
|
||||
* AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is
|
||||
* enabled so that SSE2 is not used unless explicitly enabled
|
||||
* by checking AV_CPU_FLAG_SSE2SLOW. The same situation
|
||||
* applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */
|
||||
if (rval & AV_CPU_FLAG_SSE2)
|
||||
rval ^= AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2;
|
||||
if (rval & AV_CPU_FLAG_SSE3)
|
||||
rval ^= AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3;
|
||||
}
|
||||
/* The Atom processor has SSSE3 support, which is useful in many cases,
|
||||
* but sometimes the SSSE3 version is slower than the SSE2 equivalent
|
||||
* on the Atom, but is generally faster on other processors supporting
|
||||
* SSSE3. This flag allows for selectively disabling certain SSSE3
|
||||
* functions on the Atom. */
|
||||
if (family == 6 && model == 28)
|
||||
rval |= AV_CPU_FLAG_ATOM;
|
||||
|
||||
/* Conroe has a slow shuffle unit. Check the model number to ensure not
|
||||
* to include crippled low-end Penryns and Nehalems that lack SSE4. */
|
||||
if ((rval & AV_CPU_FLAG_SSSE3) && !(rval & AV_CPU_FLAG_SSE4) &&
|
||||
family == 6 && model < 23)
|
||||
rval |= AV_CPU_FLAG_SSSE3SLOW;
|
||||
}
|
||||
|
||||
#endif /* cpuid */
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
size_t ff_get_cpu_max_align_x86(void)
|
||||
{
|
||||
int flags = av_get_cpu_flags();
|
||||
|
||||
if (flags & AV_CPU_FLAG_AVX512)
|
||||
return 64;
|
||||
if (flags & (AV_CPU_FLAG_AVX2 |
|
||||
AV_CPU_FLAG_AVX |
|
||||
AV_CPU_FLAG_XOP |
|
||||
AV_CPU_FLAG_FMA4 |
|
||||
AV_CPU_FLAG_FMA3 |
|
||||
AV_CPU_FLAG_AVXSLOW))
|
||||
return 32;
|
||||
if (flags & (AV_CPU_FLAG_AESNI |
|
||||
AV_CPU_FLAG_SSE42 |
|
||||
AV_CPU_FLAG_SSE4 |
|
||||
AV_CPU_FLAG_SSSE3 |
|
||||
AV_CPU_FLAG_SSE3 |
|
||||
AV_CPU_FLAG_SSE2 |
|
||||
AV_CPU_FLAG_SSE |
|
||||
AV_CPU_FLAG_ATOM |
|
||||
AV_CPU_FLAG_SSSE3SLOW |
|
||||
AV_CPU_FLAG_SSE3SLOW |
|
||||
AV_CPU_FLAG_SSE2SLOW))
|
||||
return 16;
|
||||
|
||||
return 8;
|
||||
}
|
113
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/cpu.h
vendored
Normal file
113
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/cpu.h
vendored
Normal file
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_CPU_H
|
||||
#define AVUTIL_X86_CPU_H
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
|
||||
#define AV_CPU_FLAG_AMD3DNOW AV_CPU_FLAG_3DNOW
|
||||
#define AV_CPU_FLAG_AMD3DNOWEXT AV_CPU_FLAG_3DNOWEXT
|
||||
|
||||
#define X86_AMD3DNOW(flags) CPUEXT(flags, AMD3DNOW)
|
||||
#define X86_AMD3DNOWEXT(flags) CPUEXT(flags, AMD3DNOWEXT)
|
||||
#define X86_MMX(flags) CPUEXT(flags, MMX)
|
||||
#define X86_MMXEXT(flags) CPUEXT(flags, MMXEXT)
|
||||
#define X86_SSE(flags) CPUEXT(flags, SSE)
|
||||
#define X86_SSE2(flags) CPUEXT(flags, SSE2)
|
||||
#define X86_SSE2_FAST(flags) CPUEXT_FAST(flags, SSE2)
|
||||
#define X86_SSE2_SLOW(flags) CPUEXT_SLOW(flags, SSE2)
|
||||
#define X86_SSE3(flags) CPUEXT(flags, SSE3)
|
||||
#define X86_SSE3_FAST(flags) CPUEXT_FAST(flags, SSE3)
|
||||
#define X86_SSE3_SLOW(flags) CPUEXT_SLOW(flags, SSE3)
|
||||
#define X86_SSSE3(flags) CPUEXT(flags, SSSE3)
|
||||
#define X86_SSSE3_FAST(flags) CPUEXT_FAST(flags, SSSE3)
|
||||
#define X86_SSSE3_SLOW(flags) CPUEXT_SLOW(flags, SSSE3)
|
||||
#define X86_SSE4(flags) CPUEXT(flags, SSE4)
|
||||
#define X86_SSE42(flags) CPUEXT(flags, SSE42)
|
||||
#define X86_AVX(flags) CPUEXT(flags, AVX)
|
||||
#define X86_AVX_FAST(flags) CPUEXT_FAST(flags, AVX)
|
||||
#define X86_AVX_SLOW(flags) CPUEXT_SLOW(flags, AVX)
|
||||
#define X86_XOP(flags) CPUEXT(flags, XOP)
|
||||
#define X86_FMA3(flags) CPUEXT(flags, FMA3)
|
||||
#define X86_FMA4(flags) CPUEXT(flags, FMA4)
|
||||
#define X86_AVX2(flags) CPUEXT(flags, AVX2)
|
||||
#define X86_AESNI(flags) CPUEXT(flags, AESNI)
|
||||
#define X86_AVX512(flags) CPUEXT(flags, AVX512)
|
||||
|
||||
#define EXTERNAL_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
|
||||
#define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT)
|
||||
#define EXTERNAL_MMX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMX)
|
||||
#define EXTERNAL_MMXEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMXEXT)
|
||||
#define EXTERNAL_SSE(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE)
|
||||
#define EXTERNAL_SSE2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE2)
|
||||
#define EXTERNAL_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE2)
|
||||
#define EXTERNAL_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE2)
|
||||
#define EXTERNAL_SSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE3)
|
||||
#define EXTERNAL_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE3)
|
||||
#define EXTERNAL_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE3)
|
||||
#define EXTERNAL_SSSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSSE3)
|
||||
#define EXTERNAL_SSSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSSE3)
|
||||
#define EXTERNAL_SSSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSSE3)
|
||||
#define EXTERNAL_SSE4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4)
|
||||
#define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42)
|
||||
#define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX)
|
||||
#define EXTERNAL_AVX_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, AVX)
|
||||
#define EXTERNAL_AVX_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, AVX)
|
||||
#define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
|
||||
#define EXTERNAL_FMA3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3)
|
||||
#define EXTERNAL_FMA3_FAST(flags) CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, FMA3, AVX)
|
||||
#define EXTERNAL_FMA3_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, FMA3, AVX)
|
||||
#define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
|
||||
#define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2)
|
||||
#define EXTERNAL_AVX2_FAST(flags) CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, AVX2, AVX)
|
||||
#define EXTERNAL_AVX2_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX)
|
||||
#define EXTERNAL_AESNI(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
|
||||
#define EXTERNAL_AVX512(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
|
||||
|
||||
#define INLINE_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
|
||||
#define INLINE_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)
|
||||
#define INLINE_MMX(flags) CPUEXT_SUFFIX(flags, _INLINE, MMX)
|
||||
#define INLINE_MMXEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, MMXEXT)
|
||||
#define INLINE_SSE(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE)
|
||||
#define INLINE_SSE2(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE2)
|
||||
#define INLINE_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSE2)
|
||||
#define INLINE_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSE2)
|
||||
#define INLINE_SSE3(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE3)
|
||||
#define INLINE_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSE3)
|
||||
#define INLINE_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSE3)
|
||||
#define INLINE_SSSE3(flags) CPUEXT_SUFFIX(flags, _INLINE, SSSE3)
|
||||
#define INLINE_SSSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSSE3)
|
||||
#define INLINE_SSSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSSE3)
|
||||
#define INLINE_SSE4(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE4)
|
||||
#define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42)
|
||||
#define INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX)
|
||||
#define INLINE_AVX_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, AVX)
|
||||
#define INLINE_AVX_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, AVX)
|
||||
#define INLINE_XOP(flags) CPUEXT_SUFFIX(flags, _INLINE, XOP)
|
||||
#define INLINE_FMA3(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA3)
|
||||
#define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4)
|
||||
#define INLINE_AVX2(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX2)
|
||||
#define INLINE_AESNI(flags) CPUEXT_SUFFIX(flags, _INLINE, AESNI)
|
||||
|
||||
void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx);
|
||||
void ff_cpu_xgetbv(int op, int *eax, int *edx);
|
||||
int ff_cpu_cpuid_test(void);
|
||||
|
||||
#endif /* AVUTIL_X86_CPU_H */
|
55
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/emms.h
vendored
Normal file
55
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/emms.h
vendored
Normal file
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_EMMS_H
|
||||
#define AVUTIL_X86_EMMS_H
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
|
||||
void avpriv_emms_asm(void);
|
||||
|
||||
#if HAVE_MMX_INLINE
|
||||
# define emms_c emms_c
|
||||
/**
|
||||
* Empty mmx state.
|
||||
* this must be called between any dsp function and float/double code.
|
||||
* for example sin(); dsp->idct_put(); emms_c(); cos()
|
||||
* Note, *alloc() and *free() also use float code in some libc implementations
|
||||
* thus this also applies to them or any function using them.
|
||||
*/
|
||||
static av_always_inline void emms_c(void)
|
||||
{
|
||||
/* Some inlined functions may also use mmx instructions regardless of
|
||||
* runtime cpuflags. With that in mind, we unconditionally empty the
|
||||
* mmx state if the target cpu chosen at configure time supports it.
|
||||
*/
|
||||
#if !defined(__MMX__)
|
||||
if(av_get_cpu_flags() & AV_CPU_FLAG_MMX)
|
||||
#endif
|
||||
__asm__ volatile ("emms" ::: "memory");
|
||||
}
|
||||
#elif HAVE_MMX && HAVE_MM_EMPTY
|
||||
# include <mmintrin.h>
|
||||
# define emms_c _mm_empty
|
||||
#elif HAVE_MMX_EXTERNAL
|
||||
# define emms_c avpriv_emms_asm
|
||||
#endif /* HAVE_MMX_INLINE */
|
||||
|
||||
#endif /* AVUTIL_X86_EMMS_H */
|
35
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/fixed_dsp_init.c
vendored
Normal file
35
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/fixed_dsp_init.c
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/fixed_dsp.h"
|
||||
#include "cpu.h"
|
||||
|
||||
void ff_butterflies_fixed_sse2(int *src0, int *src1, int len);
|
||||
|
||||
av_cold void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
fdsp->butterflies_fixed = ff_butterflies_fixed_sse2;
|
||||
}
|
||||
}
|
121
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/float_dsp_init.c
vendored
Normal file
121
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/float_dsp_init.c
vendored
Normal file
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "cpu.h"
|
||||
#include "asm.h"
|
||||
|
||||
void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
|
||||
int len);
|
||||
void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
|
||||
int len);
|
||||
|
||||
void ff_vector_dmul_sse2(double *dst, const double *src0, const double *src1,
|
||||
int len);
|
||||
void ff_vector_dmul_avx(double *dst, const double *src0, const double *src1,
|
||||
int len);
|
||||
|
||||
void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
void ff_vector_fmac_scalar_fma3(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_dmac_scalar_sse2(double *dst, const double *src, double mul,
|
||||
int len);
|
||||
void ff_vector_dmac_scalar_avx(double *dst, const double *src, double mul,
|
||||
int len);
|
||||
void ff_vector_dmac_scalar_fma3(double *dst, const double *src, double mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
|
||||
double mul, int len);
|
||||
void ff_vector_dmul_scalar_avx(double *dst, const double *src,
|
||||
double mul, int len);
|
||||
|
||||
void ff_vector_fmul_window_3dnowext(float *dst, const float *src0,
|
||||
const float *src1, const float *win, int len);
|
||||
void ff_vector_fmul_window_sse(float *dst, const float *src0,
|
||||
const float *src1, const float *win, int len);
|
||||
|
||||
void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
|
||||
const float *src2, int len);
|
||||
void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
|
||||
const float *src2, int len);
|
||||
void ff_vector_fmul_add_fma3(float *dst, const float *src0, const float *src1,
|
||||
const float *src2, int len);
|
||||
|
||||
void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
void ff_vector_fmul_reverse_avx2(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
|
||||
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
||||
|
||||
void ff_butterflies_float_sse(float *av_restrict src0, float *av_restrict src1, int len);
|
||||
|
||||
av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
|
||||
fdsp->vector_fmul_window = ff_vector_fmul_window_3dnowext;
|
||||
}
|
||||
if (EXTERNAL_SSE(cpu_flags)) {
|
||||
fdsp->vector_fmul = ff_vector_fmul_sse;
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
|
||||
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
|
||||
fdsp->vector_fmul_window = ff_vector_fmul_window_sse;
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
|
||||
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
|
||||
fdsp->butterflies_float = ff_butterflies_float_sse;
|
||||
}
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
fdsp->vector_dmul = ff_vector_dmul_sse2;
|
||||
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2;
|
||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
fdsp->vector_fmul = ff_vector_fmul_avx;
|
||||
fdsp->vector_dmul = ff_vector_dmul_avx;
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
|
||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
|
||||
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx;
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
|
||||
}
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2;
|
||||
}
|
||||
if (EXTERNAL_FMA3_FAST(cpu_flags)) {
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_fma3;
|
||||
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_fma3;
|
||||
}
|
||||
}
|
49
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/imgutils_init.c
vendored
Normal file
49
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/imgutils_init.c
vendored
Normal file
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/error.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
#include "libavutil/imgutils_internal.h"
|
||||
#include "libavutil/internal.h"
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
void ff_image_copy_plane_uc_from_sse4(uint8_t *dst, ptrdiff_t dst_linesize,
|
||||
const uint8_t *src, ptrdiff_t src_linesize,
|
||||
ptrdiff_t bytewidth, int height);
|
||||
|
||||
int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize,
|
||||
const uint8_t *src, ptrdiff_t src_linesize,
|
||||
ptrdiff_t bytewidth, int height)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
ptrdiff_t bw_aligned = FFALIGN(bytewidth, 64);
|
||||
|
||||
if (EXTERNAL_SSE4(cpu_flags) &&
|
||||
bw_aligned <= dst_linesize && bw_aligned <= src_linesize)
|
||||
ff_image_copy_plane_uc_from_sse4(dst, dst_linesize, src, src_linesize,
|
||||
bw_aligned, height);
|
||||
else
|
||||
return AVERROR(ENOSYS);
|
||||
|
||||
return 0;
|
||||
}
|
139
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/intmath.h
vendored
Normal file
139
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/intmath.h
vendored
Normal file
|
@ -0,0 +1,139 @@
|
|||
/*
|
||||
* Copyright (c) 2015 James Almer
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_INTMATH_H
|
||||
#define AVUTIL_X86_INTMATH_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#if HAVE_FAST_CLZ
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#include "config.h"
|
||||
|
||||
#if HAVE_FAST_CLZ
|
||||
#if (defined(__INTEL_COMPILER) && (__INTEL_COMPILER>=1216)) || defined(_MSC_VER)
|
||||
# if defined(__INTEL_COMPILER)
|
||||
# define ff_log2(x) (_bit_scan_reverse((x)|1))
|
||||
# else
|
||||
# define ff_log2 ff_log2_x86
|
||||
static av_always_inline av_const int ff_log2_x86(unsigned int v)
|
||||
{
|
||||
unsigned long n;
|
||||
_BitScanReverse(&n, v|1);
|
||||
return n;
|
||||
}
|
||||
# endif
|
||||
# define ff_log2_16bit av_log2
|
||||
|
||||
#if defined(__INTEL_COMPILER) || (defined(_MSC_VER) && (_MSC_VER >= 1700) && \
|
||||
(defined(__BMI__) || !defined(__clang__)))
|
||||
# define ff_ctz(v) _tzcnt_u32(v)
|
||||
|
||||
# if ARCH_X86_64
|
||||
# define ff_ctzll(v) _tzcnt_u64(v)
|
||||
# else
|
||||
# define ff_ctzll ff_ctzll_x86
|
||||
static av_always_inline av_const int ff_ctzll_x86(long long v)
|
||||
{
|
||||
return ((uint32_t)v == 0) ? _tzcnt_u32((uint32_t)(v >> 32)) + 32 : _tzcnt_u32((uint32_t)v);
|
||||
}
|
||||
# endif
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
#endif /* __INTEL_COMPILER */
|
||||
|
||||
#endif /* HAVE_FAST_CLZ */
|
||||
|
||||
#if defined(__GNUC__)
|
||||
|
||||
/* Our generic version of av_popcount is faster than GCC's built-in on
|
||||
* CPUs that don't support the popcnt instruction.
|
||||
*/
|
||||
#if defined(__POPCNT__)
|
||||
#define av_popcount __builtin_popcount
|
||||
#if ARCH_X86_64
|
||||
#define av_popcount64 __builtin_popcountll
|
||||
#endif
|
||||
|
||||
#endif /* __POPCNT__ */
|
||||
|
||||
#if defined(__BMI2__)
|
||||
|
||||
#if AV_GCC_VERSION_AT_LEAST(5,1)
|
||||
#define av_mod_uintp2 __builtin_ia32_bzhi_si
|
||||
#elif HAVE_INLINE_ASM
|
||||
/* GCC releases before 5.1.0 have a broken bzhi builtin, so for those we
|
||||
* implement it using inline assembly
|
||||
*/
|
||||
#define av_mod_uintp2 av_mod_uintp2_bmi2
|
||||
static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigned p)
|
||||
{
|
||||
if (av_builtin_constant_p(p))
|
||||
return a & ((1 << p) - 1);
|
||||
else {
|
||||
unsigned x;
|
||||
__asm__ ("bzhi %2, %1, %0 \n\t" : "=r"(x) : "rm"(a), "r"(p));
|
||||
return x;
|
||||
}
|
||||
}
|
||||
#endif /* AV_GCC_VERSION_AT_LEAST */
|
||||
|
||||
#endif /* __BMI2__ */
|
||||
|
||||
#if defined(__SSE2__) && !defined(__INTEL_COMPILER)
|
||||
|
||||
#define av_clipd av_clipd_sse2
|
||||
static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax)
|
||||
{
|
||||
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
|
||||
if (amin > amax) abort();
|
||||
#endif
|
||||
__asm__ ("minsd %2, %0 \n\t"
|
||||
"maxsd %1, %0 \n\t"
|
||||
: "+&x"(a) : "xm"(amin), "xm"(amax));
|
||||
return a;
|
||||
}
|
||||
|
||||
#endif /* __SSE2__ */
|
||||
|
||||
#if defined(__SSE__) && !defined(__INTEL_COMPILER)
|
||||
|
||||
#define av_clipf av_clipf_sse
|
||||
static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax)
|
||||
{
|
||||
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
|
||||
if (amin > amax) abort();
|
||||
#endif
|
||||
__asm__ ("minss %2, %0 \n\t"
|
||||
"maxss %1, %0 \n\t"
|
||||
: "+&x"(a) : "xm"(amin), "xm"(amax));
|
||||
return a;
|
||||
}
|
||||
|
||||
#endif /* __SSE__ */
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#endif /* AVUTIL_X86_INTMATH_H */
|
97
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/intreadwrite.h
vendored
Normal file
97
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/intreadwrite.h
vendored
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright (c) 2010 Alexander Strange <astrange@ithinksw.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_INTREADWRITE_H
|
||||
#define AVUTIL_X86_INTREADWRITE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if HAVE_MMX
|
||||
|
||||
#if !HAVE_FAST_64BIT && defined(__MMX__)
|
||||
|
||||
#define AV_COPY64 AV_COPY64
|
||||
static av_always_inline void AV_COPY64(void *d, const void *s)
|
||||
{
|
||||
__asm__("movq %1, %%mm0 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
: "=m"(*(uint64_t*)d)
|
||||
: "m" (*(const uint64_t*)s)
|
||||
: "mm0");
|
||||
}
|
||||
|
||||
#define AV_SWAP64 AV_SWAP64
|
||||
static av_always_inline void AV_SWAP64(void *a, void *b)
|
||||
{
|
||||
__asm__("movq %1, %%mm0 \n\t"
|
||||
"movq %0, %%mm1 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
"movq %%mm1, %1 \n\t"
|
||||
: "+m"(*(uint64_t*)a), "+m"(*(uint64_t*)b)
|
||||
::"mm0", "mm1");
|
||||
}
|
||||
|
||||
#define AV_ZERO64 AV_ZERO64
|
||||
static av_always_inline void AV_ZERO64(void *d)
|
||||
{
|
||||
__asm__("pxor %%mm0, %%mm0 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
: "=m"(*(uint64_t*)d)
|
||||
:: "mm0");
|
||||
}
|
||||
|
||||
#endif /* !HAVE_FAST_64BIT && defined(__MMX__) */
|
||||
|
||||
#ifdef __SSE__
|
||||
|
||||
#define AV_COPY128 AV_COPY128
|
||||
static av_always_inline void AV_COPY128(void *d, const void *s)
|
||||
{
|
||||
struct v {uint64_t v[2];};
|
||||
|
||||
__asm__("movaps %1, %%xmm0 \n\t"
|
||||
"movaps %%xmm0, %0 \n\t"
|
||||
: "=m"(*(struct v*)d)
|
||||
: "m" (*(const struct v*)s)
|
||||
: "xmm0");
|
||||
}
|
||||
|
||||
#endif /* __SSE__ */
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
#define AV_ZERO128 AV_ZERO128
|
||||
static av_always_inline void AV_ZERO128(void *d)
|
||||
{
|
||||
struct v {uint64_t v[2];};
|
||||
|
||||
__asm__("pxor %%xmm0, %%xmm0 \n\t"
|
||||
"movdqa %%xmm0, %0 \n\t"
|
||||
: "=m"(*(struct v*)d)
|
||||
:: "xmm0");
|
||||
}
|
||||
|
||||
#endif /* __SSE2__ */
|
||||
|
||||
#endif /* HAVE_MMX */
|
||||
|
||||
#endif /* AVUTIL_X86_INTREADWRITE_H */
|
45
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/lls_init.c
vendored
Normal file
45
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/lls_init.c
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* linear least squares model
|
||||
*
|
||||
* Copyright (c) 2013 Loren Merritt
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/lls.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
|
||||
void ff_update_lls_sse2(LLSModel *m, const double *var);
|
||||
void ff_update_lls_avx(LLSModel *m, const double *var);
|
||||
void ff_update_lls_fma3(LLSModel *m, const double *var);
|
||||
double ff_evaluate_lls_sse2(LLSModel *m, const double *var, int order);
|
||||
|
||||
av_cold void ff_init_lls_x86(LLSModel *m)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
m->update_lls = ff_update_lls_sse2;
|
||||
if (m->indep_count >= 4)
|
||||
m->evaluate_lls = ff_evaluate_lls_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
m->update_lls = ff_update_lls_avx;
|
||||
}
|
||||
if (EXTERNAL_FMA3_FAST(cpu_flags)) {
|
||||
m->update_lls = ff_update_lls_fma3;
|
||||
}
|
||||
}
|
26
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/pixelutils.h
vendored
Normal file
26
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/pixelutils.h
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_PIXELUTILS_H
|
||||
#define AVUTIL_X86_PIXELUTILS_H
|
||||
|
||||
#include "libavutil/pixelutils.h"
|
||||
|
||||
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned);
|
||||
|
||||
#endif /* AVUTIL_X86_PIXELUTILS_H */
|
94
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/pixelutils_init.c
vendored
Normal file
94
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/pixelutils_init.c
vendored
Normal file
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pixelutils.h"
|
||||
#include "cpu.h"
|
||||
|
||||
int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_a_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_u_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_a_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_u_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
sad[2] = ff_pixelutils_sad_8x8_mmx;
|
||||
}
|
||||
|
||||
// The best way to use SSE2 would be to do 2 SADs in parallel,
|
||||
// but we'd have to modify the pixelutils API to return SIMD functions.
|
||||
|
||||
// It's probably not faster to shuffle data around
|
||||
// to get two lines of 8 pixels into a single 16byte register,
|
||||
// so just use the MMX 8x8 version even when SSE2 is available.
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
sad[2] = ff_pixelutils_sad_8x8_mmxext;
|
||||
sad[3] = ff_pixelutils_sad_16x16_mmxext;
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
switch (aligned) {
|
||||
case 0: sad[3] = ff_pixelutils_sad_16x16_sse2; break; // src1 unaligned, src2 unaligned
|
||||
case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned, src2 unaligned
|
||||
case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
switch (aligned) {
|
||||
case 0: sad[4] = ff_pixelutils_sad_32x32_sse2; break; // src1 unaligned, src2 unaligned
|
||||
case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1 aligned, src2 unaligned
|
||||
case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1 aligned, src2 aligned
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
switch (aligned) {
|
||||
case 0: sad[4] = ff_pixelutils_sad_32x32_avx2; break; // src1 unaligned, src2 unaligned
|
||||
case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1 aligned, src2 unaligned
|
||||
case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1 aligned, src2 aligned
|
||||
}
|
||||
}
|
||||
}
|
50
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/timer.h
vendored
Normal file
50
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/timer.h
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_TIMER_H
|
||||
#define AVUTIL_X86_TIMER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
#define FF_TIMER_UNITS "decicycles"
|
||||
#define AV_READ_TIME read_time
|
||||
|
||||
static inline uint64_t read_time(void)
|
||||
{
|
||||
uint32_t a, d;
|
||||
__asm__ volatile(
|
||||
#if ARCH_X86_64 || defined(__SSE2__)
|
||||
"lfence \n\t"
|
||||
#endif
|
||||
"rdtsc \n\t"
|
||||
: "=a" (a), "=d" (d));
|
||||
return ((uint64_t)d << 32) + a;
|
||||
}
|
||||
|
||||
#elif HAVE_RDTSC
|
||||
|
||||
#include <intrin.h>
|
||||
#define AV_READ_TIME __rdtsc
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#endif /* AVUTIL_X86_TIMER_H */
|
78
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/w64xmmtest.h
vendored
Normal file
78
trunk/3rdparty/ffmpeg-4.2-fit/libavutil/x86/w64xmmtest.h
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* check XMM registers for clobbers on Win64
|
||||
* Copyright (c) 2008 Ramiro Polla <ramiro.polla@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_W64XMMTEST_H
|
||||
#define AVUTIL_X86_W64XMMTEST_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/bswap.h"
|
||||
|
||||
#define storexmmregs(mem) \
|
||||
__asm__ volatile( \
|
||||
"movups %%xmm6 , 0x00(%0)\n\t" \
|
||||
"movups %%xmm7 , 0x10(%0)\n\t" \
|
||||
"movups %%xmm8 , 0x20(%0)\n\t" \
|
||||
"movups %%xmm9 , 0x30(%0)\n\t" \
|
||||
"movups %%xmm10, 0x40(%0)\n\t" \
|
||||
"movups %%xmm11, 0x50(%0)\n\t" \
|
||||
"movups %%xmm12, 0x60(%0)\n\t" \
|
||||
"movups %%xmm13, 0x70(%0)\n\t" \
|
||||
"movups %%xmm14, 0x80(%0)\n\t" \
|
||||
"movups %%xmm15, 0x90(%0)\n\t" \
|
||||
:: "r"(mem) : "memory")
|
||||
|
||||
#define testxmmclobbers(func, ctx, ...) \
|
||||
uint64_t xmm[2][10][2]; \
|
||||
int ret; \
|
||||
storexmmregs(xmm[0]); \
|
||||
ret = __real_ ## func(ctx, __VA_ARGS__); \
|
||||
storexmmregs(xmm[1]); \
|
||||
if (memcmp(xmm[0], xmm[1], sizeof(xmm[0]))) { \
|
||||
int i; \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
"XMM REGS CLOBBERED IN %s!\n", #func); \
|
||||
for (i = 0; i < 10; i ++) \
|
||||
if (xmm[0][i][0] != xmm[1][i][0] || \
|
||||
xmm[0][i][1] != xmm[1][i][1]) { \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
"xmm%-2d = %016"PRIx64"%016"PRIx64"\n", \
|
||||
6 + i, av_bswap64(xmm[0][i][0]), \
|
||||
av_bswap64(xmm[0][i][1])); \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
" -> %016"PRIx64"%016"PRIx64"\n", \
|
||||
av_bswap64(xmm[1][i][0]), \
|
||||
av_bswap64(xmm[1][i][1])); \
|
||||
} \
|
||||
abort(); \
|
||||
} \
|
||||
return ret
|
||||
|
||||
#define wrap(func) \
|
||||
int __real_ ## func; \
|
||||
int __wrap_ ## func; \
|
||||
int __wrap_ ## func
|
||||
|
||||
#endif /* AVUTIL_X86_W64XMMTEST_H */
|
Loading…
Add table
Add a link
Reference in a new issue