mirror of
https://github.com/ossrs/srs.git
synced 2025-03-09 15:49:59 +00:00
Rename ffmpeg-4.2-fit to ffmpeg-4-fit
This commit is contained in:
parent
b19074721c
commit
27712fdda7
720 changed files with 14 additions and 14 deletions
18
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/Makefile
vendored
Normal file
18
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/Makefile
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
OBJS += x86/cpu.o \
|
||||
x86/fixed_dsp_init.o \
|
||||
x86/float_dsp_init.o \
|
||||
x86/imgutils_init.o \
|
||||
x86/lls_init.o \
|
||||
|
||||
OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \
|
||||
|
||||
EMMS_OBJS_$(HAVE_MMX_INLINE)_$(HAVE_MMX_EXTERNAL)_$(HAVE_MM_EMPTY) = x86/emms.o
|
||||
|
||||
X86ASM-OBJS += x86/cpuid.o \
|
||||
$(EMMS_OBJS__yes_) \
|
||||
x86/fixed_dsp.o \
|
||||
x86/float_dsp.o \
|
||||
x86/imgutils.o \
|
||||
x86/lls.o \
|
||||
|
||||
X86ASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \
|
154
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/asm.h
vendored
Normal file
154
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/asm.h
vendored
Normal file
|
@ -0,0 +1,154 @@
|
|||
/*
|
||||
* copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_ASM_H
|
||||
#define AVUTIL_X86_ASM_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
|
||||
typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
|
||||
typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
|
||||
|
||||
#if ARCH_X86_64
|
||||
# define FF_OPSIZE "q"
|
||||
# define FF_REG_a "rax"
|
||||
# define FF_REG_b "rbx"
|
||||
# define FF_REG_c "rcx"
|
||||
# define FF_REG_d "rdx"
|
||||
# define FF_REG_D "rdi"
|
||||
# define FF_REG_S "rsi"
|
||||
# define FF_PTR_SIZE "8"
|
||||
typedef int64_t x86_reg;
|
||||
|
||||
/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */
|
||||
# define FF_REG_sp "rsp"
|
||||
# define FF_REG_BP "rbp"
|
||||
# define FF_REGBP rbp
|
||||
# define FF_REGa rax
|
||||
# define FF_REGb rbx
|
||||
# define FF_REGc rcx
|
||||
# define FF_REGd rdx
|
||||
# define FF_REGSP rsp
|
||||
|
||||
#elif ARCH_X86_32
|
||||
|
||||
# define FF_OPSIZE "l"
|
||||
# define FF_REG_a "eax"
|
||||
# define FF_REG_b "ebx"
|
||||
# define FF_REG_c "ecx"
|
||||
# define FF_REG_d "edx"
|
||||
# define FF_REG_D "edi"
|
||||
# define FF_REG_S "esi"
|
||||
# define FF_PTR_SIZE "4"
|
||||
typedef int32_t x86_reg;
|
||||
|
||||
# define FF_REG_sp "esp"
|
||||
# define FF_REG_BP "ebp"
|
||||
# define FF_REGBP ebp
|
||||
# define FF_REGa eax
|
||||
# define FF_REGb ebx
|
||||
# define FF_REGc ecx
|
||||
# define FF_REGd edx
|
||||
# define FF_REGSP esp
|
||||
#else
|
||||
typedef int x86_reg;
|
||||
#endif
|
||||
|
||||
#define HAVE_7REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE && HAVE_EBP_AVAILABLE))
|
||||
#define HAVE_6REGS (ARCH_X86_64 || (HAVE_EBX_AVAILABLE || HAVE_EBP_AVAILABLE))
|
||||
|
||||
#if ARCH_X86_64 && defined(PIC)
|
||||
# define BROKEN_RELOCATIONS 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If gcc is not set to support sse (-msse) it will not accept xmm registers
|
||||
* in the clobber list for inline asm. XMM_CLOBBERS takes a list of xmm
|
||||
* registers to be marked as clobbered and evaluates to nothing if they are
|
||||
* not supported, or to the list itself if they are supported. Since a clobber
|
||||
* list may not be empty, XMM_CLOBBERS_ONLY should be used if the xmm
|
||||
* registers are the only in the clobber list.
|
||||
* For example a list with "eax" and "xmm0" as clobbers should become:
|
||||
* : XMM_CLOBBERS("xmm0",) "eax"
|
||||
* and a list with only "xmm0" should become:
|
||||
* XMM_CLOBBERS_ONLY("xmm0")
|
||||
*/
|
||||
#if HAVE_XMM_CLOBBERS
|
||||
# define XMM_CLOBBERS(...) __VA_ARGS__
|
||||
# define XMM_CLOBBERS_ONLY(...) : __VA_ARGS__
|
||||
#else
|
||||
# define XMM_CLOBBERS(...)
|
||||
# define XMM_CLOBBERS_ONLY(...)
|
||||
#endif
|
||||
|
||||
/* Use to export labels from asm. */
|
||||
#define LABEL_MANGLE(a) EXTERN_PREFIX #a
|
||||
|
||||
// Use rip-relative addressing if compiling PIC code on x86-64.
|
||||
#if ARCH_X86_64 && defined(PIC)
|
||||
# define LOCAL_MANGLE(a) #a "(%%rip)"
|
||||
#else
|
||||
# define LOCAL_MANGLE(a) #a
|
||||
#endif
|
||||
|
||||
#if HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS
|
||||
# define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
|
||||
# define NAMED_CONSTRAINTS_ADD(...)
|
||||
# define NAMED_CONSTRAINTS(...)
|
||||
# define NAMED_CONSTRAINTS_ARRAY_ADD(...)
|
||||
# define NAMED_CONSTRAINTS_ARRAY(...)
|
||||
#else
|
||||
/* When direct symbol references are used in code passed to a compiler that does not support them
|
||||
* then these references need to be converted to named asm constraints instead.
|
||||
* Instead of returning a direct symbol MANGLE now returns a named constraint for that specific symbol.
|
||||
* In order for this to work there must also be a corresponding entry in the asm-interface. To add this
|
||||
* entry use the macro NAMED_CONSTRAINTS() and pass in a list of each symbol reference used in the
|
||||
* corresponding block of code. (e.g. NAMED_CONSTRAINTS(var1,var2,var3) where var1 is the first symbol etc. ).
|
||||
* If there are already existing constraints then use NAMED_CONSTRAINTS_ADD to add to the existing constraint list.
|
||||
*/
|
||||
# define MANGLE(a) "%["#a"]"
|
||||
// Intel/MSVC does not correctly expand va-args so we need a rather ugly hack in order to get it to work
|
||||
# define FE_0(P,X) P(X)
|
||||
# define FE_1(P,X,X1) P(X), FE_0(P,X1)
|
||||
# define FE_2(P,X,X1,X2) P(X), FE_1(P,X1,X2)
|
||||
# define FE_3(P,X,X1,X2,X3) P(X), FE_2(P,X1,X2,X3)
|
||||
# define FE_4(P,X,X1,X2,X3,X4) P(X), FE_3(P,X1,X2,X3,X4)
|
||||
# define FE_5(P,X,X1,X2,X3,X4,X5) P(X), FE_4(P,X1,X2,X3,X4,X5)
|
||||
# define FE_6(P,X,X1,X2,X3,X4,X5,X6) P(X), FE_5(P,X1,X2,X3,X4,X5,X6)
|
||||
# define FE_7(P,X,X1,X2,X3,X4,X5,X6,X7) P(X), FE_6(P,X1,X2,X3,X4,X5,X6,X7)
|
||||
# define FE_8(P,X,X1,X2,X3,X4,X5,X6,X7,X8) P(X), FE_7(P,X1,X2,X3,X4,X5,X6,X7,X8)
|
||||
# define FE_9(P,X,X1,X2,X3,X4,X5,X6,X7,X8,X9) P(X), FE_8(P,X1,X2,X3,X4,X5,X6,X7,X8,X9)
|
||||
# define GET_FE_IMPL(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
|
||||
# define GET_FE(A) GET_FE_IMPL A
|
||||
# define GET_FE_GLUE(x, y) x y
|
||||
# define FOR_EACH_VA(P,...) GET_FE_GLUE(GET_FE((__VA_ARGS__,FE_9,FE_8,FE_7,FE_6,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)), (P,__VA_ARGS__))
|
||||
# define NAME_CONSTRAINT(x) [x] "m"(x)
|
||||
// Parameters are a list of each symbol reference required
|
||||
# define NAMED_CONSTRAINTS_ADD(...) , FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
|
||||
// Same but without comma for when there are no previously defined constraints
|
||||
# define NAMED_CONSTRAINTS(...) FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
|
||||
// Same as above NAMED_CONSTRAINTS except used for passing arrays/pointers instead of normal variables
|
||||
# define NAME_CONSTRAINT_ARRAY(x) [x] "m"(*x)
|
||||
# define NAMED_CONSTRAINTS_ARRAY_ADD(...) , FOR_EACH_VA(NAME_CONSTRAINT_ARRAY,__VA_ARGS__)
|
||||
# define NAMED_CONSTRAINTS_ARRAY(...) FOR_EACH_VA(NAME_CONSTRAINT_ARRAY,__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#endif /* AVUTIL_X86_ASM_H */
|
87
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/bswap.h
vendored
Normal file
87
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/bswap.h
vendored
Normal file
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* byte swapping routines
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_BSWAP_H
|
||||
#define AVUTIL_X86_BSWAP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#define av_bswap16 av_bswap16
|
||||
static av_always_inline av_const uint16_t av_bswap16(uint16_t x)
|
||||
{
|
||||
return _rotr16(x, 8);
|
||||
}
|
||||
|
||||
#define av_bswap32 av_bswap32
|
||||
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
|
||||
{
|
||||
return _byteswap_ulong(x);
|
||||
}
|
||||
|
||||
#if ARCH_X86_64
|
||||
#define av_bswap64 av_bswap64
|
||||
static inline uint64_t av_const av_bswap64(uint64_t x)
|
||||
{
|
||||
return _byteswap_uint64(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#elif HAVE_INLINE_ASM
|
||||
|
||||
#if AV_GCC_VERSION_AT_MOST(4,0)
|
||||
#define av_bswap16 av_bswap16
|
||||
static av_always_inline av_const unsigned av_bswap16(unsigned x)
|
||||
{
|
||||
__asm__("rorw $8, %w0" : "+r"(x));
|
||||
return x;
|
||||
}
|
||||
#endif /* AV_GCC_VERSION_AT_MOST(4,0) */
|
||||
|
||||
#if AV_GCC_VERSION_AT_MOST(4,4) || defined(__INTEL_COMPILER)
|
||||
#define av_bswap32 av_bswap32
|
||||
static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
|
||||
{
|
||||
__asm__("bswap %0" : "+r" (x));
|
||||
return x;
|
||||
}
|
||||
|
||||
#if ARCH_X86_64
|
||||
#define av_bswap64 av_bswap64
|
||||
static inline uint64_t av_const av_bswap64(uint64_t x)
|
||||
{
|
||||
__asm__("bswap %0": "=r" (x) : "0" (x));
|
||||
return x;
|
||||
}
|
||||
#endif
|
||||
#endif /* AV_GCC_VERSION_AT_MOST(4,4) */
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
#endif /* AVUTIL_X86_BSWAP_H */
|
272
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/cpu.c
vendored
Normal file
272
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/cpu.c
vendored
Normal file
|
@ -0,0 +1,272 @@
|
|||
/*
|
||||
* CPU detection code, extracted from mmx.h
|
||||
* (c)1997-99 by H. Dietz and R. Fisher
|
||||
* Converted to C and improved by Fabrice Bellard.
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
|
||||
#if HAVE_X86ASM
|
||||
|
||||
#define cpuid(index, eax, ebx, ecx, edx) \
|
||||
ff_cpu_cpuid(index, &eax, &ebx, &ecx, &edx)
|
||||
|
||||
#define xgetbv(index, eax, edx) \
|
||||
ff_cpu_xgetbv(index, &eax, &edx)
|
||||
|
||||
#elif HAVE_INLINE_ASM
|
||||
|
||||
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
|
||||
#define cpuid(index, eax, ebx, ecx, edx) \
|
||||
__asm__ volatile ( \
|
||||
"mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \
|
||||
"cpuid \n\t" \
|
||||
"xchg %%"FF_REG_b", %%"FF_REG_S \
|
||||
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
|
||||
: "0" (index), "2"(0))
|
||||
|
||||
#define xgetbv(index, eax, edx) \
|
||||
__asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
|
||||
|
||||
#define get_eflags(x) \
|
||||
__asm__ volatile ("pushfl \n" \
|
||||
"pop %0 \n" \
|
||||
: "=r"(x))
|
||||
|
||||
#define set_eflags(x) \
|
||||
__asm__ volatile ("push %0 \n" \
|
||||
"popfl \n" \
|
||||
:: "r"(x))
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#if ARCH_X86_64
|
||||
|
||||
#define cpuid_test() 1
|
||||
|
||||
#elif HAVE_X86ASM
|
||||
|
||||
#define cpuid_test ff_cpu_cpuid_test
|
||||
|
||||
#elif HAVE_INLINE_ASM
|
||||
|
||||
static int cpuid_test(void)
|
||||
{
|
||||
x86_reg a, c;
|
||||
|
||||
/* Check if CPUID is supported by attempting to toggle the ID bit in
|
||||
* the EFLAGS register. */
|
||||
get_eflags(a);
|
||||
set_eflags(a ^ 0x200000);
|
||||
get_eflags(c);
|
||||
|
||||
return a != c;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Function to test if multimedia instructions are supported... */
|
||||
int ff_get_cpu_flags_x86(void)
|
||||
{
|
||||
int rval = 0;
|
||||
|
||||
#ifdef cpuid
|
||||
|
||||
int eax, ebx, ecx, edx;
|
||||
int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0;
|
||||
int family = 0, model = 0;
|
||||
union { int i[3]; char c[12]; } vendor;
|
||||
int xcr0_lo = 0, xcr0_hi = 0;
|
||||
|
||||
if (!cpuid_test())
|
||||
return 0; /* CPUID not supported */
|
||||
|
||||
cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]);
|
||||
|
||||
if (max_std_level >= 1) {
|
||||
cpuid(1, eax, ebx, ecx, std_caps);
|
||||
family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
|
||||
model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
|
||||
if (std_caps & (1 << 15))
|
||||
rval |= AV_CPU_FLAG_CMOV;
|
||||
if (std_caps & (1 << 23))
|
||||
rval |= AV_CPU_FLAG_MMX;
|
||||
if (std_caps & (1 << 25))
|
||||
rval |= AV_CPU_FLAG_MMXEXT;
|
||||
#if HAVE_SSE
|
||||
if (std_caps & (1 << 25))
|
||||
rval |= AV_CPU_FLAG_SSE;
|
||||
if (std_caps & (1 << 26))
|
||||
rval |= AV_CPU_FLAG_SSE2;
|
||||
if (ecx & 1)
|
||||
rval |= AV_CPU_FLAG_SSE3;
|
||||
if (ecx & 0x00000200 )
|
||||
rval |= AV_CPU_FLAG_SSSE3;
|
||||
if (ecx & 0x00080000 )
|
||||
rval |= AV_CPU_FLAG_SSE4;
|
||||
if (ecx & 0x00100000 )
|
||||
rval |= AV_CPU_FLAG_SSE42;
|
||||
if (ecx & 0x02000000 )
|
||||
rval |= AV_CPU_FLAG_AESNI;
|
||||
#if HAVE_AVX
|
||||
/* Check OXSAVE and AVX bits */
|
||||
if ((ecx & 0x18000000) == 0x18000000) {
|
||||
/* Check for OS support */
|
||||
xgetbv(0, xcr0_lo, xcr0_hi);
|
||||
if ((xcr0_lo & 0x6) == 0x6) {
|
||||
rval |= AV_CPU_FLAG_AVX;
|
||||
if (ecx & 0x00001000)
|
||||
rval |= AV_CPU_FLAG_FMA3;
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_AVX */
|
||||
#endif /* HAVE_SSE */
|
||||
}
|
||||
if (max_std_level >= 7) {
|
||||
cpuid(7, eax, ebx, ecx, edx);
|
||||
#if HAVE_AVX2
|
||||
if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020))
|
||||
rval |= AV_CPU_FLAG_AVX2;
|
||||
#if HAVE_AVX512 /* F, CD, BW, DQ, VL */
|
||||
if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
|
||||
if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
|
||||
rval |= AV_CPU_FLAG_AVX512;
|
||||
|
||||
}
|
||||
#endif /* HAVE_AVX512 */
|
||||
#endif /* HAVE_AVX2 */
|
||||
/* BMI1/2 don't need OS support */
|
||||
if (ebx & 0x00000008) {
|
||||
rval |= AV_CPU_FLAG_BMI1;
|
||||
if (ebx & 0x00000100)
|
||||
rval |= AV_CPU_FLAG_BMI2;
|
||||
}
|
||||
}
|
||||
|
||||
cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
|
||||
|
||||
if (max_ext_level >= 0x80000001) {
|
||||
cpuid(0x80000001, eax, ebx, ecx, ext_caps);
|
||||
if (ext_caps & (1U << 31))
|
||||
rval |= AV_CPU_FLAG_3DNOW;
|
||||
if (ext_caps & (1 << 30))
|
||||
rval |= AV_CPU_FLAG_3DNOWEXT;
|
||||
if (ext_caps & (1 << 23))
|
||||
rval |= AV_CPU_FLAG_MMX;
|
||||
if (ext_caps & (1 << 22))
|
||||
rval |= AV_CPU_FLAG_MMXEXT;
|
||||
|
||||
if (!strncmp(vendor.c, "AuthenticAMD", 12)) {
|
||||
/* Allow for selectively disabling SSE2 functions on AMD processors
|
||||
with SSE2 support but not SSE4a. This includes Athlon64, some
|
||||
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
|
||||
than SSE2 often enough to utilize this special-case flag.
|
||||
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
|
||||
so that SSE2 is used unless explicitly disabled by checking
|
||||
AV_CPU_FLAG_SSE2SLOW. */
|
||||
if (rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040))
|
||||
rval |= AV_CPU_FLAG_SSE2SLOW;
|
||||
|
||||
/* Similar to the above but for AVX functions on AMD processors.
|
||||
This is necessary only for functions using YMM registers on Bulldozer
|
||||
and Jaguar based CPUs as they lack 256-bit execution units. SSE/AVX
|
||||
functions using XMM registers are always faster on them.
|
||||
AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is
|
||||
used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW. */
|
||||
if ((family == 0x15 || family == 0x16) && (rval & AV_CPU_FLAG_AVX))
|
||||
rval |= AV_CPU_FLAG_AVXSLOW;
|
||||
}
|
||||
|
||||
/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
|
||||
* used unless the OS has AVX support. */
|
||||
if (rval & AV_CPU_FLAG_AVX) {
|
||||
if (ecx & 0x00000800)
|
||||
rval |= AV_CPU_FLAG_XOP;
|
||||
if (ecx & 0x00010000)
|
||||
rval |= AV_CPU_FLAG_FMA4;
|
||||
}
|
||||
}
|
||||
|
||||
if (!strncmp(vendor.c, "GenuineIntel", 12)) {
|
||||
if (family == 6 && (model == 9 || model == 13 || model == 14)) {
|
||||
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
|
||||
* 6/14 (core1 "yonah") theoretically support sse2, but it's
|
||||
* usually slower than mmx, so let's just pretend they don't.
|
||||
* AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is
|
||||
* enabled so that SSE2 is not used unless explicitly enabled
|
||||
* by checking AV_CPU_FLAG_SSE2SLOW. The same situation
|
||||
* applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */
|
||||
if (rval & AV_CPU_FLAG_SSE2)
|
||||
rval ^= AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2;
|
||||
if (rval & AV_CPU_FLAG_SSE3)
|
||||
rval ^= AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3;
|
||||
}
|
||||
/* The Atom processor has SSSE3 support, which is useful in many cases,
|
||||
* but sometimes the SSSE3 version is slower than the SSE2 equivalent
|
||||
* on the Atom, but is generally faster on other processors supporting
|
||||
* SSSE3. This flag allows for selectively disabling certain SSSE3
|
||||
* functions on the Atom. */
|
||||
if (family == 6 && model == 28)
|
||||
rval |= AV_CPU_FLAG_ATOM;
|
||||
|
||||
/* Conroe has a slow shuffle unit. Check the model number to ensure not
|
||||
* to include crippled low-end Penryns and Nehalems that lack SSE4. */
|
||||
if ((rval & AV_CPU_FLAG_SSSE3) && !(rval & AV_CPU_FLAG_SSE4) &&
|
||||
family == 6 && model < 23)
|
||||
rval |= AV_CPU_FLAG_SSSE3SLOW;
|
||||
}
|
||||
|
||||
#endif /* cpuid */
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
size_t ff_get_cpu_max_align_x86(void)
|
||||
{
|
||||
int flags = av_get_cpu_flags();
|
||||
|
||||
if (flags & AV_CPU_FLAG_AVX512)
|
||||
return 64;
|
||||
if (flags & (AV_CPU_FLAG_AVX2 |
|
||||
AV_CPU_FLAG_AVX |
|
||||
AV_CPU_FLAG_XOP |
|
||||
AV_CPU_FLAG_FMA4 |
|
||||
AV_CPU_FLAG_FMA3 |
|
||||
AV_CPU_FLAG_AVXSLOW))
|
||||
return 32;
|
||||
if (flags & (AV_CPU_FLAG_AESNI |
|
||||
AV_CPU_FLAG_SSE42 |
|
||||
AV_CPU_FLAG_SSE4 |
|
||||
AV_CPU_FLAG_SSSE3 |
|
||||
AV_CPU_FLAG_SSE3 |
|
||||
AV_CPU_FLAG_SSE2 |
|
||||
AV_CPU_FLAG_SSE |
|
||||
AV_CPU_FLAG_ATOM |
|
||||
AV_CPU_FLAG_SSSE3SLOW |
|
||||
AV_CPU_FLAG_SSE3SLOW |
|
||||
AV_CPU_FLAG_SSE2SLOW))
|
||||
return 16;
|
||||
|
||||
return 8;
|
||||
}
|
113
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/cpu.h
vendored
Normal file
113
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/cpu.h
vendored
Normal file
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_CPU_H
|
||||
#define AVUTIL_X86_CPU_H
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
|
||||
#define AV_CPU_FLAG_AMD3DNOW AV_CPU_FLAG_3DNOW
|
||||
#define AV_CPU_FLAG_AMD3DNOWEXT AV_CPU_FLAG_3DNOWEXT
|
||||
|
||||
#define X86_AMD3DNOW(flags) CPUEXT(flags, AMD3DNOW)
|
||||
#define X86_AMD3DNOWEXT(flags) CPUEXT(flags, AMD3DNOWEXT)
|
||||
#define X86_MMX(flags) CPUEXT(flags, MMX)
|
||||
#define X86_MMXEXT(flags) CPUEXT(flags, MMXEXT)
|
||||
#define X86_SSE(flags) CPUEXT(flags, SSE)
|
||||
#define X86_SSE2(flags) CPUEXT(flags, SSE2)
|
||||
#define X86_SSE2_FAST(flags) CPUEXT_FAST(flags, SSE2)
|
||||
#define X86_SSE2_SLOW(flags) CPUEXT_SLOW(flags, SSE2)
|
||||
#define X86_SSE3(flags) CPUEXT(flags, SSE3)
|
||||
#define X86_SSE3_FAST(flags) CPUEXT_FAST(flags, SSE3)
|
||||
#define X86_SSE3_SLOW(flags) CPUEXT_SLOW(flags, SSE3)
|
||||
#define X86_SSSE3(flags) CPUEXT(flags, SSSE3)
|
||||
#define X86_SSSE3_FAST(flags) CPUEXT_FAST(flags, SSSE3)
|
||||
#define X86_SSSE3_SLOW(flags) CPUEXT_SLOW(flags, SSSE3)
|
||||
#define X86_SSE4(flags) CPUEXT(flags, SSE4)
|
||||
#define X86_SSE42(flags) CPUEXT(flags, SSE42)
|
||||
#define X86_AVX(flags) CPUEXT(flags, AVX)
|
||||
#define X86_AVX_FAST(flags) CPUEXT_FAST(flags, AVX)
|
||||
#define X86_AVX_SLOW(flags) CPUEXT_SLOW(flags, AVX)
|
||||
#define X86_XOP(flags) CPUEXT(flags, XOP)
|
||||
#define X86_FMA3(flags) CPUEXT(flags, FMA3)
|
||||
#define X86_FMA4(flags) CPUEXT(flags, FMA4)
|
||||
#define X86_AVX2(flags) CPUEXT(flags, AVX2)
|
||||
#define X86_AESNI(flags) CPUEXT(flags, AESNI)
|
||||
#define X86_AVX512(flags) CPUEXT(flags, AVX512)
|
||||
|
||||
#define EXTERNAL_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
|
||||
#define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT)
|
||||
#define EXTERNAL_MMX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMX)
|
||||
#define EXTERNAL_MMXEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, MMXEXT)
|
||||
#define EXTERNAL_SSE(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE)
|
||||
#define EXTERNAL_SSE2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE2)
|
||||
#define EXTERNAL_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE2)
|
||||
#define EXTERNAL_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE2)
|
||||
#define EXTERNAL_SSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE3)
|
||||
#define EXTERNAL_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSE3)
|
||||
#define EXTERNAL_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSE3)
|
||||
#define EXTERNAL_SSSE3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSSE3)
|
||||
#define EXTERNAL_SSSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, SSSE3)
|
||||
#define EXTERNAL_SSSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, SSSE3)
|
||||
#define EXTERNAL_SSE4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE4)
|
||||
#define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42)
|
||||
#define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX)
|
||||
#define EXTERNAL_AVX_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _EXTERNAL, AVX)
|
||||
#define EXTERNAL_AVX_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, AVX)
|
||||
#define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
|
||||
#define EXTERNAL_FMA3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3)
|
||||
#define EXTERNAL_FMA3_FAST(flags) CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, FMA3, AVX)
|
||||
#define EXTERNAL_FMA3_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, FMA3, AVX)
|
||||
#define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
|
||||
#define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2)
|
||||
#define EXTERNAL_AVX2_FAST(flags) CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, AVX2, AVX)
|
||||
#define EXTERNAL_AVX2_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX)
|
||||
#define EXTERNAL_AESNI(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
|
||||
#define EXTERNAL_AVX512(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
|
||||
|
||||
#define INLINE_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
|
||||
#define INLINE_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)
|
||||
#define INLINE_MMX(flags) CPUEXT_SUFFIX(flags, _INLINE, MMX)
|
||||
#define INLINE_MMXEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, MMXEXT)
|
||||
#define INLINE_SSE(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE)
|
||||
#define INLINE_SSE2(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE2)
|
||||
#define INLINE_SSE2_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSE2)
|
||||
#define INLINE_SSE2_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSE2)
|
||||
#define INLINE_SSE3(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE3)
|
||||
#define INLINE_SSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSE3)
|
||||
#define INLINE_SSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSE3)
|
||||
#define INLINE_SSSE3(flags) CPUEXT_SUFFIX(flags, _INLINE, SSSE3)
|
||||
#define INLINE_SSSE3_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, SSSE3)
|
||||
#define INLINE_SSSE3_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, SSSE3)
|
||||
#define INLINE_SSE4(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE4)
|
||||
#define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42)
|
||||
#define INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX)
|
||||
#define INLINE_AVX_FAST(flags) CPUEXT_SUFFIX_FAST(flags, _INLINE, AVX)
|
||||
#define INLINE_AVX_SLOW(flags) CPUEXT_SUFFIX_SLOW(flags, _INLINE, AVX)
|
||||
#define INLINE_XOP(flags) CPUEXT_SUFFIX(flags, _INLINE, XOP)
|
||||
#define INLINE_FMA3(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA3)
|
||||
#define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4)
|
||||
#define INLINE_AVX2(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX2)
|
||||
#define INLINE_AESNI(flags) CPUEXT_SUFFIX(flags, _INLINE, AESNI)
|
||||
|
||||
void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx);
|
||||
void ff_cpu_xgetbv(int op, int *eax, int *edx);
|
||||
int ff_cpu_cpuid_test(void);
|
||||
|
||||
#endif /* AVUTIL_X86_CPU_H */
|
91
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/cpuid.asm
vendored
Normal file
91
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/cpuid.asm
vendored
Normal file
|
@ -0,0 +1,91 @@
|
|||
;*****************************************************************************
|
||||
;* Copyright (C) 2005-2010 x264 project
|
||||
;*
|
||||
;* Authors: Loren Merritt <lorenm@u.washington.edu>
|
||||
;* Fiona Glaser <fiona@x264.com>
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx)
|
||||
;-----------------------------------------------------------------------------
|
||||
cglobal cpu_cpuid, 5,7
|
||||
push rbx
|
||||
push r4
|
||||
push r3
|
||||
push r2
|
||||
push r1
|
||||
mov eax, r0d
|
||||
xor ecx, ecx
|
||||
cpuid
|
||||
pop r4
|
||||
mov [r4], eax
|
||||
pop r4
|
||||
mov [r4], ebx
|
||||
pop r4
|
||||
mov [r4], ecx
|
||||
pop r4
|
||||
mov [r4], edx
|
||||
pop rbx
|
||||
RET
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_cpu_xgetbv(int op, int *eax, int *edx)
|
||||
;-----------------------------------------------------------------------------
|
||||
cglobal cpu_xgetbv, 3,7
|
||||
push r2
|
||||
push r1
|
||||
mov ecx, r0d
|
||||
xgetbv
|
||||
pop r4
|
||||
mov [r4], eax
|
||||
pop r4
|
||||
mov [r4], edx
|
||||
RET
|
||||
|
||||
%if ARCH_X86_64 == 0
|
||||
;-----------------------------------------------------------------------------
|
||||
; int ff_cpu_cpuid_test(void)
|
||||
; return 0 if unsupported
|
||||
;-----------------------------------------------------------------------------
|
||||
cglobal cpu_cpuid_test
|
||||
pushfd
|
||||
push ebx
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
pushfd
|
||||
pop eax
|
||||
mov ebx, eax
|
||||
xor eax, 0x200000
|
||||
push eax
|
||||
popfd
|
||||
pushfd
|
||||
pop eax
|
||||
xor eax, ebx
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
pop ebx
|
||||
popfd
|
||||
ret
|
||||
%endif
|
55
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/emms.h
vendored
Normal file
55
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/emms.h
vendored
Normal file
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_EMMS_H
|
||||
#define AVUTIL_X86_EMMS_H
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
|
||||
void avpriv_emms_asm(void);
|
||||
|
||||
#if HAVE_MMX_INLINE
|
||||
# define emms_c emms_c
|
||||
/**
|
||||
* Empty mmx state.
|
||||
* this must be called between any dsp function and float/double code.
|
||||
* for example sin(); dsp->idct_put(); emms_c(); cos()
|
||||
* Note, *alloc() and *free() also use float code in some libc implementations
|
||||
* thus this also applies to them or any function using them.
|
||||
*/
|
||||
static av_always_inline void emms_c(void)
|
||||
{
|
||||
/* Some inlined functions may also use mmx instructions regardless of
|
||||
* runtime cpuflags. With that in mind, we unconditionally empty the
|
||||
* mmx state if the target cpu chosen at configure time supports it.
|
||||
*/
|
||||
#if !defined(__MMX__)
|
||||
if(av_get_cpu_flags() & AV_CPU_FLAG_MMX)
|
||||
#endif
|
||||
__asm__ volatile ("emms" ::: "memory");
|
||||
}
|
||||
#elif HAVE_MMX && HAVE_MM_EMPTY
|
||||
# include <mmintrin.h>
|
||||
# define emms_c _mm_empty
|
||||
#elif HAVE_MMX_EXTERNAL
|
||||
# define emms_c avpriv_emms_asm
|
||||
#endif /* HAVE_MMX_INLINE */
|
||||
|
||||
#endif /* AVUTIL_X86_EMMS_H */
|
48
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/fixed_dsp.asm
vendored
Normal file
48
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/fixed_dsp.asm
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
;*****************************************************************************
|
||||
;* x86-optimized Float DSP functions
|
||||
;*
|
||||
;* Copyright 2016 James Almer
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_butterflies_fixed(float *src0, float *src1, int len);
|
||||
;-----------------------------------------------------------------------------
|
||||
INIT_XMM sse2
|
||||
cglobal butterflies_fixed, 3,3,3, src0, src1, len
|
||||
shl lend, 2
|
||||
add src0q, lenq
|
||||
add src1q, lenq
|
||||
neg lenq
|
||||
|
||||
align 16
|
||||
.loop:
|
||||
mova m0, [src0q + lenq]
|
||||
mova m1, [src1q + lenq]
|
||||
mova m2, m0
|
||||
paddd m0, m1
|
||||
psubd m2, m1
|
||||
mova [src0q + lenq], m0
|
||||
mova [src1q + lenq], m2
|
||||
add lenq, mmsize
|
||||
jl .loop
|
||||
RET
|
35
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/fixed_dsp_init.c
vendored
Normal file
35
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/fixed_dsp_init.c
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/fixed_dsp.h"
|
||||
#include "cpu.h"
|
||||
|
||||
void ff_butterflies_fixed_sse2(int *src0, int *src1, int len);
|
||||
|
||||
av_cold void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
fdsp->butterflies_fixed = ff_butterflies_fixed_sse2;
|
||||
}
|
||||
}
|
484
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/float_dsp.asm
vendored
Normal file
484
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/float_dsp.asm
vendored
Normal file
|
@ -0,0 +1,484 @@
|
|||
;*****************************************************************************
|
||||
;* x86-optimized Float DSP functions
|
||||
;*
|
||||
;* Copyright 2006 Loren Merritt
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "x86util.asm"
|
||||
|
||||
SECTION_RODATA 32
|
||||
pd_reverse: dd 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void vector_fmul(float *dst, const float *src0, const float *src1, int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
%macro VECTOR_FMUL 0
|
||||
cglobal vector_fmul, 4,4,2, dst, src0, src1, len
|
||||
lea lenq, [lend*4 - 64]
|
||||
ALIGN 16
|
||||
.loop:
|
||||
%assign a 0
|
||||
%rep 32/mmsize
|
||||
mova m0, [src0q + lenq + (a+0)*mmsize]
|
||||
mova m1, [src0q + lenq + (a+1)*mmsize]
|
||||
mulps m0, m0, [src1q + lenq + (a+0)*mmsize]
|
||||
mulps m1, m1, [src1q + lenq + (a+1)*mmsize]
|
||||
mova [dstq + lenq + (a+0)*mmsize], m0
|
||||
mova [dstq + lenq + (a+1)*mmsize], m1
|
||||
%assign a a+2
|
||||
%endrep
|
||||
|
||||
sub lenq, 64
|
||||
jge .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
VECTOR_FMUL
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
VECTOR_FMUL
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void vector_dmul(double *dst, const double *src0, const double *src1, int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
%macro VECTOR_DMUL 0
|
||||
cglobal vector_dmul, 4,4,4, dst, src0, src1, len
|
||||
lea lend, [lenq*8 - mmsize*4]
|
||||
ALIGN 16
|
||||
.loop:
|
||||
movaps m0, [src0q + lenq + 0*mmsize]
|
||||
movaps m1, [src0q + lenq + 1*mmsize]
|
||||
movaps m2, [src0q + lenq + 2*mmsize]
|
||||
movaps m3, [src0q + lenq + 3*mmsize]
|
||||
mulpd m0, m0, [src1q + lenq + 0*mmsize]
|
||||
mulpd m1, m1, [src1q + lenq + 1*mmsize]
|
||||
mulpd m2, m2, [src1q + lenq + 2*mmsize]
|
||||
mulpd m3, m3, [src1q + lenq + 3*mmsize]
|
||||
movaps [dstq + lenq + 0*mmsize], m0
|
||||
movaps [dstq + lenq + 1*mmsize], m1
|
||||
movaps [dstq + lenq + 2*mmsize], m2
|
||||
movaps [dstq + lenq + 3*mmsize], m3
|
||||
|
||||
sub lenq, mmsize*4
|
||||
jge .loop
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
VECTOR_DMUL
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
VECTOR_DMUL
|
||||
%endif
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_vector_fmac_scalar(float *dst, const float *src, float mul, int len)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
%macro VECTOR_FMAC_SCALAR 0
|
||||
%if UNIX64
|
||||
cglobal vector_fmac_scalar, 3,3,5, dst, src, len
|
||||
%else
|
||||
cglobal vector_fmac_scalar, 4,4,5, dst, src, mul, len
|
||||
%endif
|
||||
%if ARCH_X86_32
|
||||
VBROADCASTSS m0, mulm
|
||||
%else
|
||||
%if WIN64
|
||||
SWAP 0, 2
|
||||
%endif
|
||||
shufps xm0, xm0, 0
|
||||
%if cpuflag(avx)
|
||||
vinsertf128 m0, m0, xm0, 1
|
||||
%endif
|
||||
%endif
|
||||
lea lenq, [lend*4-64]
|
||||
.loop:
|
||||
%if cpuflag(fma3)
|
||||
mova m1, [dstq+lenq]
|
||||
mova m2, [dstq+lenq+1*mmsize]
|
||||
fmaddps m1, m0, [srcq+lenq], m1
|
||||
fmaddps m2, m0, [srcq+lenq+1*mmsize], m2
|
||||
%else ; cpuflag
|
||||
mulps m1, m0, [srcq+lenq]
|
||||
mulps m2, m0, [srcq+lenq+1*mmsize]
|
||||
%if mmsize < 32
|
||||
mulps m3, m0, [srcq+lenq+2*mmsize]
|
||||
mulps m4, m0, [srcq+lenq+3*mmsize]
|
||||
%endif ; mmsize
|
||||
addps m1, m1, [dstq+lenq]
|
||||
addps m2, m2, [dstq+lenq+1*mmsize]
|
||||
%if mmsize < 32
|
||||
addps m3, m3, [dstq+lenq+2*mmsize]
|
||||
addps m4, m4, [dstq+lenq+3*mmsize]
|
||||
%endif ; mmsize
|
||||
%endif ; cpuflag
|
||||
mova [dstq+lenq], m1
|
||||
mova [dstq+lenq+1*mmsize], m2
|
||||
%if mmsize < 32
|
||||
mova [dstq+lenq+2*mmsize], m3
|
||||
mova [dstq+lenq+3*mmsize], m4
|
||||
%endif ; mmsize
|
||||
sub lenq, 64
|
||||
jge .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
VECTOR_FMAC_SCALAR
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
VECTOR_FMAC_SCALAR
|
||||
%endif
|
||||
%if HAVE_FMA3_EXTERNAL
|
||||
INIT_YMM fma3
|
||||
VECTOR_FMAC_SCALAR
|
||||
%endif
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_vector_fmul_scalar(float *dst, const float *src, float mul, int len)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
%macro VECTOR_FMUL_SCALAR 0
|
||||
%if UNIX64
|
||||
cglobal vector_fmul_scalar, 3,3,2, dst, src, len
|
||||
%else
|
||||
cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
|
||||
%endif
|
||||
%if ARCH_X86_32
|
||||
movss m0, mulm
|
||||
%elif WIN64
|
||||
SWAP 0, 2
|
||||
%endif
|
||||
shufps m0, m0, 0
|
||||
lea lenq, [lend*4-mmsize]
|
||||
.loop:
|
||||
mova m1, [srcq+lenq]
|
||||
mulps m1, m0
|
||||
mova [dstq+lenq], m1
|
||||
sub lenq, mmsize
|
||||
jge .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
VECTOR_FMUL_SCALAR
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_vector_dmac_scalar(double *dst, const double *src, double mul,
|
||||
; int len)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
%macro VECTOR_DMAC_SCALAR 0
|
||||
%if ARCH_X86_32
|
||||
cglobal vector_dmac_scalar, 2,4,5, dst, src, mul, len, lenaddr
|
||||
mov lenq, lenaddrm
|
||||
VBROADCASTSD m0, mulm
|
||||
%else
|
||||
%if UNIX64
|
||||
cglobal vector_dmac_scalar, 3,3,5, dst, src, len
|
||||
%else
|
||||
cglobal vector_dmac_scalar, 4,4,5, dst, src, mul, len
|
||||
SWAP 0, 2
|
||||
%endif
|
||||
movlhps xm0, xm0
|
||||
%if cpuflag(avx)
|
||||
vinsertf128 m0, m0, xm0, 1
|
||||
%endif
|
||||
%endif
|
||||
lea lenq, [lend*8-mmsize*4]
|
||||
.loop:
|
||||
%if cpuflag(fma3)
|
||||
movaps m1, [dstq+lenq]
|
||||
movaps m2, [dstq+lenq+1*mmsize]
|
||||
movaps m3, [dstq+lenq+2*mmsize]
|
||||
movaps m4, [dstq+lenq+3*mmsize]
|
||||
fmaddpd m1, m0, [srcq+lenq], m1
|
||||
fmaddpd m2, m0, [srcq+lenq+1*mmsize], m2
|
||||
fmaddpd m3, m0, [srcq+lenq+2*mmsize], m3
|
||||
fmaddpd m4, m0, [srcq+lenq+3*mmsize], m4
|
||||
%else ; cpuflag
|
||||
mulpd m1, m0, [srcq+lenq]
|
||||
mulpd m2, m0, [srcq+lenq+1*mmsize]
|
||||
mulpd m3, m0, [srcq+lenq+2*mmsize]
|
||||
mulpd m4, m0, [srcq+lenq+3*mmsize]
|
||||
addpd m1, m1, [dstq+lenq]
|
||||
addpd m2, m2, [dstq+lenq+1*mmsize]
|
||||
addpd m3, m3, [dstq+lenq+2*mmsize]
|
||||
addpd m4, m4, [dstq+lenq+3*mmsize]
|
||||
%endif ; cpuflag
|
||||
movaps [dstq+lenq], m1
|
||||
movaps [dstq+lenq+1*mmsize], m2
|
||||
movaps [dstq+lenq+2*mmsize], m3
|
||||
movaps [dstq+lenq+3*mmsize], m4
|
||||
sub lenq, mmsize*4
|
||||
jge .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
VECTOR_DMAC_SCALAR
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
VECTOR_DMAC_SCALAR
|
||||
%endif
|
||||
%if HAVE_FMA3_EXTERNAL
|
||||
INIT_YMM fma3
|
||||
VECTOR_DMAC_SCALAR
|
||||
%endif
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_vector_dmul_scalar(double *dst, const double *src, double mul,
|
||||
; int len)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
%macro VECTOR_DMUL_SCALAR 0
|
||||
%if ARCH_X86_32
|
||||
cglobal vector_dmul_scalar, 3,4,3, dst, src, mul, len, lenaddr
|
||||
mov lenq, lenaddrm
|
||||
%elif UNIX64
|
||||
cglobal vector_dmul_scalar, 3,3,3, dst, src, len
|
||||
%else
|
||||
cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len
|
||||
%endif
|
||||
%if ARCH_X86_32
|
||||
VBROADCASTSD m0, mulm
|
||||
%else
|
||||
%if WIN64
|
||||
SWAP 0, 2
|
||||
%endif
|
||||
movlhps xm0, xm0
|
||||
%if cpuflag(avx)
|
||||
vinsertf128 ym0, ym0, xm0, 1
|
||||
%endif
|
||||
%endif
|
||||
lea lenq, [lend*8-2*mmsize]
|
||||
.loop:
|
||||
mulpd m1, m0, [srcq+lenq ]
|
||||
mulpd m2, m0, [srcq+lenq+mmsize]
|
||||
movaps [dstq+lenq ], m1
|
||||
movaps [dstq+lenq+mmsize], m2
|
||||
sub lenq, 2*mmsize
|
||||
jge .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
VECTOR_DMUL_SCALAR
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
VECTOR_DMUL_SCALAR
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; vector_fmul_window(float *dst, const float *src0,
|
||||
; const float *src1, const float *win, int len);
|
||||
;-----------------------------------------------------------------------------
|
||||
%macro VECTOR_FMUL_WINDOW 0
|
||||
cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1
|
||||
shl lend, 2
|
||||
lea len1q, [lenq - mmsize]
|
||||
add src0q, lenq
|
||||
add dstq, lenq
|
||||
add winq, lenq
|
||||
neg lenq
|
||||
.loop:
|
||||
mova m0, [winq + lenq]
|
||||
mova m4, [src0q + lenq]
|
||||
%if cpuflag(sse)
|
||||
mova m1, [winq + len1q]
|
||||
mova m5, [src1q + len1q]
|
||||
shufps m1, m1, 0x1b
|
||||
shufps m5, m5, 0x1b
|
||||
mova m2, m0
|
||||
mova m3, m1
|
||||
mulps m2, m4
|
||||
mulps m3, m5
|
||||
mulps m1, m4
|
||||
mulps m0, m5
|
||||
addps m2, m3
|
||||
subps m1, m0
|
||||
shufps m2, m2, 0x1b
|
||||
%else
|
||||
pswapd m1, [winq + len1q]
|
||||
pswapd m5, [src1q + len1q]
|
||||
mova m2, m0
|
||||
mova m3, m1
|
||||
pfmul m2, m4
|
||||
pfmul m3, m5
|
||||
pfmul m1, m4
|
||||
pfmul m0, m5
|
||||
pfadd m2, m3
|
||||
pfsub m1, m0
|
||||
pswapd m2, m2
|
||||
%endif
|
||||
mova [dstq + lenq], m1
|
||||
mova [dstq + len1q], m2
|
||||
sub len1q, mmsize
|
||||
add lenq, mmsize
|
||||
jl .loop
|
||||
%if mmsize == 8
|
||||
femms
|
||||
%endif
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX 3dnowext
|
||||
VECTOR_FMUL_WINDOW
|
||||
INIT_XMM sse
|
||||
VECTOR_FMUL_WINDOW
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; vector_fmul_add(float *dst, const float *src0, const float *src1,
|
||||
; const float *src2, int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
%macro VECTOR_FMUL_ADD 0
|
||||
cglobal vector_fmul_add, 5,5,4, dst, src0, src1, src2, len
|
||||
lea lenq, [lend*4 - 2*mmsize]
|
||||
ALIGN 16
|
||||
.loop:
|
||||
mova m0, [src0q + lenq]
|
||||
mova m1, [src0q + lenq + mmsize]
|
||||
%if cpuflag(fma3)
|
||||
mova m2, [src2q + lenq]
|
||||
mova m3, [src2q + lenq + mmsize]
|
||||
fmaddps m0, m0, [src1q + lenq], m2
|
||||
fmaddps m1, m1, [src1q + lenq + mmsize], m3
|
||||
%else
|
||||
mulps m0, m0, [src1q + lenq]
|
||||
mulps m1, m1, [src1q + lenq + mmsize]
|
||||
addps m0, m0, [src2q + lenq]
|
||||
addps m1, m1, [src2q + lenq + mmsize]
|
||||
%endif
|
||||
mova [dstq + lenq], m0
|
||||
mova [dstq + lenq + mmsize], m1
|
||||
|
||||
sub lenq, 2*mmsize
|
||||
jge .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
VECTOR_FMUL_ADD
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
VECTOR_FMUL_ADD
|
||||
%endif
|
||||
%if HAVE_FMA3_EXTERNAL
|
||||
INIT_YMM fma3
|
||||
VECTOR_FMUL_ADD
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void vector_fmul_reverse(float *dst, const float *src0, const float *src1,
|
||||
; int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
%macro VECTOR_FMUL_REVERSE 0
|
||||
cglobal vector_fmul_reverse, 4,4,2, dst, src0, src1, len
|
||||
%if cpuflag(avx2)
|
||||
movaps m2, [pd_reverse]
|
||||
%endif
|
||||
lea lenq, [lend*4 - 2*mmsize]
|
||||
ALIGN 16
|
||||
.loop:
|
||||
%if cpuflag(avx2)
|
||||
vpermps m0, m2, [src1q]
|
||||
vpermps m1, m2, [src1q+mmsize]
|
||||
%elif cpuflag(avx)
|
||||
vmovaps xmm0, [src1q + 16]
|
||||
vinsertf128 m0, m0, [src1q], 1
|
||||
vshufps m0, m0, m0, q0123
|
||||
vmovaps xmm1, [src1q + mmsize + 16]
|
||||
vinsertf128 m1, m1, [src1q + mmsize], 1
|
||||
vshufps m1, m1, m1, q0123
|
||||
%else
|
||||
mova m0, [src1q]
|
||||
mova m1, [src1q + mmsize]
|
||||
shufps m0, m0, q0123
|
||||
shufps m1, m1, q0123
|
||||
%endif
|
||||
mulps m0, m0, [src0q + lenq + mmsize]
|
||||
mulps m1, m1, [src0q + lenq]
|
||||
movaps [dstq + lenq + mmsize], m0
|
||||
movaps [dstq + lenq], m1
|
||||
add src1q, 2*mmsize
|
||||
sub lenq, 2*mmsize
|
||||
jge .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse
|
||||
VECTOR_FMUL_REVERSE
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
VECTOR_FMUL_REVERSE
|
||||
%endif
|
||||
%if HAVE_AVX2_EXTERNAL
|
||||
INIT_YMM avx2
|
||||
VECTOR_FMUL_REVERSE
|
||||
%endif
|
||||
|
||||
; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
|
||||
INIT_XMM sse
|
||||
cglobal scalarproduct_float, 3,3,2, v1, v2, offset
|
||||
shl offsetd, 2
|
||||
add v1q, offsetq
|
||||
add v2q, offsetq
|
||||
neg offsetq
|
||||
xorps xmm0, xmm0
|
||||
.loop:
|
||||
movaps xmm1, [v1q+offsetq]
|
||||
mulps xmm1, [v2q+offsetq]
|
||||
addps xmm0, xmm1
|
||||
add offsetq, 16
|
||||
js .loop
|
||||
movhlps xmm1, xmm0
|
||||
addps xmm0, xmm1
|
||||
movss xmm1, xmm0
|
||||
shufps xmm0, xmm0, 1
|
||||
addss xmm0, xmm1
|
||||
%if ARCH_X86_64 == 0
|
||||
movss r0m, xmm0
|
||||
fld dword r0m
|
||||
%endif
|
||||
RET
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_butterflies_float(float *src0, float *src1, int len);
|
||||
;-----------------------------------------------------------------------------
|
||||
INIT_XMM sse
|
||||
cglobal butterflies_float, 3,3,3, src0, src1, len
|
||||
shl lend, 2
|
||||
add src0q, lenq
|
||||
add src1q, lenq
|
||||
neg lenq
|
||||
.loop:
|
||||
mova m0, [src0q + lenq]
|
||||
mova m1, [src1q + lenq]
|
||||
subps m2, m0, m1
|
||||
addps m0, m0, m1
|
||||
mova [src1q + lenq], m2
|
||||
mova [src0q + lenq], m0
|
||||
add lenq, mmsize
|
||||
jl .loop
|
||||
REP_RET
|
121
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/float_dsp_init.c
vendored
Normal file
121
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/float_dsp_init.c
vendored
Normal file
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "cpu.h"
|
||||
#include "asm.h"
|
||||
|
||||
void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
|
||||
int len);
|
||||
void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
|
||||
int len);
|
||||
|
||||
void ff_vector_dmul_sse2(double *dst, const double *src0, const double *src1,
|
||||
int len);
|
||||
void ff_vector_dmul_avx(double *dst, const double *src0, const double *src1,
|
||||
int len);
|
||||
|
||||
void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
void ff_vector_fmac_scalar_fma3(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_dmac_scalar_sse2(double *dst, const double *src, double mul,
|
||||
int len);
|
||||
void ff_vector_dmac_scalar_avx(double *dst, const double *src, double mul,
|
||||
int len);
|
||||
void ff_vector_dmac_scalar_fma3(double *dst, const double *src, double mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
|
||||
double mul, int len);
|
||||
void ff_vector_dmul_scalar_avx(double *dst, const double *src,
|
||||
double mul, int len);
|
||||
|
||||
void ff_vector_fmul_window_3dnowext(float *dst, const float *src0,
|
||||
const float *src1, const float *win, int len);
|
||||
void ff_vector_fmul_window_sse(float *dst, const float *src0,
|
||||
const float *src1, const float *win, int len);
|
||||
|
||||
void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
|
||||
const float *src2, int len);
|
||||
void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
|
||||
const float *src2, int len);
|
||||
void ff_vector_fmul_add_fma3(float *dst, const float *src0, const float *src1,
|
||||
const float *src2, int len);
|
||||
|
||||
void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
void ff_vector_fmul_reverse_avx2(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
|
||||
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
||||
|
||||
void ff_butterflies_float_sse(float *av_restrict src0, float *av_restrict src1, int len);
|
||||
|
||||
av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
|
||||
fdsp->vector_fmul_window = ff_vector_fmul_window_3dnowext;
|
||||
}
|
||||
if (EXTERNAL_SSE(cpu_flags)) {
|
||||
fdsp->vector_fmul = ff_vector_fmul_sse;
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
|
||||
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
|
||||
fdsp->vector_fmul_window = ff_vector_fmul_window_sse;
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
|
||||
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
|
||||
fdsp->butterflies_float = ff_butterflies_float_sse;
|
||||
}
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
fdsp->vector_dmul = ff_vector_dmul_sse2;
|
||||
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2;
|
||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
fdsp->vector_fmul = ff_vector_fmul_avx;
|
||||
fdsp->vector_dmul = ff_vector_dmul_avx;
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
|
||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
|
||||
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx;
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
|
||||
}
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2;
|
||||
}
|
||||
if (EXTERNAL_FMA3_FAST(cpu_flags)) {
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_fma3;
|
||||
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_fma3;
|
||||
}
|
||||
}
|
53
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/imgutils.asm
vendored
Normal file
53
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/imgutils.asm
vendored
Normal file
|
@ -0,0 +1,53 @@
|
|||
;*****************************************************************************
|
||||
;* Copyright 2016 Anton Khirnov
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
INIT_XMM sse4
|
||||
cglobal image_copy_plane_uc_from, 6, 7, 4, dst, dst_linesize, src, src_linesize, bw, height, rowpos
|
||||
add dstq, bwq
|
||||
add srcq, bwq
|
||||
neg bwq
|
||||
|
||||
.row_start:
|
||||
mov rowposq, bwq
|
||||
|
||||
.loop:
|
||||
movntdqa m0, [srcq + rowposq + 0 * mmsize]
|
||||
movntdqa m1, [srcq + rowposq + 1 * mmsize]
|
||||
movntdqa m2, [srcq + rowposq + 2 * mmsize]
|
||||
movntdqa m3, [srcq + rowposq + 3 * mmsize]
|
||||
|
||||
mova [dstq + rowposq + 0 * mmsize], m0
|
||||
mova [dstq + rowposq + 1 * mmsize], m1
|
||||
mova [dstq + rowposq + 2 * mmsize], m2
|
||||
mova [dstq + rowposq + 3 * mmsize], m3
|
||||
|
||||
add rowposq, 4 * mmsize
|
||||
jnz .loop
|
||||
|
||||
add srcq, src_linesizeq
|
||||
add dstq, dst_linesizeq
|
||||
dec heightd
|
||||
jnz .row_start
|
||||
|
||||
RET
|
49
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/imgutils_init.c
vendored
Normal file
49
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/imgutils_init.c
vendored
Normal file
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/error.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
#include "libavutil/imgutils_internal.h"
|
||||
#include "libavutil/internal.h"
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
void ff_image_copy_plane_uc_from_sse4(uint8_t *dst, ptrdiff_t dst_linesize,
|
||||
const uint8_t *src, ptrdiff_t src_linesize,
|
||||
ptrdiff_t bytewidth, int height);
|
||||
|
||||
int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize,
|
||||
const uint8_t *src, ptrdiff_t src_linesize,
|
||||
ptrdiff_t bytewidth, int height)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
ptrdiff_t bw_aligned = FFALIGN(bytewidth, 64);
|
||||
|
||||
if (EXTERNAL_SSE4(cpu_flags) &&
|
||||
bw_aligned <= dst_linesize && bw_aligned <= src_linesize)
|
||||
ff_image_copy_plane_uc_from_sse4(dst, dst_linesize, src, src_linesize,
|
||||
bw_aligned, height);
|
||||
else
|
||||
return AVERROR(ENOSYS);
|
||||
|
||||
return 0;
|
||||
}
|
139
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/intmath.h
vendored
Normal file
139
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/intmath.h
vendored
Normal file
|
@ -0,0 +1,139 @@
|
|||
/*
|
||||
* Copyright (c) 2015 James Almer
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_INTMATH_H
|
||||
#define AVUTIL_X86_INTMATH_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#if HAVE_FAST_CLZ
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#include "config.h"
|
||||
|
||||
#if HAVE_FAST_CLZ
|
||||
#if (defined(__INTEL_COMPILER) && (__INTEL_COMPILER>=1216)) || defined(_MSC_VER)
|
||||
# if defined(__INTEL_COMPILER)
|
||||
# define ff_log2(x) (_bit_scan_reverse((x)|1))
|
||||
# else
|
||||
# define ff_log2 ff_log2_x86
|
||||
static av_always_inline av_const int ff_log2_x86(unsigned int v)
|
||||
{
|
||||
unsigned long n;
|
||||
_BitScanReverse(&n, v|1);
|
||||
return n;
|
||||
}
|
||||
# endif
|
||||
# define ff_log2_16bit av_log2
|
||||
|
||||
#if defined(__INTEL_COMPILER) || (defined(_MSC_VER) && (_MSC_VER >= 1700) && \
|
||||
(defined(__BMI__) || !defined(__clang__)))
|
||||
# define ff_ctz(v) _tzcnt_u32(v)
|
||||
|
||||
# if ARCH_X86_64
|
||||
# define ff_ctzll(v) _tzcnt_u64(v)
|
||||
# else
|
||||
# define ff_ctzll ff_ctzll_x86
|
||||
static av_always_inline av_const int ff_ctzll_x86(long long v)
|
||||
{
|
||||
return ((uint32_t)v == 0) ? _tzcnt_u32((uint32_t)(v >> 32)) + 32 : _tzcnt_u32((uint32_t)v);
|
||||
}
|
||||
# endif
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
#endif /* __INTEL_COMPILER */
|
||||
|
||||
#endif /* HAVE_FAST_CLZ */
|
||||
|
||||
#if defined(__GNUC__)
|
||||
|
||||
/* Our generic version of av_popcount is faster than GCC's built-in on
|
||||
* CPUs that don't support the popcnt instruction.
|
||||
*/
|
||||
#if defined(__POPCNT__)
|
||||
#define av_popcount __builtin_popcount
|
||||
#if ARCH_X86_64
|
||||
#define av_popcount64 __builtin_popcountll
|
||||
#endif
|
||||
|
||||
#endif /* __POPCNT__ */
|
||||
|
||||
#if defined(__BMI2__)
|
||||
|
||||
#if AV_GCC_VERSION_AT_LEAST(5,1)
|
||||
#define av_mod_uintp2 __builtin_ia32_bzhi_si
|
||||
#elif HAVE_INLINE_ASM
|
||||
/* GCC releases before 5.1.0 have a broken bzhi builtin, so for those we
|
||||
* implement it using inline assembly
|
||||
*/
|
||||
#define av_mod_uintp2 av_mod_uintp2_bmi2
|
||||
static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigned p)
|
||||
{
|
||||
if (av_builtin_constant_p(p))
|
||||
return a & ((1 << p) - 1);
|
||||
else {
|
||||
unsigned x;
|
||||
__asm__ ("bzhi %2, %1, %0 \n\t" : "=r"(x) : "rm"(a), "r"(p));
|
||||
return x;
|
||||
}
|
||||
}
|
||||
#endif /* AV_GCC_VERSION_AT_LEAST */
|
||||
|
||||
#endif /* __BMI2__ */
|
||||
|
||||
#if defined(__SSE2__) && !defined(__INTEL_COMPILER)
|
||||
|
||||
#define av_clipd av_clipd_sse2
|
||||
static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax)
|
||||
{
|
||||
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
|
||||
if (amin > amax) abort();
|
||||
#endif
|
||||
__asm__ ("minsd %2, %0 \n\t"
|
||||
"maxsd %1, %0 \n\t"
|
||||
: "+&x"(a) : "xm"(amin), "xm"(amax));
|
||||
return a;
|
||||
}
|
||||
|
||||
#endif /* __SSE2__ */
|
||||
|
||||
#if defined(__SSE__) && !defined(__INTEL_COMPILER)
|
||||
|
||||
#define av_clipf av_clipf_sse
|
||||
static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax)
|
||||
{
|
||||
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
|
||||
if (amin > amax) abort();
|
||||
#endif
|
||||
__asm__ ("minss %2, %0 \n\t"
|
||||
"maxss %1, %0 \n\t"
|
||||
: "+&x"(a) : "xm"(amin), "xm"(amax));
|
||||
return a;
|
||||
}
|
||||
|
||||
#endif /* __SSE__ */
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#endif /* AVUTIL_X86_INTMATH_H */
|
97
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/intreadwrite.h
vendored
Normal file
97
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/intreadwrite.h
vendored
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright (c) 2010 Alexander Strange <astrange@ithinksw.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_INTREADWRITE_H
|
||||
#define AVUTIL_X86_INTREADWRITE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if HAVE_MMX
|
||||
|
||||
#if !HAVE_FAST_64BIT && defined(__MMX__)
|
||||
|
||||
#define AV_COPY64 AV_COPY64
|
||||
static av_always_inline void AV_COPY64(void *d, const void *s)
|
||||
{
|
||||
__asm__("movq %1, %%mm0 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
: "=m"(*(uint64_t*)d)
|
||||
: "m" (*(const uint64_t*)s)
|
||||
: "mm0");
|
||||
}
|
||||
|
||||
#define AV_SWAP64 AV_SWAP64
|
||||
static av_always_inline void AV_SWAP64(void *a, void *b)
|
||||
{
|
||||
__asm__("movq %1, %%mm0 \n\t"
|
||||
"movq %0, %%mm1 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
"movq %%mm1, %1 \n\t"
|
||||
: "+m"(*(uint64_t*)a), "+m"(*(uint64_t*)b)
|
||||
::"mm0", "mm1");
|
||||
}
|
||||
|
||||
#define AV_ZERO64 AV_ZERO64
|
||||
static av_always_inline void AV_ZERO64(void *d)
|
||||
{
|
||||
__asm__("pxor %%mm0, %%mm0 \n\t"
|
||||
"movq %%mm0, %0 \n\t"
|
||||
: "=m"(*(uint64_t*)d)
|
||||
:: "mm0");
|
||||
}
|
||||
|
||||
#endif /* !HAVE_FAST_64BIT && defined(__MMX__) */
|
||||
|
||||
#ifdef __SSE__
|
||||
|
||||
#define AV_COPY128 AV_COPY128
|
||||
static av_always_inline void AV_COPY128(void *d, const void *s)
|
||||
{
|
||||
struct v {uint64_t v[2];};
|
||||
|
||||
__asm__("movaps %1, %%xmm0 \n\t"
|
||||
"movaps %%xmm0, %0 \n\t"
|
||||
: "=m"(*(struct v*)d)
|
||||
: "m" (*(const struct v*)s)
|
||||
: "xmm0");
|
||||
}
|
||||
|
||||
#endif /* __SSE__ */
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
#define AV_ZERO128 AV_ZERO128
|
||||
static av_always_inline void AV_ZERO128(void *d)
|
||||
{
|
||||
struct v {uint64_t v[2];};
|
||||
|
||||
__asm__("pxor %%xmm0, %%xmm0 \n\t"
|
||||
"movdqa %%xmm0, %0 \n\t"
|
||||
: "=m"(*(struct v*)d)
|
||||
:: "xmm0");
|
||||
}
|
||||
|
||||
#endif /* __SSE2__ */
|
||||
|
||||
#endif /* HAVE_MMX */
|
||||
|
||||
#endif /* AVUTIL_X86_INTREADWRITE_H */
|
290
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/lls.asm
vendored
Normal file
290
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/lls.asm
vendored
Normal file
|
@ -0,0 +1,290 @@
|
|||
;******************************************************************************
|
||||
;* linear least squares model
|
||||
;*
|
||||
;* Copyright (c) 2013 Loren Merritt
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
%define MAX_VARS 32
|
||||
%define MAX_VARS_ALIGN (MAX_VARS+4)
|
||||
%define COVAR_STRIDE MAX_VARS_ALIGN*8
|
||||
%define COVAR(x,y) [covarq + (x)*8 + (y)*COVAR_STRIDE]
|
||||
|
||||
struc LLSModel
|
||||
.covariance: resq MAX_VARS_ALIGN*MAX_VARS_ALIGN
|
||||
.coeff: resq MAX_VARS*MAX_VARS
|
||||
.variance: resq MAX_VARS
|
||||
.indep_count: resd 1
|
||||
endstruc
|
||||
|
||||
%macro ADDPD_MEM 2
|
||||
%if cpuflag(avx)
|
||||
vaddpd %2, %2, %1
|
||||
%else
|
||||
addpd %2, %1
|
||||
%endif
|
||||
mova %1, %2
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
%define movdqa movaps
|
||||
cglobal update_lls, 2,5,8, ctx, var, i, j, covar2
|
||||
%define covarq ctxq
|
||||
mov id, [ctxq + LLSModel.indep_count]
|
||||
lea varq, [varq + iq*8]
|
||||
neg iq
|
||||
mov covar2q, covarq
|
||||
.loopi:
|
||||
; Compute all 3 pairwise products of a 2x2 block that lies on the diagonal
|
||||
mova m1, [varq + iq*8]
|
||||
mova m3, [varq + iq*8 + 16]
|
||||
pshufd m4, m1, q1010
|
||||
pshufd m5, m1, q3232
|
||||
pshufd m6, m3, q1010
|
||||
pshufd m7, m3, q3232
|
||||
mulpd m0, m1, m4
|
||||
mulpd m1, m1, m5
|
||||
lea covarq, [covar2q + 16]
|
||||
ADDPD_MEM COVAR(-2,0), m0
|
||||
ADDPD_MEM COVAR(-2,1), m1
|
||||
lea jq, [iq + 2]
|
||||
cmp jd, -2
|
||||
jg .skip4x4
|
||||
.loop4x4:
|
||||
; Compute all 16 pairwise products of a 4x4 block
|
||||
mulpd m0, m4, m3
|
||||
mulpd m1, m5, m3
|
||||
mulpd m2, m6, m3
|
||||
mulpd m3, m3, m7
|
||||
ADDPD_MEM COVAR(0,0), m0
|
||||
ADDPD_MEM COVAR(0,1), m1
|
||||
ADDPD_MEM COVAR(0,2), m2
|
||||
ADDPD_MEM COVAR(0,3), m3
|
||||
mova m3, [varq + jq*8 + 16]
|
||||
mulpd m0, m4, m3
|
||||
mulpd m1, m5, m3
|
||||
mulpd m2, m6, m3
|
||||
mulpd m3, m3, m7
|
||||
ADDPD_MEM COVAR(2,0), m0
|
||||
ADDPD_MEM COVAR(2,1), m1
|
||||
ADDPD_MEM COVAR(2,2), m2
|
||||
ADDPD_MEM COVAR(2,3), m3
|
||||
mova m3, [varq + jq*8 + 32]
|
||||
add covarq, 32
|
||||
add jq, 4
|
||||
cmp jd, -2
|
||||
jle .loop4x4
|
||||
.skip4x4:
|
||||
test jd, jd
|
||||
jg .skip2x4
|
||||
mulpd m4, m3
|
||||
mulpd m5, m3
|
||||
mulpd m6, m3
|
||||
mulpd m7, m3
|
||||
ADDPD_MEM COVAR(0,0), m4
|
||||
ADDPD_MEM COVAR(0,1), m5
|
||||
ADDPD_MEM COVAR(0,2), m6
|
||||
ADDPD_MEM COVAR(0,3), m7
|
||||
.skip2x4:
|
||||
add iq, 4
|
||||
add covar2q, 4*COVAR_STRIDE+32
|
||||
cmp id, -2
|
||||
jle .loopi
|
||||
test id, id
|
||||
jg .ret
|
||||
mov jq, iq
|
||||
%define covarq covar2q
|
||||
.loop2x1:
|
||||
movsd m0, [varq + iq*8]
|
||||
movlhps m0, m0
|
||||
mulpd m0, [varq + jq*8]
|
||||
ADDPD_MEM COVAR(0,0), m0
|
||||
inc iq
|
||||
add covarq, COVAR_STRIDE
|
||||
test id, id
|
||||
jle .loop2x1
|
||||
.ret:
|
||||
REP_RET
|
||||
|
||||
%macro UPDATE_LLS 0
|
||||
cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
|
||||
%define covarq ctxq
|
||||
mov countd, [ctxq + LLSModel.indep_count]
|
||||
lea count2d, [countq-2]
|
||||
xor id, id
|
||||
.loopi:
|
||||
; Compute all 10 pairwise products of a 4x4 block that lies on the diagonal
|
||||
mova ymm1, [varq + iq*8]
|
||||
vbroadcastsd ymm4, [varq + iq*8]
|
||||
vbroadcastsd ymm5, [varq + iq*8 + 8]
|
||||
vbroadcastsd ymm6, [varq + iq*8 + 16]
|
||||
vbroadcastsd ymm7, [varq + iq*8 + 24]
|
||||
vextractf128 xmm3, ymm1, 1
|
||||
%if cpuflag(fma3)
|
||||
mova ymm0, COVAR(iq ,0)
|
||||
mova xmm2, COVAR(iq+2,2)
|
||||
fmaddpd ymm0, ymm1, ymm4, ymm0
|
||||
fmaddpd xmm2, xmm3, xmm6, xmm2
|
||||
fmaddpd ymm1, ymm5, ymm1, COVAR(iq ,1)
|
||||
fmaddpd xmm3, xmm7, xmm3, COVAR(iq+2,3)
|
||||
mova COVAR(iq ,0), ymm0
|
||||
mova COVAR(iq ,1), ymm1
|
||||
mova COVAR(iq+2,2), xmm2
|
||||
mova COVAR(iq+2,3), xmm3
|
||||
%else
|
||||
vmulpd ymm0, ymm1, ymm4
|
||||
vmulpd ymm1, ymm1, ymm5
|
||||
vmulpd xmm2, xmm3, xmm6
|
||||
vmulpd xmm3, xmm3, xmm7
|
||||
ADDPD_MEM COVAR(iq ,0), ymm0
|
||||
ADDPD_MEM COVAR(iq ,1), ymm1
|
||||
ADDPD_MEM COVAR(iq+2,2), xmm2
|
||||
ADDPD_MEM COVAR(iq+2,3), xmm3
|
||||
%endif ; cpuflag(fma3)
|
||||
lea jd, [iq + 4]
|
||||
cmp jd, count2d
|
||||
jg .skip4x4
|
||||
.loop4x4:
|
||||
; Compute all 16 pairwise products of a 4x4 block
|
||||
mova ymm3, [varq + jq*8]
|
||||
%if cpuflag(fma3)
|
||||
mova ymm0, COVAR(jq, 0)
|
||||
mova ymm1, COVAR(jq, 1)
|
||||
mova ymm2, COVAR(jq, 2)
|
||||
fmaddpd ymm0, ymm3, ymm4, ymm0
|
||||
fmaddpd ymm1, ymm3, ymm5, ymm1
|
||||
fmaddpd ymm2, ymm3, ymm6, ymm2
|
||||
fmaddpd ymm3, ymm7, ymm3, COVAR(jq,3)
|
||||
mova COVAR(jq, 0), ymm0
|
||||
mova COVAR(jq, 1), ymm1
|
||||
mova COVAR(jq, 2), ymm2
|
||||
mova COVAR(jq, 3), ymm3
|
||||
%else
|
||||
vmulpd ymm0, ymm3, ymm4
|
||||
vmulpd ymm1, ymm3, ymm5
|
||||
vmulpd ymm2, ymm3, ymm6
|
||||
vmulpd ymm3, ymm3, ymm7
|
||||
ADDPD_MEM COVAR(jq,0), ymm0
|
||||
ADDPD_MEM COVAR(jq,1), ymm1
|
||||
ADDPD_MEM COVAR(jq,2), ymm2
|
||||
ADDPD_MEM COVAR(jq,3), ymm3
|
||||
%endif ; cpuflag(fma3)
|
||||
add jd, 4
|
||||
cmp jd, count2d
|
||||
jle .loop4x4
|
||||
.skip4x4:
|
||||
cmp jd, countd
|
||||
jg .skip2x4
|
||||
mova xmm3, [varq + jq*8]
|
||||
%if cpuflag(fma3)
|
||||
mova xmm0, COVAR(jq, 0)
|
||||
mova xmm1, COVAR(jq, 1)
|
||||
mova xmm2, COVAR(jq, 2)
|
||||
fmaddpd xmm0, xmm3, xmm4, xmm0
|
||||
fmaddpd xmm1, xmm3, xmm5, xmm1
|
||||
fmaddpd xmm2, xmm3, xmm6, xmm2
|
||||
fmaddpd xmm3, xmm7, xmm3, COVAR(jq,3)
|
||||
mova COVAR(jq, 0), xmm0
|
||||
mova COVAR(jq, 1), xmm1
|
||||
mova COVAR(jq, 2), xmm2
|
||||
mova COVAR(jq, 3), xmm3
|
||||
%else
|
||||
vmulpd xmm0, xmm3, xmm4
|
||||
vmulpd xmm1, xmm3, xmm5
|
||||
vmulpd xmm2, xmm3, xmm6
|
||||
vmulpd xmm3, xmm3, xmm7
|
||||
ADDPD_MEM COVAR(jq,0), xmm0
|
||||
ADDPD_MEM COVAR(jq,1), xmm1
|
||||
ADDPD_MEM COVAR(jq,2), xmm2
|
||||
ADDPD_MEM COVAR(jq,3), xmm3
|
||||
%endif ; cpuflag(fma3)
|
||||
.skip2x4:
|
||||
add id, 4
|
||||
add covarq, 4*COVAR_STRIDE
|
||||
cmp id, count2d
|
||||
jle .loopi
|
||||
cmp id, countd
|
||||
jg .ret
|
||||
mov jd, id
|
||||
.loop2x1:
|
||||
vmovddup xmm0, [varq + iq*8]
|
||||
%if cpuflag(fma3)
|
||||
mova xmm1, [varq + jq*8]
|
||||
fmaddpd xmm0, xmm1, xmm0, COVAR(jq,0)
|
||||
mova COVAR(jq,0), xmm0
|
||||
%else
|
||||
vmulpd xmm0, [varq + jq*8]
|
||||
ADDPD_MEM COVAR(jq,0), xmm0
|
||||
%endif ; cpuflag(fma3)
|
||||
inc id
|
||||
add covarq, COVAR_STRIDE
|
||||
cmp id, countd
|
||||
jle .loop2x1
|
||||
.ret:
|
||||
REP_RET
|
||||
%endmacro ; UPDATE_LLS
|
||||
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
UPDATE_LLS
|
||||
%endif
|
||||
%if HAVE_FMA3_EXTERNAL
|
||||
INIT_YMM fma3
|
||||
UPDATE_LLS
|
||||
%endif
|
||||
|
||||
INIT_XMM sse2
|
||||
cglobal evaluate_lls, 3,4,2, ctx, var, order, i
|
||||
; This function is often called on the same buffer as update_lls, but with
|
||||
; an offset. They can't both be aligned.
|
||||
; Load halves rather than movu to avoid store-forwarding stalls, since the
|
||||
; input was initialized immediately prior to this function using scalar math.
|
||||
%define coefsq ctxq
|
||||
mov id, orderd
|
||||
imul orderd, MAX_VARS
|
||||
lea coefsq, [ctxq + LLSModel.coeff + orderq*8]
|
||||
movsd m0, [varq]
|
||||
movhpd m0, [varq + 8]
|
||||
mulpd m0, [coefsq]
|
||||
lea coefsq, [coefsq + iq*8]
|
||||
lea varq, [varq + iq*8]
|
||||
neg iq
|
||||
add iq, 2
|
||||
.loop:
|
||||
movsd m1, [varq + iq*8]
|
||||
movhpd m1, [varq + iq*8 + 8]
|
||||
mulpd m1, [coefsq + iq*8]
|
||||
addpd m0, m1
|
||||
add iq, 2
|
||||
jl .loop
|
||||
jg .skip1
|
||||
movsd m1, [varq + iq*8]
|
||||
mulsd m1, [coefsq + iq*8]
|
||||
addpd m0, m1
|
||||
.skip1:
|
||||
movhlps m1, m0
|
||||
addsd m0, m1
|
||||
%if ARCH_X86_32
|
||||
movsd r0m, m0
|
||||
fld qword r0m
|
||||
%endif
|
||||
RET
|
45
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/lls_init.c
vendored
Normal file
45
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/lls_init.c
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* linear least squares model
|
||||
*
|
||||
* Copyright (c) 2013 Loren Merritt
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/lls.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
|
||||
void ff_update_lls_sse2(LLSModel *m, const double *var);
|
||||
void ff_update_lls_avx(LLSModel *m, const double *var);
|
||||
void ff_update_lls_fma3(LLSModel *m, const double *var);
|
||||
double ff_evaluate_lls_sse2(LLSModel *m, const double *var, int order);
|
||||
|
||||
av_cold void ff_init_lls_x86(LLSModel *m)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
m->update_lls = ff_update_lls_sse2;
|
||||
if (m->indep_count >= 4)
|
||||
m->evaluate_lls = ff_evaluate_lls_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
m->update_lls = ff_update_lls_avx;
|
||||
}
|
||||
if (EXTERNAL_FMA3_FAST(cpu_flags)) {
|
||||
m->update_lls = ff_update_lls_fma3;
|
||||
}
|
||||
}
|
26
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/pixelutils.h
vendored
Normal file
26
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/pixelutils.h
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_PIXELUTILS_H
|
||||
#define AVUTIL_X86_PIXELUTILS_H
|
||||
|
||||
#include "libavutil/pixelutils.h"
|
||||
|
||||
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned);
|
||||
|
||||
#endif /* AVUTIL_X86_PIXELUTILS_H */
|
94
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/pixelutils_init.c
vendored
Normal file
94
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/pixelutils_init.c
vendored
Normal file
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pixelutils.h"
|
||||
#include "cpu.h"
|
||||
|
||||
int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_a_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_u_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_a_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
int ff_pixelutils_sad_u_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
sad[2] = ff_pixelutils_sad_8x8_mmx;
|
||||
}
|
||||
|
||||
// The best way to use SSE2 would be to do 2 SADs in parallel,
|
||||
// but we'd have to modify the pixelutils API to return SIMD functions.
|
||||
|
||||
// It's probably not faster to shuffle data around
|
||||
// to get two lines of 8 pixels into a single 16byte register,
|
||||
// so just use the MMX 8x8 version even when SSE2 is available.
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
sad[2] = ff_pixelutils_sad_8x8_mmxext;
|
||||
sad[3] = ff_pixelutils_sad_16x16_mmxext;
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
switch (aligned) {
|
||||
case 0: sad[3] = ff_pixelutils_sad_16x16_sse2; break; // src1 unaligned, src2 unaligned
|
||||
case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned, src2 unaligned
|
||||
case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
switch (aligned) {
|
||||
case 0: sad[4] = ff_pixelutils_sad_32x32_sse2; break; // src1 unaligned, src2 unaligned
|
||||
case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1 aligned, src2 unaligned
|
||||
case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1 aligned, src2 aligned
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
switch (aligned) {
|
||||
case 0: sad[4] = ff_pixelutils_sad_32x32_avx2; break; // src1 unaligned, src2 unaligned
|
||||
case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1 aligned, src2 unaligned
|
||||
case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1 aligned, src2 aligned
|
||||
}
|
||||
}
|
||||
}
|
50
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/timer.h
vendored
Normal file
50
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/timer.h
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_TIMER_H
|
||||
#define AVUTIL_X86_TIMER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
#define FF_TIMER_UNITS "decicycles"
|
||||
#define AV_READ_TIME read_time
|
||||
|
||||
static inline uint64_t read_time(void)
|
||||
{
|
||||
uint32_t a, d;
|
||||
__asm__ volatile(
|
||||
#if ARCH_X86_64 || defined(__SSE2__)
|
||||
"lfence \n\t"
|
||||
#endif
|
||||
"rdtsc \n\t"
|
||||
: "=a" (a), "=d" (d));
|
||||
return ((uint64_t)d << 32) + a;
|
||||
}
|
||||
|
||||
#elif HAVE_RDTSC
|
||||
|
||||
#include <intrin.h>
|
||||
#define AV_READ_TIME __rdtsc
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
#endif /* AVUTIL_X86_TIMER_H */
|
78
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/w64xmmtest.h
vendored
Normal file
78
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/w64xmmtest.h
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* check XMM registers for clobbers on Win64
|
||||
* Copyright (c) 2008 Ramiro Polla <ramiro.polla@gmail.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_X86_W64XMMTEST_H
|
||||
#define AVUTIL_X86_W64XMMTEST_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/bswap.h"
|
||||
|
||||
#define storexmmregs(mem) \
|
||||
__asm__ volatile( \
|
||||
"movups %%xmm6 , 0x00(%0)\n\t" \
|
||||
"movups %%xmm7 , 0x10(%0)\n\t" \
|
||||
"movups %%xmm8 , 0x20(%0)\n\t" \
|
||||
"movups %%xmm9 , 0x30(%0)\n\t" \
|
||||
"movups %%xmm10, 0x40(%0)\n\t" \
|
||||
"movups %%xmm11, 0x50(%0)\n\t" \
|
||||
"movups %%xmm12, 0x60(%0)\n\t" \
|
||||
"movups %%xmm13, 0x70(%0)\n\t" \
|
||||
"movups %%xmm14, 0x80(%0)\n\t" \
|
||||
"movups %%xmm15, 0x90(%0)\n\t" \
|
||||
:: "r"(mem) : "memory")
|
||||
|
||||
#define testxmmclobbers(func, ctx, ...) \
|
||||
uint64_t xmm[2][10][2]; \
|
||||
int ret; \
|
||||
storexmmregs(xmm[0]); \
|
||||
ret = __real_ ## func(ctx, __VA_ARGS__); \
|
||||
storexmmregs(xmm[1]); \
|
||||
if (memcmp(xmm[0], xmm[1], sizeof(xmm[0]))) { \
|
||||
int i; \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
"XMM REGS CLOBBERED IN %s!\n", #func); \
|
||||
for (i = 0; i < 10; i ++) \
|
||||
if (xmm[0][i][0] != xmm[1][i][0] || \
|
||||
xmm[0][i][1] != xmm[1][i][1]) { \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
"xmm%-2d = %016"PRIx64"%016"PRIx64"\n", \
|
||||
6 + i, av_bswap64(xmm[0][i][0]), \
|
||||
av_bswap64(xmm[0][i][1])); \
|
||||
av_log(ctx, AV_LOG_ERROR, \
|
||||
" -> %016"PRIx64"%016"PRIx64"\n", \
|
||||
av_bswap64(xmm[1][i][0]), \
|
||||
av_bswap64(xmm[1][i][1])); \
|
||||
} \
|
||||
abort(); \
|
||||
} \
|
||||
return ret
|
||||
|
||||
#define wrap(func) \
|
||||
int __real_ ## func; \
|
||||
int __wrap_ ## func; \
|
||||
int __wrap_ ## func
|
||||
|
||||
#endif /* AVUTIL_X86_W64XMMTEST_H */
|
1701
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/x86inc.asm
vendored
Normal file
1701
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/x86inc.asm
vendored
Normal file
File diff suppressed because it is too large
Load diff
1028
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/x86util.asm
vendored
Normal file
1028
trunk/3rdparty/ffmpeg-4-fit/libavutil/x86/x86util.asm
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue