Squash #1685, #1282, #1547: Support ARM platform. 5.0.5

2025-03-09 15:49:59 +00:00 · 2021-06-22 07:49:48 +08:00 · 2021-06-22 07:49:48 +08:00 · a594678e3d
commit a594678e3d
parent efdbf37255
285 changed files with 12724 additions and 1671 deletions
--- a/trunk/3rdparty/ffmpeg-4-fit/libavutil/hwcontext_cuda.c
+++ b/trunk/3rdparty/ffmpeg-4-fit/libavutil/hwcontext_cuda.c
@ -21,7 +21,6 @@
 #include "hwcontext.h"
 #include "hwcontext_internal.h"
 #include "hwcontext_cuda_internal.h"
-#include "cuda_check.h"
 #include "mem.h"
 #include "pixdesc.h"
 #include "pixfmt.h"
@ -44,8 +43,6 @@ static const enum AVPixelFormat supported_formats[] = {
    AV_PIX_FMT_0BGR32,
 };

-#define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)
-
 static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
                                       const void *hwconfig,
                                       AVHWFramesConstraints *constraints)
@ -73,48 +70,48 @@ static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,

 static void cuda_buffer_free(void *opaque, uint8_t *data)
 {
-    AVHWFramesContext        *ctx = opaque;
-    AVHWDeviceContext *device_ctx = ctx->device_ctx;
-    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
-    CudaFunctions             *cu = hwctx->internal->cuda_dl;
+    AVHWFramesContext *ctx = opaque;
+    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+    CudaFunctions *cu = hwctx->internal->cuda_dl;

    CUcontext dummy;

-    CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
+    cu->cuCtxPushCurrent(hwctx->cuda_ctx);

-    CHECK_CU(cu->cuMemFree((CUdeviceptr)data));
+    cu->cuMemFree((CUdeviceptr)data);

-    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    cu->cuCtxPopCurrent(&dummy);
 }

 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
 {
-    AVHWFramesContext        *ctx = opaque;
-    AVHWDeviceContext *device_ctx = ctx->device_ctx;
-    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
-    CudaFunctions             *cu = hwctx->internal->cuda_dl;
+    AVHWFramesContext     *ctx = opaque;
+    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+    CudaFunctions          *cu = hwctx->internal->cuda_dl;

    AVBufferRef *ret = NULL;
    CUcontext dummy = NULL;
    CUdeviceptr data;
-    int err;
+    CUresult err;

-    err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
-    if (err < 0)
+    err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
        return NULL;
+    }

-    err = CHECK_CU(cu->cuMemAlloc(&data, size));
-    if (err < 0)
+    err = cu->cuMemAlloc(&data, size);
+    if (err != CUDA_SUCCESS)
        goto fail;

    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
    if (!ret) {
-        CHECK_CU(cu->cuMemFree(data));
+        cu->cuMemFree(data);
        goto fail;
    }

 fail:
-    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    cu->cuCtxPopCurrent(&dummy);
    return ret;
 }

@ -197,17 +194,17 @@ static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
 static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                   const AVFrame *src)
 {
-    CUDAFramesContext       *priv = ctx->internal->priv;
-    AVHWDeviceContext *device_ctx = ctx->device_ctx;
-    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
-    CudaFunctions             *cu = hwctx->internal->cuda_dl;
+    CUDAFramesContext           *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;

    CUcontext dummy;
-    int i, ret;
+    CUresult err;
+    int i;

-    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
-    if (ret < 0)
-        return ret;
+    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
@ -221,17 +218,20 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

-        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
-        if (ret < 0)
-            goto exit;
+        err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
+        if (err != CUDA_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
+            return AVERROR_UNKNOWN;
+        }
    }

-    ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
-    if (ret < 0)
-        goto exit;
+    err = cu->cuStreamSynchronize(device_hwctx->stream);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
+        return AVERROR_UNKNOWN;
+    }

-exit:
-    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    cu->cuCtxPopCurrent(&dummy);

    return 0;
 }
@ -239,17 +239,17 @@ exit:
 static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                 const AVFrame *src)
 {
-    CUDAFramesContext       *priv = ctx->internal->priv;
-    AVHWDeviceContext *device_ctx = ctx->device_ctx;
-    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
-    CudaFunctions             *cu = hwctx->internal->cuda_dl;
+    CUDAFramesContext           *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;

    CUcontext dummy;
-    int i, ret;
+    CUresult err;
+    int i;

-    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
-    if (ret < 0)
-        return ret;
+    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
@ -263,25 +263,31 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

-        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
-        if (ret < 0)
-            goto exit;
+        err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
+        if (err != CUDA_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
+            return AVERROR_UNKNOWN;
+        }
    }

-exit:
-    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    err = cu->cuStreamSynchronize(device_hwctx->stream);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    cu->cuCtxPopCurrent(&dummy);

    return 0;
 }

-static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
+static void cuda_device_uninit(AVHWDeviceContext *ctx)
 {
-    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
+    AVCUDADeviceContext *hwctx = ctx->hwctx;

    if (hwctx->internal) {
-        CudaFunctions *cu = hwctx->internal->cuda_dl;
        if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
-            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
+            hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
            hwctx->cuda_ctx = NULL;
        }
        cuda_free_functions(&hwctx->internal->cuda_dl);
@ -316,47 +322,53 @@ error:
    return ret;
 }

-static int cuda_device_create(AVHWDeviceContext *device_ctx,
-                              const char *device,
+static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
                              AVDictionary *opts, int flags)
 {
-    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
+    AVCUDADeviceContext *hwctx = ctx->hwctx;
    CudaFunctions *cu;
    CUdevice cu_device;
    CUcontext dummy;
-    int ret, device_idx = 0;
+    CUresult err;
+    int device_idx = 0;

    if (device)
        device_idx = strtol(device, NULL, 0);

-    if (cuda_device_init(device_ctx) < 0)
+    if (cuda_device_init(ctx) < 0)
        goto error;

    cu = hwctx->internal->cuda_dl;

-    ret = CHECK_CU(cu->cuInit(0));
-    if (ret < 0)
+    err = cu->cuInit(0);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
        goto error;
+    }

-    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
-    if (ret < 0)
+    err = cu->cuDeviceGet(&cu_device, device_idx);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
        goto error;
+    }

-    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
-    if (ret < 0)
+    err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
        goto error;
+    }

    // Setting stream to NULL will make functions automatically use the default CUstream
    hwctx->stream = NULL;

-    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    cu->cuCtxPopCurrent(&dummy);

    hwctx->internal->is_allocated = 1;

    return 0;

 error:
-    cuda_device_uninit(device_ctx);
+    cuda_device_uninit(ctx);
    return AVERROR_UNKNOWN;
 }