package org.nd4j.linalg.jcublas.ops.executioner;

import org.nd4j.linalg.api.blas.BlasBufferUtil;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.complex.IComplexNDArray;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.Accumulation;
import org.nd4j.linalg.api.ops.BroadcastOp;
import org.nd4j.linalg.api.ops.Op;
import org.nd4j.linalg.api.ops.ScalarOp;
import org.nd4j.linalg.api.ops.TransformOp;
import org.nd4j.linalg.api.ops.executioner.DefaultOpExecutioner;
import org.nd4j.linalg.api.ops.executioner.OpExecutioner;
import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDimensions;
import org.nd4j.linalg.api.ops.impl.transforms.arithmetic.CopyOp;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.SimpleJCublas;
import org.nd4j.linalg.jcublas.buffer.JCudaBuffer;
import org.nd4j.linalg.jcublas.context.ContextHolder;
import org.nd4j.linalg.jcublas.context.CudaContext;
import org.nd4j.linalg.jcublas.gpumetrics.GpuMetrics;
import org.nd4j.linalg.jcublas.kernel.KernelFunctionLoader;
import org.nd4j.linalg.jcublas.kernel.KernelFunctions;
import org.nd4j.linalg.jcublas.util.KernelParamsWrapper;
import org.nd4j.linalg.jcublas.util.PointerUtil;
import org.nd4j.linalg.util.ArrayUtil;

/* loaded from: input_file:org/nd4j/linalg/jcublas/ops/executioner/JCudaExecutioner.class */
public class JCudaExecutioner extends DefaultOpExecutioner {
    // One-element float buffer allocated once in the constructor; toArgs returns it when an op supplies no extra arguments.
    private JCudaBuffer dummyFloatPointer;
    // One-element double buffer allocated once in the constructor; the double-precision counterpart used by toArgs.
    private JCudaBuffer dummyDoublePointer;

    public JCudaExecutioner() {
        try {
            SimpleJCublas.init();
            this.dummyFloatPointer = KernelFunctions.alloc(new float[]{1.0f});
            this.dummyDoublePointer = KernelFunctions.alloc(new double[]{1.0d});
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Executes an accumulation along the given dimensions.
     *
     * <p>Negative dimensions are normalised in place by adding the rank of {@code x}
     * (the caller's array is modified). When as many dimensions as the rank are given,
     * the call is treated as a reduction over the whole array and a scalar is returned.
     * Pass-through ops are delegated to the op itself.
     *
     * <p>Complex inputs are reduced tensor-by-tensor via {@code opForDimension}; all other
     * inputs are handed to the kernel-based {@code invoke}.
     *
     * @param accumulation the accumulation to run
     * @param iArr         the dimensions to reduce along; must contain at least one entry
     * @return the reduced array (or {@code x} itself when the reduction would not shrink it)
     */
    public INDArray exec(Accumulation accumulation, int... iArr) {
        ContextHolder.getInstance().setContext();
        final int rank = accumulation.x().rank();
        for (int i = 0; i < iArr.length; i++) {
            if (iArr[i] < 0) {
                iArr[i] += rank;
            }
        }
        if (iArr.length == rank) {
            // Reducing over every dimension is the same as reducing the whole array.
            iArr = new int[]{Integer.MAX_VALUE};
        }
        if (accumulation.isPassThrough()) {
            accumulation.exec(iArr);
            return accumulation.z();
        }

        // The decision must be made on the op's input, not on the op object itself:
        // an Accumulation is never an IComplexNDArray, so testing the op made the
        // complex branch below unreachable.
        final boolean complexInput = accumulation.x() instanceof IComplexNDArray;

        if (iArr[0] == Integer.MAX_VALUE) {
            if (complexInput) {
                return Nd4j.scalar(execAndReturn(accumulation).getFinalResultComplex());
            }
            return Nd4j.scalar(execAndReturn(accumulation).getFinalResult().doubleValue());
        }

        final int[] resultShape = accumulationResultShape(accumulation.x().shape(), iArr);

        if (complexInput) {
            IComplexNDArray complexResult = Nd4j.createComplex(resultShape);
            final int tensorCount = accumulation.x().tensorssAlongDimension(iArr);
            for (int t = 0; t < tensorCount; t++) {
                Accumulation perTensor = (Accumulation) accumulation.opForDimension(t, iArr);
                complexResult.putScalar(t, execAndReturn(perTensor).getFinalResultComplex());
            }
            if (complexResult.ordering() == 'c') {
                complexResult.setStride(ArrayUtil.reverseCopy(complexResult.stride()));
            }
            return complexResult;
        }

        if (ArrayUtil.prod(resultShape) == accumulation.x().length()) {
            // Nothing would be reduced; hand back the input unchanged.
            return accumulation.x();
        }
        INDArray result = Nd4j.create(resultShape);
        invoke(accumulation, iArr, result, true);
        return result;
    }

    /**
     * Computes the shape of an accumulation result: the input shape with the reduced
     * dimensions removed, padded to two dimensions (row vector when the first reduced
     * dimension is 0, column vector otherwise, {@code 1 x 1} when nothing remains).
     */
    private static int[] accumulationResultShape(int[] shape, int[] dimensions) {
        int[] remaining = ArrayUtil.removeIndex(shape, dimensions);
        if (remaining.length == 1) {
            return dimensions[0] == 0 ? new int[]{1, remaining[0]} : new int[]{remaining[0], 1};
        }
        if (remaining.length == 0) {
            return new int[]{1, 1};
        }
        return remaining;
    }

    /**
     * Sets the CUDA context via the {@link ContextHolder} and then delegates the
     * dimension-wise transform to the superclass implementation.
     *
     * @param transformOp the transform to run
     * @param iArr        the dimensions to run along
     * @return whatever the superclass returns for this op
     */
    public INDArray execAndReturn(TransformOp transformOp, int... iArr) {
        final ContextHolder holder = ContextHolder.getInstance();
        holder.setContext();
        final INDArray outcome = super.execAndReturn(transformOp, iArr);
        return outcome;
    }

    /**
     * Sets the CUDA context via the {@link ContextHolder} and then delegates the
     * dimension-wise scalar op to the superclass implementation.
     *
     * @param scalarOp the scalar op to run
     * @param iArr     the dimensions to run along
     * @return whatever the superclass returns for this op
     */
    public INDArray execAndReturn(ScalarOp scalarOp, int... iArr) {
        final ContextHolder holder = ContextHolder.getInstance();
        holder.setContext();
        final INDArray outcome = super.execAndReturn(scalarOp, iArr);
        return outcome;
    }

    /**
     * Sets the CUDA context via the {@link ContextHolder} and then delegates the
     * dimension-wise execution to the superclass implementation.
     *
     * @param op   the op to run
     * @param iArr the dimensions to run along
     * @return whatever the superclass returns for this op
     */
    public Op exec(Op op, int... iArr) {
        final ContextHolder holder = ContextHolder.getInstance();
        holder.setContext();
        final Op executed = super.exec(op, iArr);
        return executed;
    }

    /**
     * Executes an op, dispatching to the kernel-based {@code invoke} overload that
     * matches its kind.
     *
     * <p>Ops with complex input, pass-through ops, copy ops, and any op while the
     * execution mode is {@code JAVA} are delegated to the superclass instead.
     * An op that matches none of the known kinds is returned untouched.
     *
     * @param op the op to run
     * @return the same op instance (or the superclass result for delegated ops)
     */
    public Op exec(Op op) {
        final boolean delegateToSuper = (op.x() instanceof IComplexNDArray)
                || executionMode() == OpExecutioner.ExecutionMode.JAVA
                || op.isPassThrough()
                || (op instanceof CopyOp);
        if (delegateToSuper) {
            return super.exec(op);
        }

        if (op instanceof TransformOp) {
            invoke((TransformOp) op, true);
            return op;
        }
        if (op instanceof Accumulation) {
            // Whole-array reduction: no dimensions, placeholder result array.
            invoke((Accumulation) op, null, Nd4j.scalar(0.0f), true);
            return op;
        }
        if (op instanceof ScalarOp) {
            invoke((ScalarOp) op, true);
            return op;
        }
        if (op instanceof BroadcastOp) {
            invoke((BroadcastOp) op, true);
        }
        return op;
    }

    /** Returns the shared one-element double buffer allocated in the constructor. */
    private JCudaBuffer dummyDouble() {
        return dummyDoublePointer;
    }

    /** Returns the shared one-element float buffer allocated in the constructor. */
    private JCudaBuffer dummyFloat() {
        return dummyFloatPointer;
    }

    /**
     * Runs a transform synchronously through {@code invoke} and returns its result array.
     *
     * @param transformOp the transform to run
     * @return the op's {@code z} array after execution
     */
    public INDArray execAndReturn(TransformOp transformOp) {
        invoke(transformOp, true);
        final INDArray output = transformOp.z();
        return output;
    }

    /**
     * Converts an op's extra arguments into a buffer of the requested data type.
     *
     * <p>When there are no extra arguments ({@code null} or empty) the shared
     * one-element placeholder buffer of that type is returned instead of a new allocation.
     *
     * @param objArr the extra arguments, may be {@code null}
     * @param str    the data type name, either {@code "double"} or {@code "float"}
     * @return a buffer holding the arguments, or the placeholder buffer
     * @throws IllegalArgumentException if {@code str} names any other type
     */
    private JCudaBuffer toArgs(Object[] objArr, String str) {
        switch (str) {
            case "double":
                if (objArr == null || objArr.length < 1) {
                    return dummyDouble();
                }
                return KernelFunctions.alloc(PointerUtil.toDoubles(objArr));
            case "float":
                if (objArr == null || objArr.length < 1) {
                    return dummyFloat();
                }
                return KernelFunctions.alloc(PointerUtil.toFloats(objArr));
            default:
                throw new IllegalArgumentException("Illegal datatype");
        }
    }

    /**
     * Folds the per-block partial results in {@code iNDArray} into the accumulation's
     * final result.
     *
     * <p>The op's {@code x} is replaced by {@code iNDArray}, the final transform is
     * switched off while the partials are combined with {@code combineSubResults},
     * then switched back on, the original {@code n} is restored, and the result is
     * passed through {@code getAndSetFinalResult}. A single-element input is taken
     * as the result directly.
     *
     * @param accumulation the accumulation whose final result is being set
     * @param iNDArray     the array of partial results
     */
    public void calculateBlockResult(Accumulation accumulation, INDArray iNDArray) {
        final int originalN = accumulation.n();
        accumulation.setX(iNDArray);
        accumulation.setApplyFinalTransform(false);

        double combined = accumulation.zeroDouble();
        int position = 0;
        while (position < iNDArray.length()) {
            // Read straight from the backing buffer, honouring offset and element-wise stride.
            double partial = iNDArray.data().getDouble(iNDArray.offset() + (position * iNDArray.elementWiseStride()));
            combined = accumulation.combineSubResults(partial, combined);
            position++;
        }
        if (iNDArray.length() == 1) {
            combined = iNDArray.getDouble(0);
        }

        accumulation.setFinalResult(Double.valueOf(combined));
        accumulation.setApplyFinalTransform(true);
        accumulation.setN(originalN);
        double finalValue = accumulation.getFinalResult().doubleValue();
        accumulation.getAndSetFinalResult(finalValue);
    }

    /**
     * Launches the kernel for a broadcast op.
     *
     * <p>If no kernel exists for the op, the execution mode is {@code JAVA}, or the op is
     * a pass-through or copy op, the op is executed by the superclass and no kernel is
     * launched; {@code null} is returned in that case (same convention as the transform
     * overload).
     *
     * @param broadcastOp the broadcast op; its {@code y} must not be {@code null}
     * @param z           whether to synchronise after the launch
     * @return the context the kernel ran in, or {@code null} when the superclass handled the op
     * @throws IllegalArgumentException if the op has no {@code y}
     * @throws RuntimeException         wrapping any exception raised while preparing or launching the kernel
     */
    private CudaContext invoke(BroadcastOp broadcastOp, boolean z) {
        ContextHolder.getInstance().setContext();
        if (!KernelFunctionLoader.getInstance().exists(broadcastOp.name()) || executionMode() == OpExecutioner.ExecutionMode.JAVA || broadcastOp.isPassThrough() || (broadcastOp instanceof CopyOp)) {
            super.exec(broadcastOp);
            // The superclass has already produced the result; do not launch a kernel on top of it.
            return null;
        }
        // Validate y before it is dereferenced for the default dimensions below.
        if (broadcastOp.y() == null) {
            throw new IllegalArgumentException("Op has no y to broadcast");
        }
        final int[] dimensions = broadcastOp.getDimension() == null ? BroadcastDimensions.getDimensions(broadcastOp.y().shape()) : broadcastOp.getDimension();
        final GpuMetrics blockAndThreads = GpuMetrics.blockAndThreads(getType(broadcastOp), broadcastOp.n());
        blockAndThreads.setGridSizeNotOverMax(512);
        // Ceiling division: enough threads per block to cover n elements across the grid.
        blockAndThreads.setBlockSizeNotOverMax(((broadcastOp.n() + blockAndThreads.getGridSize()) - 1) / blockAndThreads.getGridSize());
        blockAndThreads.setSharedMemoryNotOverMax(1);
        try (KernelParamsWrapper wrapper = new KernelParamsWrapper(
                broadcastOp,
                z,
                broadcastOp.x(),
                KernelFunctions.alloc(PointerUtil.toShapeInfoBuffer(broadcastOp.x(), dimensions)),
                broadcastOp.y(),
                KernelFunctions.alloc(PointerUtil.toShapeInfoBuffer(broadcastOp.y())),
                broadcastOp.z(),
                KernelFunctions.alloc(PointerUtil.toShapeInfoBuffer(broadcastOp.z(), dimensions)),
                KernelFunctions.alloc(dimensions),
                Integer.valueOf(dimensions.length),
                KernelFunctions.alloc(blockAndThreads.getGpuDefinitionInfo())).setResultArray(broadcastOp.z())) {
            invokeFunction(broadcastOp, z, blockAndThreads, wrapper.getContext(), wrapper.getKernelParameters());
            CudaContext context = wrapper.getContext();
            if (z) {
                wrapper.sync();
            }
            return context;
        } catch (Exception e) {
            throw new RuntimeException("Could not execute kernel: Kernel launch was: " + blockAndThreads, e);
        }
    }

    /** Maps {@code true} to 1 and {@code false} to 0. */
    private int toInt(boolean z) {
        if (z) {
            return 1;
        }
        return 0;
    }

    /**
     * Launches the kernel for an accumulation, either over the whole array or along
     * the given dimensions.
     *
     * <p>For a whole-array reduction ({@code iArr} is {@code null}, empty, or starts with
     * {@code Integer.MAX_VALUE}) the supplied result array is replaced by a fresh array
     * sized to the launch grid. For a dimension-wise reduction the grid is capped at
     * 1000 tensors. Inputs with a negative BLAS stride are duplicated first, and for
     * pairwise accumulations {@code y} is brought into the same ordering as {@code x}.
     *
     * @param accumulation the accumulation to run
     * @param iArr         the dimensions to reduce along, or {@code null} for the whole array
     * @param iNDArray     the array receiving the result (ignored for whole-array reductions)
     * @param z            whether to synchronise after the launch
     * @return the context the kernel ran in
     * @throws RuntimeException wrapping any exception raised while launching the kernel
     */
    private CudaContext invoke(Accumulation accumulation, int[] iArr, INDArray iNDArray, boolean z) {
        ContextHolder.getInstance().setContext();
        if (!KernelFunctionLoader.getInstance().exists(accumulation.name()) || executionMode() == OpExecutioner.ExecutionMode.JAVA || accumulation.isPassThrough()) {
            // NOTE(review): unlike the transform overload this does not return after the
            // superclass fallback; the kernel launch below still runs. Left as-is because the
            // dimension-wise caller relies on iNDArray being filled by the kernel - confirm
            // the intended fallback before changing it.
            super.exec(accumulation);
        }
        final GpuMetrics blockAndThreads = GpuMetrics.blockAndThreads(getType(accumulation), accumulation.n());
        if (iArr == null || iArr.length < 1 || iArr[0] == Integer.MAX_VALUE) {
            blockAndThreads.setSharedMemoryNotOverMax(Math.max(accumulation.n() * accumulation.x().data().getElementSize(), 1024));
            iNDArray = Nd4j.create(blockAndThreads.getGridSize());
        } else {
            blockAndThreads.setGridSizeNotOverMax(Math.min(accumulation.x().tensorssAlongDimension(iArr), 1000));
            int tensorLength = accumulation.x().tensorAlongDimension(0, iArr).length();
            blockAndThreads.setBlockSizeNotOverMax(tensorLength);
            blockAndThreads.setSharedMemoryNotOverMax(Math.max(tensorLength * 10 * accumulation.x().data().getElementSize(), 1024));
        }

        if (accumulation.y() != null) {
            // Pairwise accumulation: two inputs, so double the shared memory request.
            blockAndThreads.setSharedMemoryNotOverMax(blockAndThreads.getSharedMemory() * 2);
            if (BlasBufferUtil.getBlasStride(iArr == null ? accumulation.x() : accumulation.x().tensorAlongDimension(0, iArr)) < 0) {
                accumulation.setX(accumulation.x().dup());
            }
            if (BlasBufferUtil.getBlasStride(iArr == null ? accumulation.y() : accumulation.y().tensorAlongDimension(0, iArr)) < 0) {
                accumulation.setY(accumulation.y().dup());
            } else if (accumulation.y().ordering() != accumulation.x().ordering()) {
                accumulation.setY(accumulation.y().dup(accumulation.x().ordering()));
            }
            Object[] pairwiseArgs = {
                Integer.valueOf(accumulation.n()),
                accumulation.x(),
                KernelFunctions.alloc(PointerUtil.toShapeInfoBuffer(accumulation.x(), iArr)),
                accumulation.y(),
                KernelFunctions.alloc(PointerUtil.toShapeInfoBuffer(accumulation.y(), iArr)),
                toArgs(accumulation.extraArgs(), getType(accumulation)),
                iNDArray,
                KernelFunctions.alloc(PointerUtil.toShapeInfoBuffer(iNDArray)),
                KernelFunctions.alloc(blockAndThreads.getGpuDefinitionInfo()),
                KernelFunctions.alloc(iArr == null ? new int[]{Integer.MAX_VALUE} : iArr),
                Integer.valueOf(iArr == null ? 1 : iArr.length),
                Integer.valueOf(toInt(!(iArr == null || iArr[0] == Integer.MAX_VALUE)))
            };
            return runAccumulationKernel(accumulation, iArr, iNDArray, z, blockAndThreads, pairwiseArgs, false);
        }

        final int blasStride = BlasBufferUtil.getBlasStride(iArr == null ? accumulation.x() : accumulation.x().tensorAlongDimension(0, iArr));
        if (blasStride < 0) {
            accumulation.setX(accumulation.x().dup());
        }
        blockAndThreads.setSharedMemoryNotOverMax(Math.max(accumulation.n() * accumulation.x().data().getElementSize(), 1024));
        // Element count passed to the kernel: the backing buffer length, unless the whole
        // array is reduced over a contiguous, zero-offset view, in which case n is used.
        int elementCount = accumulation.x().data().length();
        if (iArr == null && blasStride == 1 && accumulation.x().offset() == 0) {
            elementCount = accumulation.n();
        }
        Object[] singleInputArgs = {
            Integer.valueOf(elementCount),
            accumulation.x(),
            KernelFunctions.alloc(PointerUtil.toShapeInfoBuffer(accumulation.x(), iArr)),
            toArgs(accumulation.extraArgs(), getType(accumulation)),
            iNDArray,
            KernelFunctions.alloc(PointerUtil.toShapeInfoBuffer(iNDArray)),
            KernelFunctions.alloc(blockAndThreads.getGpuDefinitionInfo()),
            KernelFunctions.alloc(iArr == null ? new int[]{Integer.MAX_VALUE} : iArr),
            Integer.valueOf(iArr == null ? 1 : iArr.length),
            Integer.valueOf(toInt(!(iArr == null || iArr[0] == Integer.MAX_VALUE)))
        };
        return runAccumulationKernel(accumulation, iArr, iNDArray, z, blockAndThreads, singleInputArgs, true);
    }

    /**
     * Wraps the kernel arguments, launches the accumulation kernel, optionally syncs,
     * and closes the wrapper exactly once whether or not the launch succeeds.
     *
     * @param reportLaunchConfig whether the failure message should include the launch metrics
     */
    private CudaContext runAccumulationKernel(Accumulation accumulation, int[] dimensions, INDArray result, boolean sync, GpuMetrics metrics, Object[] kernelArgs, boolean reportLaunchConfig) {
        try (KernelParamsWrapper wrapper = new KernelParamsWrapper(accumulation, sync, kernelArgs).setResultOp(accumulation, result, dimensions)) {
            invokeFunction(accumulation, sync, metrics, wrapper.getContext(), wrapper.getKernelParameters());
            CudaContext context = wrapper.getContext();
            if (sync) {
                wrapper.sync();
            }
            return context;
        } catch (Exception e) {
            if (reportLaunchConfig) {
                throw new RuntimeException("Could not execute kernel: Kernel launch was: " + metrics, e);
            }
            throw new RuntimeException("Could not execute kernel", e);
        }
    }

    /**
     * Launches the kernel for a scalar op.
     *
     * <p>If no kernel exists for the op or the execution mode is {@code JAVA}, the op is
     * executed by the superclass and no kernel is launched; {@code null} is returned in
     * that case (same convention as the transform overload). Inputs with a negative
     * BLAS stride are duplicated before the launch.
     *
     * @param scalarOp the scalar op to run
     * @param z        whether to synchronise after the launch
     * @return the context the kernel ran in, or {@code null} when the superclass handled the op
     * @throws RuntimeException wrapping any exception raised while preparing or launching the kernel
     */
    private CudaContext invoke(ScalarOp scalarOp, boolean z) {
        final GpuMetrics blockAndThreads = GpuMetrics.blockAndThreads(getType(scalarOp), scalarOp.n());
        blockAndThreads.setGridSize(scalarOp.n());
        blockAndThreads.setBlockSize(1024);
        blockAndThreads.setSharedMemory(blockAndThreads.getBlockSize() * scalarOp.x().data().getElementSize());
        if (!KernelFunctionLoader.getInstance().exists(scalarOp.name()) || executionMode() == OpExecutioner.ExecutionMode.JAVA) {
            super.exec(scalarOp);
            // The superclass has already produced the result; do not launch a kernel on top of it.
            return null;
        }

        if (scalarOp.y() != null) {
            // Two inputs: double the shared memory request.
            blockAndThreads.setSharedMemory(blockAndThreads.getSharedMemory() * 2);
            if (BlasBufferUtil.getBlasStride(scalarOp.x()) < 0) {
                scalarOp.setX(scalarOp.x().dup());
            }
            if (BlasBufferUtil.getBlasStride(scalarOp.y()) < 0) {
                scalarOp.setY(scalarOp.y().dup());
            }
            try (KernelParamsWrapper wrapper = new KernelParamsWrapper(
                    scalarOp,
                    z,
                    Integer.valueOf(scalarOp.n()),
                    Integer.valueOf(scalarOp.x().offset()),
                    Integer.valueOf(scalarOp.y().offset()),
                    scalarOp.x(),
                    scalarOp.y(),
                    Integer.valueOf(BlasBufferUtil.getBlasStride(scalarOp.x())),
                    Integer.valueOf(BlasBufferUtil.getBlasStride(scalarOp.y())),
                    toArgs(scalarOp.extraArgs(), getType(scalarOp)),
                    scalarOp.z(),
                    Integer.valueOf(blockAndThreads.getBlockSize())).setResultArray(scalarOp.z())) {
                return runScalarKernel(scalarOp, z, blockAndThreads, wrapper);
            } catch (Exception e) {
                throw new RuntimeException("Could not execute kernel", e);
            }
        }

        if (BlasBufferUtil.getBlasStride(scalarOp.x()) < 0) {
            scalarOp.setX(scalarOp.x().dup());
        }
        try (KernelParamsWrapper wrapper = new KernelParamsWrapper(
                scalarOp,
                z,
                Integer.valueOf(scalarOp.n()),
                Integer.valueOf(scalarOp.x().offset()),
                PointerUtil.getPointer(scalarOp),
                scalarOp.x(),
                Integer.valueOf(BlasBufferUtil.getBlasStride(scalarOp.x())),
                toArgs(scalarOp.extraArgs(), getType(scalarOp)),
                scalarOp.z(),
                Integer.valueOf(blockAndThreads.getBlockSize())).setResultArray(scalarOp.z())) {
            return runScalarKernel(scalarOp, z, blockAndThreads, wrapper);
        } catch (Exception e) {
            throw new RuntimeException("Could not execute kernel", e);
        }
    }

    /** Launches the scalar kernel with an already-built wrapper, optionally syncs, and returns the context. */
    private CudaContext runScalarKernel(ScalarOp scalarOp, boolean sync, GpuMetrics metrics, KernelParamsWrapper wrapper) throws Exception {
        invokeFunction(scalarOp, sync, metrics, wrapper.getContext(), wrapper.getKernelParameters());
        CudaContext context = wrapper.getContext();
        if (sync) {
            wrapper.sync();
        }
        return context;
    }

    /**
     * Launches the kernel for a transform op.
     *
     * <p>If no kernel exists for the op, its input is complex, or it is a pass-through op,
     * the op is executed by the superclass and {@code null} is returned. For pairwise
     * transforms, inputs with a negative BLAS stride are duplicated and {@code y} is
     * brought into the same ordering as {@code x} before the launch.
     *
     * @param transformOp the transform to run
     * @param z           whether to synchronise after the launch
     * @return the context the kernel ran in, or {@code null} when the superclass handled the op
     * @throws RuntimeException wrapping any exception raised while preparing or launching the kernel
     */
    private CudaContext invoke(TransformOp transformOp, boolean z) {
        if (!KernelFunctionLoader.getInstance().exists(transformOp.name()) || (transformOp.x() instanceof IComplexNDArray) || transformOp.isPassThrough()) {
            super.exec(transformOp);
            return null;
        }
        final GpuMetrics blockAndThreads = GpuMetrics.blockAndThreads(getType(transformOp), transformOp.n());
        blockAndThreads.setSharedMemoryNotOverMax(blockAndThreads.getBlockSize() * transformOp.x().data().getElementSize());

        if (transformOp.y() != null) {
            // Two inputs: double the shared memory request.
            blockAndThreads.setSharedMemory(blockAndThreads.getSharedMemory() * 2);
            if (BlasBufferUtil.getBlasStride(transformOp.x()) < 0) {
                transformOp.setX(transformOp.x().dup());
            }
            if (BlasBufferUtil.getBlasStride(transformOp.y()) < 0) {
                transformOp.setY(transformOp.y().dup());
            } else if (transformOp.y().ordering() != transformOp.x().ordering()) {
                transformOp.setY(transformOp.y().dup(transformOp.x().ordering()));
            }
            try (KernelParamsWrapper wrapper = new KernelParamsWrapper(
                    transformOp,
                    z,
                    Integer.valueOf(transformOp.n()),
                    Integer.valueOf(transformOp.x().offset()),
                    Integer.valueOf(transformOp.y().offset()),
                    Integer.valueOf(transformOp.z().offset()),
                    transformOp.x(),
                    transformOp.y(),
                    Integer.valueOf(BlasBufferUtil.getBlasStride(transformOp.x())),
                    Integer.valueOf(BlasBufferUtil.getBlasStride(transformOp.y())),
                    toArgs(transformOp.extraArgs(), getType(transformOp)),
                    transformOp.z(),
                    Integer.valueOf(BlasBufferUtil.getBlasStride(transformOp.z())),
                    Integer.valueOf(blockAndThreads.getBlockSize())).setResultArray(transformOp.z())) {
                return runTransformKernel(transformOp, z, blockAndThreads, wrapper);
            } catch (Exception e) {
                throw new RuntimeException("Could not execute kernel", e);
            }
        }

        try (KernelParamsWrapper wrapper = new KernelParamsWrapper(
                transformOp,
                z,
                Integer.valueOf(transformOp.n()),
                Integer.valueOf(transformOp.x().offset()),
                transformOp.x(),
                Integer.valueOf(BlasBufferUtil.getBlasStride(transformOp.x())),
                toArgs(transformOp.extraArgs(), getType(transformOp)),
                transformOp.z(),
                Integer.valueOf(blockAndThreads.getBlockSize())).setResultArray(transformOp.z())) {
            return runTransformKernel(transformOp, z, blockAndThreads, wrapper);
        } catch (Exception e) {
            throw new RuntimeException("Could not execute kernel", e);
        }
    }

    /** Launches the transform kernel with an already-built wrapper, optionally syncs, and returns the context. */
    private CudaContext runTransformKernel(TransformOp transformOp, boolean sync, GpuMetrics metrics, KernelParamsWrapper wrapper) throws Exception {
        invokeFunction(transformOp, sync, metrics, wrapper.getContext(), wrapper.getKernelParameters());
        CudaContext context = wrapper.getContext();
        if (sync) {
            wrapper.sync();
        }
        return context;
    }

    /**
     * Validates the launch metrics and invokes the kernel for the given op.
     *
     * <p>Transforms and accumulations use the kernel named {@code <op name>_strided};
     * every other op uses its plain name.
     *
     * @param op          the op whose kernel is launched
     * @param z           sync flag forwarded to {@code KernelFunctions.invoke}
     * @param gpuMetrics  the launch configuration
     * @param cudaContext the context to launch in
     * @param objArr      the kernel parameters
     */
    private void invokeFunction(Op op, boolean z, GpuMetrics gpuMetrics, CudaContext cudaContext, Object... objArr) {
        gpuMetrics.validate();
        final boolean strided = (op instanceof TransformOp) || (op instanceof Accumulation);
        String kernelName = op.name();
        if (strided) {
            kernelName = kernelName + "_strided";
        }
        KernelFunctions.invoke(gpuMetrics, z, kernelName, getType(op), cudaContext, objArr);
    }

    /**
     * Returns the kernel data type name for an op: {@code "double"} when the buffer
     * behind {@code x} is of type {@code DOUBLE}, {@code "float"} otherwise.
     */
    private String getType(Op op) {
        final boolean isDouble = op.x().data().dataType() == DataBuffer.Type.DOUBLE;
        if (isDouble) {
            return "double";
        }
        return "float";
    }
}
