gpt4 book ai didi

Java - 优化包含最简单数学的函数

转载 作者:行者123 更新时间:2023-11-29 06:11:32 26 4
gpt4 key购买 nike

我所在的团队正在用 Java 开发视频解码器,我的任务是为团队优化下面这个方法,但我看不出有什么好的优化办法。这个函数似乎没有多少显著提速的空间,因为它主要由简单的加法、减法等运算组成。

/**
 * Runs a two-pass 8x8 fixed-point inverse transform over {@code dataBlockBuffer}
 * and stores the 64 resulting samples into
 * {@code imageSlice.MacroBlocks[macroBlockIndex].DataBlocks[dataBlockIndex]}.
 *
 * <p>Pass 1 walks the eight columns of {@code dataBlockBuffer} (stride 8) and
 * writes rounded, descaled values into an {@code int} scratch block. Pass 2
 * walks the eight rows of that scratch block (stride 1) and writes
 * {@code short} samples.
 *
 * <p>NOTE(review): the constant names and butterfly layout look like the
 * libjpeg "slow integer" IDCT (jidctint) -- confirm against the definitions
 * of the FIX_* / BITS / F1..F3 constants, which are not visible here.
 *
 * @param macroBlockIndex index into {@code imageSlice.MacroBlocks}
 * @param dataBlockIndex  index into that macro block's {@code DataBlocks}
 */
void inverseTransform(int macroBlockIndex, int dataBlockIndex) {
    final int[] intermediate = new int[64];
    final short[] samples = new short[64];

    // Rounding term added before the pass-1 right shift by F2.
    final int pass1Rounding = 1 << F1;

    // ---- Pass 1: one column of dataBlockBuffer per iteration ----
    for (int col = 0; col < 8; col++) {
        final boolean onlyFirstTermNonZero =
                dataBlockBuffer[col + 8] == 0
                && dataBlockBuffer[col + 16] == 0
                && dataBlockBuffer[col + 24] == 0
                && dataBlockBuffer[col + 32] == 0
                && dataBlockBuffer[col + 40] == 0
                && dataBlockBuffer[col + 48] == 0
                && dataBlockBuffer[col + 56] == 0;

        if (onlyFirstTermNonZero) {
            // Shortcut: every output of this column is the scaled first entry.
            final int dcValue = dataBlockBuffer[col] << PASS1_BITS;
            for (int offset = 0; offset < 64; offset += 8) {
                intermediate[col + offset] = dcValue;
            }
        } else {
            // Even part: inputs at rows 2 and 6, then rows 0 and 4.
            final int in2 = dataBlockBuffer[col + 16];
            final int in6 = dataBlockBuffer[col + 48];

            final int evenShared = (in2 + in6) * FIX_0_541196100;
            final int evenB = evenShared + in6 * -FIX_1_847759065;
            final int evenA = evenShared + in2 * FIX_0_765366865;

            final int in0 = dataBlockBuffer[col];
            final int in4 = dataBlockBuffer[col + 32];

            final int sum04 = (in0 + in4) << BITS;
            final int diff04 = (in0 - in4) << BITS;

            final int even0 = sum04 + evenA;
            final int even3 = sum04 - evenA;
            final int even1 = diff04 + evenB;
            final int even2 = diff04 - evenB;

            // Odd part: inputs at rows 7, 5, 3, 1 (read in that order).
            final int in7 = dataBlockBuffer[col + 56];
            final int in5 = dataBlockBuffer[col + 40];
            final int in3 = dataBlockBuffer[col + 24];
            final int in1 = dataBlockBuffer[col + 8];

            final int sum71 = in7 + in1;
            final int sum53 = in5 + in3;
            final int sum73 = in7 + in3;
            final int sum51 = in5 + in1;
            final int oddShared = (sum73 + sum51) * FIX_1_175875602;

            final int rot71 = sum71 * -FIX_0_899976223;
            final int rot53 = sum53 * -FIX_2_562915447;
            final int rot73 = sum73 * -FIX_1_961570560 + oddShared;
            final int rot51 = sum51 * -FIX_0_390180644 + oddShared;

            final int odd7 = in7 * FIX_0_298631336 + rot71 + rot73;
            final int odd5 = in5 * FIX_2_053119869 + rot53 + rot51;
            final int odd3 = in3 * FIX_3_072711026 + rot53 + rot73;
            final int odd1 = in1 * FIX_1_501321110 + rot71 + rot51;

            // Combine even/odd halves, round, and descale by F2.
            intermediate[col] = (even0 + odd1 + pass1Rounding) >> F2;
            intermediate[col + 56] = (even0 - odd1 + pass1Rounding) >> F2;
            intermediate[col + 8] = (even1 + odd3 + pass1Rounding) >> F2;
            intermediate[col + 48] = (even1 - odd3 + pass1Rounding) >> F2;
            intermediate[col + 16] = (even2 + odd5 + pass1Rounding) >> F2;
            intermediate[col + 40] = (even2 - odd5 + pass1Rounding) >> F2;
            intermediate[col + 24] = (even3 + odd7 + pass1Rounding) >> F2;
            intermediate[col + 32] = (even3 - odd7 + pass1Rounding) >> F2;
        }
    }

    // ---- Pass 2: one row of the scratch block per iteration ----
    for (int row = 0; row < 64; row += 8) {
        // Even part: inputs at columns 2 and 6, then columns 0 and 4.
        final int in2 = intermediate[row + 2];
        final int in6 = intermediate[row + 6];

        final int evenShared = (in2 + in6) * FIX_0_541196100;
        final int evenB = evenShared + in6 * -FIX_1_847759065;
        final int evenA = evenShared + in2 * FIX_0_765366865;

        final int in0 = intermediate[row];
        final int in4 = intermediate[row + 4];

        final int sum04 = (in0 + in4) << BITS;
        final int diff04 = (in0 - in4) << BITS;

        final int even0 = sum04 + evenA;
        final int even3 = sum04 - evenA;
        final int even1 = diff04 + evenB;
        final int even2 = diff04 - evenB;

        // Odd part: inputs at columns 7, 5, 3, 1.
        final int in7 = intermediate[row + 7];
        final int in5 = intermediate[row + 5];
        final int in3 = intermediate[row + 3];
        final int in1 = intermediate[row + 1];

        final int sum71 = in7 + in1;
        final int sum53 = in5 + in3;
        final int sum73 = in7 + in3;
        final int sum51 = in5 + in1;
        final int oddShared = (sum73 + sum51) * FIX_1_175875602;

        final int rot71 = sum71 * -FIX_0_899976223;
        final int rot53 = sum53 * -FIX_2_562915447;
        final int rot73 = sum73 * -FIX_1_961570560 + oddShared;
        final int rot51 = sum51 * -FIX_0_390180644 + oddShared;

        final int odd7 = in7 * FIX_0_298631336 + rot71 + rot73;
        final int odd5 = in5 * FIX_2_053119869 + rot53 + rot51;
        final int odd3 = in3 * FIX_3_072711026 + rot53 + rot73;
        final int odd1 = in1 * FIX_1_501321110 + rot71 + rot51;

        // Signed shift (>>) keeps the sign of negative results; no rounding
        // term is added in this pass.
        samples[row] = (short) ((even0 + odd1) >> F3);
        samples[row + 7] = (short) ((even0 - odd1) >> F3);
        samples[row + 1] = (short) ((even1 + odd3) >> F3);
        samples[row + 6] = (short) ((even1 - odd3) >> F3);
        samples[row + 2] = (short) ((even2 + odd5) >> F3);
        samples[row + 5] = (short) ((even2 - odd5) >> F3);
        samples[row + 3] = (short) ((even3 + odd7) >> F3);
        samples[row + 4] = (short) ((even3 - odd7) >> F3);
    }

    // Copy the finished samples into the destination block, element by element.
    final short[] destination = imageSlice.MacroBlocks[macroBlockIndex].DataBlocks[dataBlockIndex];
    for (int k = 0; k < samples.length; k++) {
        destination[k] = samples[k];
    }
}

我是否应该尽可能把这些基本运算合并起来?或者你们有什么其他建议?

最佳答案

我没有看出任何明显可以优化的地方。除了亚历克斯所说的之外,还有两个小建议可能会有所帮助:

1) 第一个循环中那条很长的 if 语句包含许多可能不成立的条件。您是否已对这些条件排序,把最有可能不成立的放在最前面?由于采用短路求值,越早遇到 false,求值整个表达式所需的工作就越少。

2) 您在两个 for 循环之外声明了很多变量,我明白您为什么要这样做。但如果把声明移到两个循环内部,让变量的作用域尽可能局部化,JVM 也许能更好地进行优化。

对于这两种情况,您都需要进行一些计时运行,看看它们是否会产生真正的影响。您可能还想使用探查器来查看代码将大部分时间花在哪里。

我还有另一条评论。在像这样的行中:

data[pointer + 7] = (short) ((tmp10 - tmp3) >> F3);

您使用的是 >>(带符号右移)而不是 >>>(无符号右移)来对一个可能为负的数进行移位。如果 tmp3 > tmp10,您确定这就是您想要的行为吗?

关于Java - 优化包含最简单数学的函数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/6653612/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com