
NumPy outperforms TensorFlow and PyTorch with the same hyperparameters

I wrote three neural networks for Joel's FizzBuzz implementation, one each in NumPy, TensorFlow, and PyTorch. With the same hyperparameters and 1k epochs, my NumPy script converges to a loss of 0.002, while my PyTorch and TensorFlow versions are still bouncing around 0.6. Could somebody help me figure out what is going on? I don't believe Google and [Facebook + Nvidia] built something whose only advantage over NumPy is the GPU boost. My code is below.

Numpy

import numpy as np

input_size = 10
epochs = 1000
batches = 64
lr = 0.01


def sig(val):
    return 1 / (1 + np.exp(-val))


def sig_d(val):
    sig_val = sig(val)
    return sig_val * (1 - sig_val)


def binary_enc(num):
    ret = [int(i) for i in '{0:b}'.format(num)]
    return [0] * (input_size - len(ret)) + ret


def binary_dec(array):
    ret = 0
    for i in array:
        ret = ret * 2 + int(i)
    return ret


def training_test_gen(x, y):
    assert len(x) == len(y)
    indices = np.random.permutation(range(len(x)))
    split_size = int(0.9 * len(indices))
    trX = x[indices[:split_size]]
    trY = y[indices[:split_size]]
    teX = x[indices[split_size:]]
    teY = y[indices[split_size:]]
    return trX, trY, teX, teY


def x_y_gen():
    x = []
    y = []
    for i in range(1000):
        x.append(binary_enc(i))
        if i % 15 == 0:
            y.append([1, 0, 0, 0])
        elif i % 5 == 0:
            y.append([0, 1, 0, 0])
        elif i % 3 == 0:
            y.append([0, 0, 1, 0])
        else:
            y.append([0, 0, 0, 1])
    return training_test_gen(np.array(x), np.array(y))


def check_fizbuz(i):
    if i % 15 == 0:
        return 'fizbuz'
    elif i % 5 == 0:
        return 'buz'
    elif i % 3 == 0:
        return 'fiz'
    else:
        return 'number'


trX, trY, teX, teY = x_y_gen()

w1 = np.random.randn(10, 100)
w2 = np.random.randn(100, 4)

b1 = np.zeros((1, 100))
b2 = np.zeros((1, 4))

no_of_batches = int(len(trX) / batches)
for epoch in range(epochs):
    for batch in range(no_of_batches):
        # forward
        start = batch * batches
        end = start + batches
        x = trX[start:end]
        y = trY[start:end]
        a2 = x.dot(w1) + b1
        h2 = sig(a2)
        a3 = h2.dot(w2) + b2
        hyp = sig(a3)
        error = hyp - y
        loss = (error ** 2).mean()

        # backward
        outerror = error
        outgrad = outerror * sig_d(a3)
        outdelta = h2.T.dot(outgrad)
        outbiasdelta = np.ones([1, batches]).dot(outgrad)

        hiddenerror = outerror.dot(w2.T)
        hiddengrad = hiddenerror * sig_d(a2)
        hiddendelta = x.T.dot(hiddengrad)
        hiddenbiasdelta = np.ones([1, batches]).dot(hiddengrad)

        w1 -= hiddendelta * lr
        b1 -= hiddenbiasdelta * lr
        w2 -= outdelta * lr
        b2 -= outbiasdelta * lr
    print(epoch, loss)

# test
a2 = teX.dot(w1) + b1
h2 = sig(a2)
a3 = h2.dot(w2) + b2
hyp = sig(a3)
outli = ['fizbuz', 'buz', 'fiz', 'number']
for i in range(len(teX)):
    num = binary_dec(teX[i])
    print(
        'Number: {} -- Actual: {} -- Prediction: {}'.format(
            num, check_fizbuz(num), outli[hyp[i].argmax()]))
print('Test loss: ', np.mean(teY - hyp))

torch

import numpy as np
import torch as th
from torch.autograd import Variable


input_size = 10
epochs = 1000
batches = 64
lr = 0.01


def binary_enc(num):
    ret = [int(i) for i in '{0:b}'.format(num)]
    return [0] * (input_size - len(ret)) + ret


def binary_dec(array):
    ret = 0
    for i in array:
        ret = ret * 2 + int(i)
    return ret


def training_test_gen(x, y):
    assert len(x) == len(y)
    indices = np.random.permutation(range(len(x)))
    split_size = int(0.9 * len(indices))
    trX = x[indices[:split_size]]
    trY = y[indices[:split_size]]
    teX = x[indices[split_size:]]
    teY = y[indices[split_size:]]
    return trX, trY, teX, teY


def x_y_gen():
    x = []
    y = []
    for i in range(1000):
        x.append(binary_enc(i))
        if i % 15 == 0:
            y.append([1, 0, 0, 0])
        elif i % 5 == 0:
            y.append([0, 1, 0, 0])
        elif i % 3 == 0:
            y.append([0, 0, 1, 0])
        else:
            y.append([0, 0, 0, 1])
    return training_test_gen(np.array(x), np.array(y))


def check_fizbuz(i):
    if i % 15 == 0:
        return 'fizbuz'
    elif i % 5 == 0:
        return 'buz'
    elif i % 3 == 0:
        return 'fiz'
    else:
        return 'number'


trX, trY, teX, teY = x_y_gen()
if th.cuda.is_available():
    dtype = th.cuda.FloatTensor
else:
    dtype = th.FloatTensor
x = Variable(th.from_numpy(trX).type(dtype), requires_grad=False)
y = Variable(th.from_numpy(trY).type(dtype), requires_grad=False)

w1 = Variable(th.randn(10, 100).type(dtype), requires_grad=True)
w2 = Variable(th.randn(100, 4).type(dtype), requires_grad=True)

b1 = Variable(th.zeros(1, 100).type(dtype), requires_grad=True)
b2 = Variable(th.zeros(1, 4).type(dtype), requires_grad=True)

no_of_batches = int(len(trX) / batches)
for epoch in range(epochs):
    for batch in range(no_of_batches):
        start = batch * batches
        end = start + batches
        x_ = x[start:end]
        y_ = y[start:end]

        a2 = x_.mm(w1)
        a2 = a2.add(b1.expand_as(a2))
        h2 = a2.sigmoid()

        a3 = h2.mm(w2)
        a3 = a3.add(b2.expand_as(a3))
        hyp = a3.sigmoid()

        error = hyp - y_
        loss = error.pow(2).sum()
        loss.backward()

        w1.data -= lr * w1.grad.data
        w2.data -= lr * w2.grad.data
        b1.data -= lr * b1.grad.data
        b2.data -= lr * b2.grad.data
        w1.grad.data.zero_()
        w2.grad.data.zero_()
    print(epoch, error.mean().data[0])

TensorFlow

import tensorflow as tf
import numpy as np


input_size = 10
epochs = 1000
batches = 64
learning_rate = 0.01


def binary_enc(num):
    ret = [int(i) for i in '{0:b}'.format(num)]
    return [0] * (input_size - len(ret)) + ret


def binary_dec(array):
    ret = 0
    for i in array:
        ret = ret * 2 + int(i)
    return ret


def training_test_gen(x, y):
    assert len(x) == len(y)
    indices = np.random.permutation(range(len(x)))
    split_size = int(0.9 * len(indices))
    trX = x[indices[:split_size]]
    trY = y[indices[:split_size]]
    teX = x[indices[split_size:]]
    teY = y[indices[split_size:]]
    return trX, trY, teX, teY


def x_y_gen():
    x = []
    y = []
    for i in range(1000):
        x.append(binary_enc(i))
        if i % 15 == 0:
            y.append([1, 0, 0, 0])
        elif i % 5 == 0:
            y.append([0, 1, 0, 0])
        elif i % 3 == 0:
            y.append([0, 0, 1, 0])
        else:
            y.append([0, 0, 0, 1])
    return training_test_gen(np.array(x), np.array(y))


def check_fizbuz(i):
    if i % 15 == 0:
        return 'fizbuz'
    elif i % 5 == 0:
        return 'buz'
    elif i % 3 == 0:
        return 'fiz'
    else:
        return 'number'


trX, trY, teX, teY = x_y_gen()

x = tf.placeholder(tf.float32, [None, 10], name='x')
y = tf.placeholder(tf.float32, [None, 4], name='y')

lr = tf.placeholder(tf.float32, [], name='lr')

w1 = tf.Variable(tf.truncated_normal([10, 100]))
w2 = tf.Variable(tf.truncated_normal([100, 4]))

b1 = tf.Variable(tf.zeros(100))
b2 = tf.Variable(tf.zeros(4))


a2 = tf.sigmoid(tf.add(tf.matmul(x, w1), b1))
hyp = tf.sigmoid(tf.add(tf.matmul(a2, w2), b2))

cost = tf.reduce_mean(tf.square(hyp - y))
optmizer = tf.train.GradientDescentOptimizer(lr).minimize(cost)

prediction = tf.argmax(hyp, 1)

no_of_batches = int(len(trX) / batches)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        p = np.random.permutation(range(len(trX)))
        trX = trX[p]
        trY = trY[p]
        for batch in range(no_of_batches):
            start = batch * batches
            end = start + batches
            input_batch = trX[start: end]
            target_batch = trY[start: end]
            sess.run(
                optmizer, feed_dict={x: input_batch, y: target_batch, lr: learning_rate})
        if epoch % 100 == 0:
            a = np.argmax(teY, axis=1)
            b = sess.run(prediction, feed_dict={x: teX})
            acc = np.mean(a == b)
            out_cost = sess.run(
                cost, feed_dict={x: input_batch, y: target_batch, lr: learning_rate})
            print('cost - {} --- accuracy - {}'.format(out_cost.mean(), acc))

Best Answer

I can't say I've checked all of your code (that would be too much work), but I notice that you never zero the bias gradients in the PyTorch version, so they keep growing and kill the optimization. (There may be other problems as well.)

A more idiomatic way to write this is:

optimizer = torch.optim.SGD(net.parameters(), lr)

for _ in range(steps):
    optimizer.zero_grad()
    # ...
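
For reference, a minimal sketch of what that loop could look like for this network (the nn.Sequential model, MSELoss, and full-batch updates below are illustrative choices, not taken from the original code):

import torch

# two-layer network matching the question's shapes: 10 -> 100 -> 4
net = torch.nn.Sequential(
    torch.nn.Linear(10, 100),
    torch.nn.Sigmoid(),
    torch.nn.Linear(100, 4),
    torch.nn.Sigmoid(),
)
loss_fn = torch.nn.MSELoss()                              # mean over batch and outputs, like the NumPy loss
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

inputs = torch.from_numpy(trX).float()                    # trX/trY from the question's x_y_gen()
targets = torch.from_numpy(trY).float()

for step in range(1000):
    optimizer.zero_grad()          # zeroes the gradients of every parameter, biases included
    loss = loss_fn(net(inputs), targets)
    loss.backward()
    optimizer.step()               # applies the SGD update to weights and biases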

As a general note, it doesn't matter which framework you use here: numbers are numbers. If you feed the same batches and initialize the weights the same way, you should get roughly the same gradients.
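
One way to convince yourself of this is to feed the same batch and the same initial weights to both implementations and compare the gradients directly. A rough sketch (assuming the trX/trY arrays from the question, and differentiating a mean squared error in both cases):

import numpy as np
import torch

np.random.seed(0)
w1 = np.random.randn(10, 100)
b1 = np.zeros((1, 100))
w2 = np.random.randn(100, 4)
b2 = np.zeros((1, 4))

xb = trX[:64].astype(np.float64)
yb = trY[:64].astype(np.float64)

# NumPy: manual gradient of mean((sigmoid(...) - y) ** 2) w.r.t. w2
a2 = xb.dot(w1) + b1
h2 = 1 / (1 + np.exp(-a2))
a3 = h2.dot(w2) + b2
hyp = 1 / (1 + np.exp(-a3))
err = hyp - yb
outgrad = (2.0 / err.size) * err * hyp * (1 - hyp)
grad_w2_np = h2.T.dot(outgrad)

# PyTorch: autograd gradient of the same mean squared error with the same weights
tw1 = torch.tensor(w1, requires_grad=True)
tb1 = torch.tensor(b1, requires_grad=True)
tw2 = torch.tensor(w2, requires_grad=True)
tb2 = torch.tensor(b2, requires_grad=True)
tx, ty = torch.tensor(xb), torch.tensor(yb)
loss = (torch.sigmoid(torch.sigmoid(tx.mm(tw1) + tb1).mm(tw2) + tb2) - ty).pow(2).mean()
loss.backward()

print(np.abs(grad_w2_np - tw2.grad.numpy()).max())        # agrees up to floating-point noise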

编辑

There is another big difference: in NumPy you compute the mean of the squared error (across the batch and the output channels), whereas in PyTorch you sum them. That is a 40x difference, which affects both the loss and the gradients.
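
Concretely, keeping the question's manual update loop, the minimal changes would be roughly the following (a sketch reusing the original variable names):

error = hyp - y_
loss = error.pow(2).mean()        # mean instead of sum, matching the NumPy loss
loss.backward()

w1.data -= lr * w1.grad.data
w2.data -= lr * w2.grad.data
b1.data -= lr * b1.grad.data
b2.data -= lr * b2.grad.data

# zero every gradient, not just the weight gradients
w1.grad.data.zero_()
w2.grad.data.zero_()
b1.grad.data.zero_()
b2.grad.data.zero_()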

The original question on Stack Overflow: https://stackoverflow.com/questions/43709859/
