
linux - How do the sizes of the receive and send buffers affect TCP performance?


I have a question about how the sizes of the buffers passed to recv() and send() affect TCP performance. Consider the following fully working C++ example, which transfers 1 GB of (arbitrary) data from a client to a server over TCP.

#include <unistd.h>
#include <netdb.h>
#include <errno.h>
#include <netinet/tcp.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/ioctl.h>

#include <iostream>
#include <memory>
#include <cstring>
#include <cstdlib>
#include <stdexcept>
#include <algorithm>
#include <string>
#include <sstream>

typedef unsigned long long TimePoint;
typedef unsigned long long Duration;

inline TimePoint getTimePoint() {
    struct ::timeval tv;
    ::gettimeofday(&tv, nullptr);
    return tv.tv_sec * 1000000ULL + tv.tv_usec;
}

const size_t totalSize = 1024 * 1024 * 1024;
const int one = 1;

void server(const size_t blockSize, const std::string& serviceName) {
    std::unique_ptr<char[]> block(new char[blockSize]);
    const size_t atLeastReads = totalSize / blockSize;
    std::cout << "Starting server. Receiving block size is " << blockSize << ", which requires at least " << atLeastReads << " reads." << std::endl;
    addrinfo hints;
    memset(&hints, 0, sizeof(addrinfo));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE;
    hints.ai_protocol = 0;
    addrinfo* firstAddress;
    int result = getaddrinfo(nullptr, serviceName.c_str(), &hints, &firstAddress);
    if (result != 0) return;
    int listener = socket(firstAddress->ai_family, firstAddress->ai_socktype, firstAddress->ai_protocol);
    if (listener == -1) return;
    if (setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) != 0) return;
    if (bind(listener, firstAddress->ai_addr, firstAddress->ai_addrlen) != 0) return;
    freeaddrinfo(firstAddress);
    if (listen(listener, 1) != 0) return;
    while (true) {
        int server = accept(listener, nullptr, nullptr);
        if (server == -1) return;
        u_long mode = 1;
        if (::ioctl(server, FIONBIO, &mode) != 0) return;
        // if (setsockopt(server, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) != 0) return;
        // int size = 64000;
        // if (setsockopt(server, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)) != 0) return;
        // if (setsockopt(server, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) != 0) return;
        std::cout << "Server accepted connection." << std::endl;
        size_t leftToRead = totalSize;
        size_t numberOfReads = 0;
        size_t numberOfIncompleteReads = 0;
        const TimePoint totalStart = ::getTimePoint();
        Duration selectDuration = 0;
        Duration readDuration = 0;
        while (leftToRead > 0) {
            fd_set readSet;
            FD_ZERO(&readSet);
            FD_SET(server, &readSet);
            TimePoint selectStart = ::getTimePoint();
            if (select(server + 1, &readSet, nullptr, nullptr, nullptr) == -1) return;
            selectDuration += ::getTimePoint() - selectStart;
            if (FD_ISSET(server, &readSet) != 0) {
                const size_t toRead = std::min(leftToRead, blockSize);
                TimePoint readStart = ::getTimePoint();
                const ssize_t actuallyRead = recv(server, block.get(), toRead, 0);
                readDuration += ::getTimePoint() - readStart;
                if (actuallyRead == -1)
                    return;
                else if (actuallyRead == 0) {
                    std::cout << "Got 0 bytes, which signals that the client closed the socket." << std::endl;
                    break;
                }
                else if (toRead != actuallyRead)
                    ++numberOfIncompleteReads;
                ++numberOfReads;
                leftToRead -= actuallyRead;
            }
        }
        const Duration totalDuration = ::getTimePoint() - totalStart;
        std::cout << "Receiving took " << totalDuration << " us, transfer rate was " << totalSize / (totalDuration / 1000000.0) << " bytes/s." << std::endl;
        std::cout << "Selects took " << selectDuration << " us, while reads took " << readDuration << " us." << std::endl;
        std::cout << "There were " << numberOfReads << " reads (factor " << numberOfReads / ((double)atLeastReads) << "), of which " << numberOfIncompleteReads << " (" << (numberOfIncompleteReads / ((double)numberOfReads)) * 100.0 << "%) were incomplete." << std::endl << std::endl;
        close(server);
    }
}

bool client(const size_t blockSize, const std::string& hostName, const std::string& serviceName) {
    std::unique_ptr<char[]> block(new char[blockSize]);
    const size_t atLeastWrites = totalSize / blockSize;
    std::cout << "Starting client... " << std::endl;
    addrinfo hints;
    memset(&hints, 0, sizeof(addrinfo));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = 0;
    hints.ai_protocol = 0;
    addrinfo* firstAddress;
    if (getaddrinfo(hostName.c_str(), serviceName.c_str(), &hints, &firstAddress) != 0) return false;
    int client = socket(firstAddress->ai_family, firstAddress->ai_socktype, firstAddress->ai_protocol);
    if (client == -1) return false;
    if (connect(client, firstAddress->ai_addr, firstAddress->ai_addrlen) != 0) return false;
    freeaddrinfo(firstAddress);
    u_long mode = 1;
    if (::ioctl(client, FIONBIO, &mode) != 0) return false;
    // if (setsockopt(client, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) != 0) return false;
    // int size = 64000;
    // if (setsockopt(client, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)) != 0) return false;
    // if (setsockopt(client, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) != 0) return false;
    std::cout << "Client connected. Sending block size is " << blockSize << ", which requires at least " << atLeastWrites << " writes." << std::endl;
    size_t leftToWrite = totalSize;
    size_t numberOfWrites = 0;
    size_t numberOfIncompleteWrites = 0;
    const TimePoint totalStart = ::getTimePoint();
    Duration selectDuration = 0;
    Duration writeDuration = 0;
    while (leftToWrite > 0) {
        fd_set writeSet;
        FD_ZERO(&writeSet);
        FD_SET(client, &writeSet);
        TimePoint selectStart = ::getTimePoint();
        if (select(client + 1, nullptr, &writeSet, nullptr, nullptr) == -1) return false;
        selectDuration += ::getTimePoint() - selectStart;
        if (FD_ISSET(client, &writeSet) != 0) {
            const size_t toWrite = std::min(leftToWrite, blockSize);
            TimePoint writeStart = ::getTimePoint();
            const ssize_t actuallyWritten = send(client, block.get(), toWrite, 0);
            writeDuration += ::getTimePoint() - writeStart;
            if (actuallyWritten == -1)
                return false;
            else if (actuallyWritten == 0) {
                std::cout << "Got 0 bytes, which shouldn't happen!" << std::endl;
                break;
            }
            else if (toWrite != actuallyWritten)
                ++numberOfIncompleteWrites;
            ++numberOfWrites;
            leftToWrite -= actuallyWritten;
        }
    }
    const Duration totalDuration = ::getTimePoint() - totalStart;
    std::cout << "Writing took " << totalDuration << " us, transfer rate was " << totalSize / (totalDuration / 1000000.0) << " bytes/s." << std::endl;
    std::cout << "Selects took " << selectDuration << " us, while writes took " << writeDuration << " us." << std::endl;
    std::cout << "There were " << numberOfWrites << " writes (factor " << numberOfWrites / ((double)atLeastWrites) << "), of which " << numberOfIncompleteWrites << " (" << (numberOfIncompleteWrites / ((double)numberOfWrites)) * 100.0 << "%) were incomplete." << std::endl << std::endl;
    if (shutdown(client, SHUT_WR) != 0) return false;
    if (close(client) != 0) return false;
    return true;
}

int main(int argc, char* argv[]) {
    if (argc < 2)
        std::cout << "Block size is missing." << std::endl;
    else {
        const size_t blockSize = static_cast<size_t>(std::atoll(argv[argc - 1]));
        if (blockSize > 1024 * 1024)
            std::cout << "Block size " << blockSize << " is suspicious." << std::endl;
        else {
            if (argc >= 3) {
                if (!client(blockSize, argv[1], "12000"))
                    std::cout << "The client encountered an error." << std::endl;
            }
            else {
                server(blockSize, "12000");
                std::cout << "The server encountered an error." << std::endl;
            }
        }
    }
    return 0;
}

I run the example on two Linux machines (kernel version 4.1.10-200.fc22.x86_64) connected by a 1 Gbit/s LAN, and I observe the following behavior: if the recv() and send() system calls use buffers of 40 bytes or more, I use all the available bandwidth; if, however, I use smaller buffers on either the server or the client, throughput drops. This behavior seems unaffected by the commented-out socket options (Nagle's algorithm and/or the send/receive buffer sizes).

I can understand that sending data in small chunks may be inefficient: if Nagle's algorithm is turned off and the blocks are small, the TCP and IP header sizes can dominate the useful payload. However, I would not expect the receive buffer size to affect the transfer rate: I would expect a recv() system call to be cheap compared to the cost of actually sending the data over the LAN. Hence, if I send data in 5000-byte blocks, I would expect the transfer rate to be largely independent of the receive buffer size, since the rate at which I call recv() should still exceed the LAN transfer rate. Alas, this is not the case!

I would be grateful if somebody could explain to me what causes the slowdown: is it merely the cost of the system calls, or is something happening at the protocol level?

I ran into this problem while writing a message-based cloud application, and I would appreciate it if somebody could tell me how, in their view, this issue should influence the system architecture. For various reasons I am not using a messaging library such as ZeroMQ, but am writing the messaging interface myself. The nature of the computation in the cloud makes the message flow between servers asymmetric (i.e., depending on the workload, server A may send more data to server B or vice versa), the messages are asynchronous (i.e., the time between messages is unpredictable, but many messages can arrive in bursts), and the messages vary in size and are usually small (10 to 20 bytes). Furthermore, messages may in principle be delivered out of order, but it is important that no message is dropped, and some flow/congestion control is also needed; I am therefore using TCP rather than UDP. Since the messages vary in size, each message begins with an integer specifying the message size, followed by the message payload. To read a message from a socket, I first read the message size and then the payload; reading one message therefore requires at least two recv() calls (possibly more, since recv() may return less data than requested). Now, because both the message size and the message payload are small, I end up issuing many small recv() requests, which, as my example demonstrates, prevent me from exploiting the available bandwidth. Does anybody have any suggestions for the "right" way to structure messaging in this scenario?
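For concreteness, the naive reading scheme described above looks roughly like this (my reconstruction, not the actual application code, assuming a 4-byte size prefix in host byte order):

#include <sys/socket.h>
#include <cstddef>
#include <cstdint>

// Helper that keeps calling recv() until exactly `size` bytes have arrived,
// since recv() may return less data than requested.
bool readFully(int socketFd, char* data, size_t size) {
    size_t done = 0;
    while (done < size) {
        const ssize_t received = recv(socketFd, data + done, size - done, 0);
        if (received <= 0) return false;   // error, or the peer closed the socket
        done += received;
    }
    return true;
}

// One message = size prefix + payload: at least two recv() calls per message.
bool readMessage(int socketFd, char* payload, uint32_t maxSize) {
    uint32_t messageSize;
    if (!readFully(socketFd, reinterpret_cast<char*>(&messageSize), sizeof(messageSize))) return false;
    if (messageSize > maxSize) return false;
    return readFully(socketFd, payload, messageSize);
}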

Thank you very much for your help!

Best Answer

  • You do not need two recv() calls to read the data you describe. Smarter code, or recvmsg(), will solve that (see the sketch after this list). You just need to be able to cope with the fact that some of the data belonging to the next message may already have been read.

  • The socket receive buffer should be at least as large as the bandwidth-delay product of the link. Typically that will be many kilobytes.

  • The socket send buffer should be at least as large as the peer's socket receive buffer.

Otherwise you cannot use all of the available bandwidth.
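To illustrate the first point, here is a minimal sketch of the "smarter code" approach (my own sketch, not part of the original answer): pull a large chunk from the socket with a single recv() into a user-space buffer, then slice out as many complete length-prefixed messages as are available, keeping any partially received message buffered for the next round. The 4-byte size prefix and the handleMessage() callback are assumptions carried over from the question's framing.

#include <sys/socket.h>
#include <cstdint>
#include <cstring>
#include <vector>

// Reads length-prefixed messages (assumed: 4-byte host-order length + payload)
// using one large recv() per iteration instead of two small ones per message.
// handleMessage() is a hypothetical callback supplied by the application.
bool readMessages(int socketFd, void (*handleMessage)(const char*, uint32_t)) {
    std::vector<char> buffer;           // bytes received but not yet consumed
    char chunk[64 * 1024];              // one large read amortizes the syscall cost
    while (true) {
        const ssize_t received = recv(socketFd, chunk, sizeof(chunk), 0);
        if (received == -1) return false;   // error (or EAGAIN on a non-blocking socket)
        if (received == 0) return true;     // peer closed the connection
        buffer.insert(buffer.end(), chunk, chunk + received);
        // Extract every complete message currently in the buffer; any bytes of
        // the next message that arrived along with it simply stay buffered.
        size_t offset = 0;
        while (buffer.size() - offset >= sizeof(uint32_t)) {
            uint32_t messageSize;
            std::memcpy(&messageSize, buffer.data() + offset, sizeof(messageSize));
            if (buffer.size() - offset - sizeof(messageSize) < messageSize) break;
            handleMessage(buffer.data() + offset + sizeof(messageSize), messageSize);
            offset += sizeof(messageSize) + messageSize;
        }
        buffer.erase(buffer.begin(), buffer.begin() + offset);
    }
}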

EDIT: Addressing your comments below:

I don't understand why the size of the recv()/send() buffers in the user space should affect the throughput.

It affects throughput because it affects the amount of data that can be in flight at once, whose maximum is given by the bandwidth-delay product of the link.
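For concreteness (my numbers, not the answerer's): on the question's 1 Gbit/s LAN, assuming a round-trip time of 0.5 ms, the bandwidth-delay product is

    (10^9 bits/s ÷ 8) × 0.0005 s = 125,000,000 bytes/s × 0.0005 s ≈ 62.5 KB,

so any receive buffer much smaller than that limits the amount of unacknowledged data that can be in flight, no matter how fast the link itself is.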

As people have said above, requests to recv()/send() do not affect the protocol.

This is rubbish. A request to send() causes data to be sent, which affects the protocol by engaging it in sending, and a request to recv() causes data to be removed from the receive buffer, which affects the protocol by changing the receive window advertised in the next ACK.
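One way to observe this coupling on Linux (my sketch, not part of the original answer) is to set SO_RCVBUF and read the effective value back with getsockopt(); as documented in socket(7), the kernel doubles the requested value to leave room for bookkeeping overhead, and the effective size bounds the receive window that can be advertised:

#include <sys/socket.h>
#include <cstdio>

// Requests a receive buffer and prints the size the kernel actually granted.
// On Linux the value returned by getsockopt() is twice the requested value
// (see socket(7)); it bounds the receive window the socket can advertise.
void showReceiveBuffer(int socketFd, int requestedBytes) {
    setsockopt(socketFd, SOL_SOCKET, SO_RCVBUF, &requestedBytes, sizeof(requestedBytes));
    int grantedBytes = 0;
    socklen_t length = sizeof(grantedBytes);
    getsockopt(socketFd, SOL_SOCKET, SO_RCVBUF, &grantedBytes, &length);
    std::printf("requested %d bytes, kernel granted %d bytes\n", requestedBytes, grantedBytes);
}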

Hence, I would expect that, as long as the kernel has enough space in its buffers, and as long as I read this data sufficiently quickly, there shouldn't be any problems. This, however, is not what I observed: (i) changing the sizes of the kernel buffer had no effect, and (ii) I used the available bandwidth already with 40 bytes buffers.

No, you did not. A study published in the early 1980s showed a threefold throughput increase over the early, slow Ethernet of the day by raising the socket buffers from 1024 to 4096 bytes. If you think you observed otherwise, you did not. By definition, any socket buffer size smaller than the bandwidth-delay product inhibits performance.

Regarding "linux - How do the sizes of the receive and send buffers affect TCP performance?", the original question can be found on Stack Overflow: https://stackoverflow.com/questions/34096159/
