gpt4 book ai didi

html - 使用 Visual Studio 获取 URL 链接的 HTML

转载 作者:行者123 更新时间:2023-11-30 05:40:48 25 4
gpt4 key购买 nike

下面的代码适用于主站点,但不适用于子链接。例如,代码在 url 链接为“www.reddit.com”或“www.wikipedia.org”时有效,但在 url 链接为“www.reddit.com/r/nba”或“www.wikipedia”时无效.org/wiki/2007_NASCAR_Nextel_Cup_Series"你知道如何让它也包含子链接吗?

 #include "stdafx.h"
#include <winsock2.h>
#include <WS2tcpip.h>
#include <windows.h>
#include <iostream>
#pragma comment(lib,"ws2_32.lib")
using namespace std;
int main(){
// Initialize Dependencies to the Windows Socket.
WSADATA wsaData;
if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
cout << "WSAStartup failed.\n";
system("pause");
return -1;
}

// We first prepare some "hints" for the "getaddrinfo" function
// to tell it, that we are looking for a IPv4 TCP Connection.
struct addrinfo hints;
ZeroMemory(&hints, sizeof(hints));
hints.ai_family = AF_INET; // We are targeting IPv4
hints.ai_protocol = IPPROTO_TCP; // We are targeting TCP
hints.ai_socktype = SOCK_STREAM; // We are targeting TCP so its SOCK_STREAM

// Aquiring of the IPv4 address of a host using the newer
// "getaddrinfo" function which outdated "gethostbyname".
// It will search for IPv4 addresses using the TCP-Protocol.
struct addrinfo* targetAdressInfo = NULL;
DWORD getAddrRes = getaddrinfo("www.wikipedia.org/wiki/2007_NASCAR_Nextel_Cup_Series", NULL, &hints, &targetAdressInfo);
if (getAddrRes != 0 || targetAdressInfo == NULL)
{
cout << "Could not resolve the Host Name" << endl;
system("pause");
WSACleanup();
return -1;
}

// Create the Socket Address Informations, using IPv4
// We dont have to take care of sin_zero, it is only used to extend the length of SOCKADDR_IN to the size of SOCKADDR
SOCKADDR_IN sockAddr;
sockAddr.sin_addr = ((struct sockaddr_in*) targetAdressInfo->ai_addr)->sin_addr; // The IPv4 Address from the Address Resolution Result
sockAddr.sin_family = AF_INET; // IPv4
sockAddr.sin_port = htons(80); // HTTP Port: 80

// We have to free the Address-Information from getaddrinfo again
freeaddrinfo(targetAdressInfo);

// Creation of a socket for the communication with the Web Server,
// using IPv4 and the TCP-Protocol
SOCKET webSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (webSocket == INVALID_SOCKET)
{
cout << "Creation of the Socket Failed" << endl;
system("pause");
WSACleanup();
return -1;
}

// Establishing a connection to the web Socket
cout << "Connecting...\n";
if (connect(webSocket, (SOCKADDR*)&sockAddr, sizeof(sockAddr)) != 0)
{
cout << "Could not connect";
system("pause");
closesocket(webSocket);
WSACleanup();
return -1;
}
cout << "Connected.\n";

// Sending a HTTP-GET-Request to the Web Server
const char* httpRequest = "GET / HTTP/1.1\r\nHost: www.google.com\r\nConnection: close\r\n\r\n";
int sentBytes = send(webSocket, httpRequest, strlen(httpRequest), 0);
if (sentBytes < strlen(httpRequest) || sentBytes == SOCKET_ERROR)
{
cout << "Could not send the request to the Server" << endl;
system("pause");
closesocket(webSocket);
WSACleanup();
return -1;
}

// Receiving and Displaying an answer from the Web Server
char buffer[10000];
ZeroMemory(buffer, sizeof(buffer));
int dataLen;
while ((dataLen = recv(webSocket, buffer, sizeof(buffer), 0) > 0))
{
int i = 0;
while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r') {
cout << buffer[i];
i += 1;
}
}

// Cleaning up Windows Socket Dependencies
closesocket(webSocket);
WSACleanup();

system("pause");
return 0;
}

最佳答案

您必须将 GET/ 替换为 GET/path/you/want/to/access.html 并将服务器名称保留为 www.wikipedia.org 但是看在上帝的份上,使用 WinHttpClient 或 Curl 或其他更高级的东西。

关于html - 使用 Visual Studio 获取 URL 链接的 HTML,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31423315/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com