gpt4 book ai didi

c - libcurl:如何使用原始文件名下载 url? (相当于 "-O/--remote-name")

转载 作者:太空狗 更新时间:2023-10-29 15:26:33 25 4
gpt4 key购买 nike

问题一:使用 libcurl 下载 URL 时,如何保留下载文件的原始名称?libcurl 要求程序员自己生成文件名。当 URL 本身包含文件名时这很容易,例如从下面的 URL 可以直接看出目标名称是 vimqrc.pdf:

 http://tnerual.eriogerg.free.fr/vimqrc.pdf

但是当目标名称由 URL 动态生成时就没那么容易了。例如,使用 wget(除 URL 外不带任何参数)或 curl(带参数 -O)访问下面的 URL 时,下载得到的文件名是 AdbeRdr1010_eu_ES.exe:

http://get.adobe.com/reader/download/?installer=Reader_10.1_Basque_for_Windows&standalone=1%22

curl (-O) 或 wget 是如何确定下载文件的名称的?

//invoked as ./a.out <URL>

#include <stdio.h>
#include <curl/curl.h>

/*
 * Fixed output path that main() opens for writing. It points at a string
 * literal, so it is const-qualified to stop accidental writes through it.
 */
const char *location = "/tmp/test/out";

/*
 * Write callback registered with CURLOPT_WRITEFUNCTION.
 *
 * Forwards the received chunk (nmemb items of `size` bytes at `ptr`) to
 * `stream` and returns whatever fwrite() reports as the number of items
 * written.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    return fwrite(ptr, size, nmemb, stream);
}

int main(int argc, char *argv[])
{
CURL *curl;
CURLcode res;
int ret = -1;


if (argc!= 2) {
//invoked as ./a.out <URL>
return -1;
}

curl = curl_easy_init();
if (!curl) {
goto bail;
}

FILE *fp = fopen(location, "wb");
curl_easy_setopt(curl, CURLOPT_URL, argv[1]); //invoked as ./a.out <URL>
/* example.com is redirected, so we tell libcurl to follow redirection */
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);

/* Perform the request, res will get the return code */
res = curl_easy_perform(curl);
/* Check for errors */
if(res != CURLE_OK)
fprintf(stderr, "curl_easy_perform() failed: %s\n",
curl_easy_strerror(res));

/* always cleanup */
curl_easy_cleanup(curl);
ret = 0;
fclose(fp);

bail:
return ret;
}

最佳答案

我在 libcurl 源代码中找到了答案。看起来“远程名称”是响应头中 Content-Disposition 字段的一部分。libcurl 会解析响应头,并在 Content-Disposition 字段中查找 “filename=”。这一解析是在通过 CURLOPT_HEADERFUNCTION 选项提供的回调中完成的。最后,在写入数据的回调中(通过 CURLOPT_WRITEFUNCTION 提供),用这个远程名称来创建输出文件。

如果响应头中没有文件名,则直接从 URL 本身推导出文件名。下面的代码几乎是从 libcurl 源码中复制而来,我只做了少量修改,使其更简单并符合我的需求。

#define _GNU_SOURCE 
#include <stdio.h>
#include <curl/curl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdlib.h>

/*
 * Per-download state shared between the header callback, the write callback
 * and download_url().
 *
 * The byte counter is declared directly as unsigned long instead of through a
 * home-made `uint64_t` typedef: redefining that standard name clashes with
 * <stdint.h> on platforms where uint64_t is not unsigned long. The underlying
 * type is unchanged, so printing it with %lu remains correct.
 */
typedef struct {
    char dnld_remote_fname[4096]; /* output file name; empty until one is determined */
    char dnld_url[4096];          /* copy of the URL being downloaded */
    FILE *dnld_stream;            /* output file; NULL until the first body chunk arrives */
    FILE *dbg_stream;             /* not used by the code in this file */
    unsigned long dnld_file_sz;   /* number of bytes written to dnld_stream so far */
} dnld_params_t;

/*
 * Extract the "filename=" parameter from the value part of a
 * Content-Disposition header line.
 *
 * cd    - NUL-terminated text following the "Content-Disposition:" tag,
 *         e.g. " attachment; filename=\"name1367\"; option=strange\r\n".
 * oname - destination buffer; must hold at least 4096 bytes (the size of
 *         dnld_params_t.dnld_remote_fname). Longer names are truncated.
 *
 * The key is matched case-insensitively (ASCII only). Surrounding double
 * quotes and the trailing CR/LF of the header line are not part of the name.
 * Because the value comes from the server, any directory components
 * ('/' or '\\') are dropped so the name cannot point outside the download
 * directory.
 *
 * Returns 0 and fills oname on success. Returns -1 when no usable name is
 * present; oname is left untouched if the key is missing and set to the
 * empty string if the value is empty, "." or "..".
 */
static int get_oname_from_cd(char const*const cd, char *oname)
{
    enum { ONAME_CAP = 4096 };
    char const*const cdtag = "Content-disposition:";
    char const*const key = "filename=";
    const size_t key_len = strlen(key);
    const char *val = NULL;
    const char *end;
    const char *p;
    size_t len;

    /* Example Content-Disposition: filename=name1367; charset=funny; option=strange */

    /* Portable, locale-independent replacement for the GNU-only strcasestr(). */
    for (p = cd; *p != '\0' && !val; p++) {
        size_t i = 0;
        while (i < key_len) {
            char c = p[i];
            if (c >= 'A' && c <= 'Z') {
                c = (char)(c - 'A' + 'a');
            }
            /* A NUL in p never equals a key character, so this cannot overrun. */
            if (c != key[i]) {
                break;
            }
            i++;
        }
        if (i == key_len) {
            val = p + key_len;
        }
    }

    if (!val) {
        printf("No key-value for \"%s\" in \"%s\"", key, cdtag);
        return -1;
    }

    while (*val == ' ' || *val == '\t') {
        val++;
    }

    if (*val == '"') {
        /* Quoted value: runs to the closing quote and may contain ';'. */
        val++;
        end = val;
        while (*end != '\0' && *end != '"' && *end != '\r' && *end != '\n') {
            end++;
        }
    } else {
        /* Bare token: runs to the next parameter or the end of the line. */
        end = val;
        while (*end != '\0' && *end != ';' && *end != '\r' && *end != '\n') {
            end++;
        }
        while (end > val && (end[-1] == ' ' || end[-1] == '\t')) {
            end--;
        }
    }

    /* Keep only the last path component of the server-supplied name. */
    for (p = val; p < end; p++) {
        if (*p == '/' || *p == '\\') {
            val = p + 1;
        }
    }

    len = (size_t)(end - val);
    if (len == 0 ||
        (len == 1 && val[0] == '.') ||
        (len == 2 && val[0] == '.' && val[1] == '.')) {
        oname[0] = '\0';
        return -1;
    }

    if (len >= ONAME_CAP) {
        len = ONAME_CAP - 1;
    }
    memcpy(oname, val, len);
    oname[len] = '\0';

    return 0;
}

/*
 * Derive an output file name from the last path component of a URL.
 *
 * url   - NUL-terminated URL, with or without a "scheme://" prefix.
 * oname - destination buffer; must hold at least 4096 bytes (the size of
 *         dnld_params_t.dnld_remote_fname). Longer names are truncated.
 *
 * The query string ("?...") and fragment ("#...") are not part of the name.
 *
 * Returns 0 and fills oname on success. Returns -1 and sets oname to the
 * empty string when the URL has no path, ends in '/', or its last component
 * is "." or "..".
 */
static int get_oname_from_url(char const* url, char *oname)
{
    enum { ONAME_CAP = 4096 };
    const char *start = url;
    const char *name = NULL;
    const char *scheme_sep;
    const char *end;
    const char *p;
    size_t len;

    oname[0] = '\0';

    /* Remove "http(s)://" -- only when it really is a leading scheme and not
     * a "://" that happens to appear later in the path or query. */
    scheme_sep = strstr(url, "://");
    if (scheme_sep && (size_t)(scheme_sep - url) < strcspn(url, "/?#")) {
        start = scheme_sep + strlen("://");
    }

    /* The path stops at the query string or fragment, whichever comes first. */
    end = start + strcspn(start, "?#");

    /* Locate the character after the last '/' of the path. */
    for (p = start; p < end; p++) {
        if (*p == '/') {
            name = p + 1;
        }
    }

    if (!name) {
        /* Host only, e.g. "http://example.com": no path component at all. */
        return -1;
    }

    len = (size_t)(end - name);
    if (len == 0 ||
        (len == 1 && name[0] == '.') ||
        (len == 2 && name[0] == '.' && name[1] == '.')) {
        return -1;
    }

    if (len >= ONAME_CAP) {
        len = ONAME_CAP - 1;
    }
    memcpy(oname, name, len);
    oname[len] = '\0';

    return 0;
}

/*
 * Header callback registered with CURLOPT_HEADERFUNCTION; called with one
 * response header line per invocation.
 *
 * hdr      - header bytes; size * nmemb of them, NOT NUL-terminated.
 * userdata - the dnld_params_t registered with CURLOPT_HEADERDATA.
 *
 * When the line is a Content-Disposition header, the file name it carries is
 * stored in dnld_remote_fname. Always returns size * nmemb so libcurl keeps
 * the transfer going even if the header carries no usable name.
 */
size_t dnld_header_parse(void *hdr, size_t size, size_t nmemb, void *userdata)
{
    const size_t cb = size * nmemb;
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    char const*const cdtag = "Content-disposition:";
    const size_t cdtag_len = strlen(cdtag);
    char line[4096];
    size_t len = cb;

    /*
     * The string functions below need a terminator that libcurl does not
     * provide, so work on a bounded, NUL-terminated copy. Overlong header
     * lines are truncated to the buffer size.
     */
    if (len >= sizeof(line)) {
        len = sizeof(line) - 1;
    }
    memcpy(line, hdr, len);
    line[len] = '\0';

    /* Example:
     * ...
     * Content-Type: text/html
     * Content-Disposition: filename=name1367; charset=funny; option=strange
     */
    if (strstr(line, "Content-disposition:")) {
        printf ("has c-d: %s\n", line);
    }

    /* Header names are case-insensitive, hence the case-insensitive compare. */
    if (!strncasecmp(line, cdtag, cdtag_len)) {
        printf ("Found c-d: %s\n", line);
        if (get_oname_from_cd(line + cdtag_len, dnld_params->dnld_remote_fname)) {
            printf("ERR: bad remote name");
        }
    }

    return cb;
}

/*
 * Open the output file for a download inside /tmp/.
 *
 * fname - file name to create. It may originate from the server, so only its
 *         last path component is used; directory parts such as "../" cannot
 *         move the file out of /tmp/.
 *
 * Returns a stream opened in "wb" mode, or NULL (after printing a message)
 * when fname is NULL, the resulting path does not fit the local buffer, or
 * fopen() fails. The caller owns the stream and must fclose() it.
 */
FILE* get_dnld_stream(char const*const fname)
{
    char const*const pre = "/tmp/";
    char out[4096];
    const char *base = fname;
    const char *p;
    FILE *fp;
    int n;

    if (!fname) {
        printf ("Could not create file: no name given\n");
        return NULL;
    }

    /* Keep only what follows the last '/'. */
    for (p = fname; *p != '\0'; p++) {
        if (*p == '/') {
            base = p + 1;
        }
    }

    /* pre already ends in '/', so no extra separator is inserted. */
    n = snprintf(out, sizeof(out), "%s%s", pre, base);
    if (n < 0 || (size_t)n >= sizeof(out)) {
        /* A truncated path would silently create a differently named file. */
        printf ("File name too long: %s\n", base);
        return NULL;
    }

    fp = fopen(out, "wb");
    if (!fp) {
        printf ("Could not create file %s\n", out);
    }

    return fp;
}

/*
 * Write callback registered with CURLOPT_WRITEFUNCTION.
 *
 * buffer   - body bytes received; sz * nmemb of them.
 * userdata - the dnld_params_t registered with CURLOPT_WRITEDATA.
 *
 * On the first call the output file is created: the name comes from the
 * Content-Disposition header if the header callback found one, otherwise
 * from the URL. Each chunk is then appended to that file and, when written
 * completely, added to dnld_file_sz.
 *
 * Returns the number of bytes handled. Returning fewer bytes than were
 * passed in makes libcurl abort the transfer, which is how a missing file
 * name, a failed fopen() or a short write is reported.
 */
size_t write_cb(void *buffer, size_t sz, size_t nmemb, void *userdata)
{
    dnld_params_t *dnld_params = (dnld_params_t*)userdata;
    size_t items;

    if (!dnld_params->dnld_remote_fname[0]) {
        int rc = get_oname_from_url(dnld_params->dnld_url, dnld_params->dnld_remote_fname);
        if (rc != 0 || !dnld_params->dnld_remote_fname[0]) {
            printf ("Could not determine a file name for %s\n", dnld_params->dnld_url);
            return 0;
        }
    }

    if (!dnld_params->dnld_stream) {
        dnld_params->dnld_stream = get_dnld_stream(dnld_params->dnld_remote_fname);
        if (!dnld_params->dnld_stream) {
            /* Never hand a NULL stream to fwrite(). */
            return 0;
        }
    }

    /* fwrite() counts items of sz bytes, so convert back to bytes below. */
    items = fwrite(buffer, sz, nmemb, dnld_params->dnld_stream);
    if (items == nmemb) {
        dnld_params->dnld_file_sz += items * sz;
    }
    return items * sz;
}


/*
 * Download `url` into a file named after the remote file name.
 *
 * Registers dnld_header_parse() and write_cb() with a libcurl easy handle,
 * runs the transfer, then prints the chosen file name and the number of
 * bytes written.
 *
 * Returns 0 when the transfer succeeded and the output file (if any) was
 * closed without error; -1 otherwise, including a NULL or overlong URL.
 */
int download_url(char const*const url)
{
    CURL *curl = NULL;
    int ret = -1;
    CURLcode cerr = CURLE_OK;
    dnld_params_t dnld_params;
    size_t url_len;

    memset(&dnld_params, 0, sizeof(dnld_params));

    if (!url) {
        printf ("No URL given\n");
        goto bail;
    }

    /* Bound the copy by the destination, not by the source length. */
    url_len = strlen(url);
    if (url_len >= sizeof(dnld_params.dnld_url)) {
        printf ("URL too long (%lu bytes)\n", (unsigned long)url_len);
        goto bail;
    }
    memcpy(dnld_params.dnld_url, url, url_len + 1);

    curl = curl_easy_init();
    if (!curl) {
        goto bail;
    }

    cerr = curl_easy_setopt(curl, CURLOPT_URL, url);
    if (cerr) { printf ("%s: failed with err %d\n", "URL", cerr); goto cleanup;}

    cerr = curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, dnld_header_parse);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER", cerr); goto cleanup;}

    cerr = curl_easy_setopt(curl, CURLOPT_HEADERDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "HEADER DATA", cerr); goto cleanup;}

    cerr = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
    if (cerr) { printf ("%s: failed with err %d\n", "WR CB", cerr); goto cleanup;}

    cerr = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &dnld_params);
    if (cerr) { printf ("%s: failed with err %d\n", "WR Data", cerr); goto cleanup;}

    cerr = curl_easy_perform(curl);
    if (cerr != CURLE_OK) {
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(cerr));
    } else {
        ret = 0;
    }

    printf ("Remote name: %s\n", dnld_params.dnld_remote_fname);

    /* The stream stays NULL when no body data arrived or the file could not
     * be created; fclose(NULL) is undefined behavior. */
    if (dnld_params.dnld_stream) {
        if (fclose(dnld_params.dnld_stream) != 0) {
            fprintf(stderr, "fclose() failed for %s\n", dnld_params.dnld_remote_fname);
            ret = -1;
        }
        dnld_params.dnld_stream = NULL;
    }

    /* The cast keeps %lu correct whatever the exact type of the counter is. */
    printf ("file size : %lu\n", (unsigned long)dnld_params.dnld_file_sz);

cleanup:
    /* always cleanup */
    curl_easy_cleanup(curl);

bail:
    return ret;
}

/*
 * Program entry point. Expects exactly one argument, the URL to download,
 * and returns the result of download_url(); any other argument count prints
 * "Bad args" and returns -1.
 */
int main(int argc, char *argv[])
{
    const int have_single_arg = (argc == 2);

    if (have_single_arg) {
        return download_url(argv[1]);
    }

    printf ("Bad args\n");
    return -1;
}

关于c - libcurl:如何使用原始文件名下载 url? (相当于 "-O/--remote-name"),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/25576697/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com