gpt4 book ai didi

C# WebClient - 下载文件后 LOH 大幅增加

转载 作者:IT王子 更新时间:2023-10-28 23:36:13 24 4
gpt4 key购买 nike

我有一个类负责在下载管理器中下载文件。该类负责下载文件并将其写入给定路径。

要下载的文件大小通常在 1 到 5 MB 之间,但也可能更大。我正在使用 WebClient 类的实例从 Internet 获取文件。

/// <summary>
/// Downloads the media file behind a single <see cref="PlaylistItem"/> and stores it at
/// <see cref="SavePath"/>, reporting progress and completion through events.
/// </summary>
/// <remarks>
/// The payload is fetched with <c>DownloadDataAsync</c>, so the whole file is held in one
/// <c>byte[]</c> until it has been written to disk. Arrays of 85,000 bytes or more are
/// allocated on the Large Object Heap; for multi-megabyte files a streaming download
/// (e.g. <c>OpenReadAsync</c> plus a small copy buffer) avoids that allocation.
/// </remarks>
public class DownloadItem
{
    #region Events
    public delegate void DownloadItemDownloadCompletedEventHandler(object sender, DownloadCompletedEventArgs args);

    /// <summary>Raised once the download has finished, failed, been cancelled or been skipped.</summary>
    public event DownloadItemDownloadCompletedEventHandler DownloadItemDownloadCompleted;

    /// <summary>Raises <see cref="DownloadItemDownloadCompleted"/>.</summary>
    protected virtual void OnDownloadItemDownloadCompleted(DownloadCompletedEventArgs e)
    {
        DownloadItemDownloadCompleted?.Invoke(this, e);
    }

    public delegate void DownloadItemDownloadProgressChangedEventHandler(object sender, DownloadProgressChangedEventArgs args);

    /// <summary>Raised whenever the download reports new progress.</summary>
    public event DownloadItemDownloadProgressChangedEventHandler DownloadItemDownloadProgressChanged;

    /// <summary>Raises <see cref="DownloadItemDownloadProgressChanged"/>.</summary>
    protected virtual void OnDownloadItemDownloadProgressChanged(DownloadProgressChangedEventArgs e)
    {
        DownloadItemDownloadProgressChanged?.Invoke(this, e);
    }
    #endregion

    #region Fields
    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

    // Client of the transfer currently in flight; null when no transfer is running.
    private WebClient _client;
    #endregion

    #region Properties
    /// <summary>The playlist entry whose media is downloaded.</summary>
    public PlaylistItem Item { get; }

    /// <summary>Destination path of the downloaded file.</summary>
    public string SavePath { get; }

    /// <summary>Whether an already existing file at <see cref="SavePath"/> is replaced.</summary>
    public bool Overwrite { get; }
    #endregion

    /// <summary>Creates a download job for <paramref name="item"/>.</summary>
    /// <param name="item">Playlist entry to download.</param>
    /// <param name="savePath">Path the file is written to.</param>
    /// <param name="overwrite">Replace an existing file instead of skipping the download.</param>
    public DownloadItem(PlaylistItem item, string savePath, bool overwrite = false)
    {
        Item = item;
        SavePath = savePath;
        Overwrite = overwrite;
    }

    /// <summary>
    /// Resolves the download URL and starts the asynchronous transfer. The outcome is always
    /// reported through <see cref="DownloadItemDownloadCompleted"/>.
    /// </summary>
    public void StartDownload()
    {
        if (File.Exists(SavePath) && !Overwrite)
        {
            // Nothing to do: report completion with the same flag the failure paths use.
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true));
            return;
        }

        OnDownloadItemDownloadProgressChanged(new DownloadProgressChangedEventArgs(1));
        Item.RetreiveDownloadUrl();

        if (string.IsNullOrEmpty(Item.DownloadUrl))
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, new InvalidOperationException("Could not retreive download url")));
            return;
        }

        // The client must outlive this method: DownloadDataAsync returns immediately, so a
        // using-block here would dispose the client while the transfer is still running.
        var client = new WebClient();
        _client = client;

        try
        {
            client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

            client.DownloadDataCompleted += (sender, args) =>
            {
                // Writing, tagging and converting the file is handed to a separate task.
                Task.Run(() =>
                {
                    try
                    {
                        DownloadCompleted(args);
                    }
                    finally
                    {
                        ReleaseClient(client);
                    }
                });
            };
            client.DownloadProgressChanged += (sender, args) => OnDownloadItemDownloadProgressChanged(new DownloadProgressChangedEventArgs(args.ProgressPercentage));
            client.DownloadDataAsync(new Uri(Item.DownloadUrl));
        }
        catch (Exception ex)
        {
            ReleaseClient(client);
            Logger.Warn(ex, "Error downloading track {0}", Item.VideoId);
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
        }
    }

    /// <summary>
    /// Persists the downloaded bytes, rewrites the tags, attempts the M4A to MP3 conversion and
    /// raises the completion event.
    /// </summary>
    /// <param name="args">Completion data of the finished <c>DownloadDataAsync</c> call.</param>
    private void DownloadCompleted(DownloadDataCompletedEventArgs args)
    {
        // args.Result throws when the transfer was cancelled or failed, so those cases are
        // reported with the original error instead of a wrapped "writing" failure.
        if (args.Cancelled || args.Error != null)
        {
            if (args.Error != null)
            {
                Logger.Warn(args.Error, "Error downloading track {0}", Item.VideoId);
            }

            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, args.Error));
            return;
        }

        try
        {
            File.WriteAllBytes(SavePath, args.Result);

            using (var file = TagLib.File.Create(SavePath))
            {
                file.Save();
            }

            try
            {
                MusicFormatConverter.M4AToMp3(SavePath);
            }
            catch (Exception ex)
            {
                // Conversion is best effort (the converter rejects non-.m4a files); keep the
                // download but leave a trace of why the file was not converted.
                Logger.Debug(ex, "Track {0} was not converted to mp3", Item.VideoId);
            }
        }
        catch (Exception ex)
        {
            Logger.Error(ex, "Error writing track file for track {0}", Item.VideoId);
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
            return;
        }

        // Raised outside the try so an exception thrown by a subscriber is not answered with
        // a second, contradictory completion event.
        OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(false));
    }

    /// <summary>Disposes <paramref name="client"/> and clears the field if it still points to it.</summary>
    private void ReleaseClient(WebClient client)
    {
        if (ReferenceEquals(_client, client))
        {
            _client = null;
        }

        client.Dispose();
    }

    /// <summary>Requests cancellation of a running transfer; does nothing when none is active.</summary>
    public void StopDownload()
    {
        _client?.CancelAsync();
    }

    /// <summary>Hash code of the wrapped <see cref="Item"/> (0 when it is null).</summary>
    public override int GetHashCode()
    {
        return Item?.GetHashCode() ?? 0;
    }

    /// <summary>Two download items are equal when they wrap equal playlist items.</summary>
    public override bool Equals(object obj)
    {
        var other = obj as DownloadItem;

        return other != null && Equals(Item, other.Item);
    }
}

与下载项目的文件大小相比,每次下载都会导致非常大的内存增加。如果我下载一个大小约为 3 MB 的文件,内存使用量将增加约 8 MB。

如您所见,下载会产生很多 LOH,下载后不会清除这些 LOH。即使强制 GC 或设置 GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce; 也无助于防止这种内存泄漏。

比较快照 1 和 2 可以看到内存量是由字节数组产生的,这可能是下载结果。

多次下载表明内存泄漏是多么可怕。

在我看来,这是由 WebClient 实例引起的。但是我无法确定究竟是什么导致了这个问题。即使我强制 GC 也没有任何区别。下面的截图显示的是未强制 GC 时的情况:

是什么导致了这种额外的内存开销,我该如何解决?这是一个严重的问题:假设下载 100 次或更多,该进程将耗尽内存。

编辑


按照建议,我注释掉了负责设置标签并将 M4A 转换为 MP3 的部分。但是转换器只是对 FFMPEG 的调用,所以它不应该是内存泄漏:

/// <summary>
/// Converts audio files by invoking the ffmpeg executable located at <c>tools\ffmpeg.exe</c>
/// (resolved relative to the current working directory).
/// </summary>
class MusicFormatConverter
{
    /// <summary>
    /// Converts an <c>.m4a</c> file to a 128k MP3 written next to it with the <c>.mp3</c> extension.
    /// </summary>
    /// <param name="filePath">Path of the source file; must end in <c>.m4a</c> (case-insensitive).</param>
    /// <param name="deleteOriginal">Delete the source file after a successful conversion.</param>
    /// <exception cref="ArgumentException"><paramref name="filePath"/> is null, empty or not an <c>.m4a</c> path.</exception>
    /// <exception cref="InvalidOperationException">ffmpeg did not produce the output file.</exception>
    public static void M4AToMp3(string filePath, bool deleteOriginal = true)
    {
        if (string.IsNullOrEmpty(filePath) || !filePath.EndsWith(".m4a", StringComparison.OrdinalIgnoreCase))
        {
            throw new ArgumentException("The path must point to an .m4a file.", nameof(filePath));
        }

        var toolPath = Path.Combine("tools", "ffmpeg.exe");

        // Only swap the extension; a plain string replace would also rewrite ".m4a"
        // occurrences inside directory names.
        var convertedFilePath = Path.ChangeExtension(filePath, ".mp3");

        // Remove a stale output file so its existence afterwards proves the conversion worked.
        File.Delete(convertedFilePath);

        using (var process = new Process
        {
            StartInfo =
            {
                FileName = toolPath,
#if !DEBUG
                WindowStyle = ProcessWindowStyle.Hidden,
#endif
                Arguments = $"-i \"{filePath}\" -acodec libmp3lame -ab 128k \"{convertedFilePath}\""
            }
        })
        {
            process.Start();
            process.WaitForExit();
        }

        if (!File.Exists(convertedFilePath))
        {
            throw new InvalidOperationException("File was not converted successfully!");
        }

        if (deleteOriginal)
        {
            File.Delete(filePath);
        }
    }
}

DownloadCompleted() 方法现在看起来像这样:

/// <summary>
/// Writes the downloaded bytes to <c>SavePath</c> and raises the completion event.
/// Tagging and MP3 conversion are disabled in this variant (see the block comment below).
/// </summary>
/// <param name="args">Completion data of the finished <c>DownloadDataAsync</c> call.</param>
private void DownloadCompleted(DownloadDataCompletedEventArgs args)
{
    // args.Result throws when the transfer was cancelled or failed, so both cases are
    // reported up front with the original error rather than as a file-writing failure.
    if (args.Cancelled || args.Error != null)
    {
        OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, args.Error));
        return;
    }

    try
    {
        File.WriteAllBytes(SavePath, args.Result);

        /*
        Disabled while narrowing down the source of the memory growth:

        using (var file = TagLib.File.Create(SavePath))
        {
        file.Save();
        }

        try
        {
        MusicFormatConverter.M4AToMp3(SavePath);
        }
        catch (Exception)
        {
        // ignore
        }
        */
    }
    catch (Exception ex)
    {
        Logger.Error(ex, "Error writing track file for track {0}", Item.VideoId);
        OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
        return;
    }

    // Raised outside the try so an exception thrown by a subscriber does not trigger a
    // second completion event that reports a failure.
    OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(false));
}

下载 7 项后的结果: 看来被注释掉的这部分并不是内存泄漏的原因。

作为补充,我也提交了 DownloadManager 类,因为它正在处理整个下载操作。也许这可能是问题的根源。

/// <summary>
/// Queues <see cref="DownloadItem"/> instances and starts them on a worker thread, keeping at
/// most <c>Properties.Settings.Default.ParallelDownloads</c> downloads active at a time.
/// </summary>
/// <remarks>
/// The queue and the active list are touched by callers, by the worker thread and by the
/// completion callbacks of the downloads, so every access goes through <c>_sync</c>.
/// </remarks>
public class DownloadManager
{
    #region Fields
    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

    // Guards _queue, _activeDownloads, _active and _thread.
    private readonly object _sync = new object();
    private readonly Queue<DownloadItem> _queue;
    private readonly List<DownloadItem> _activeDownloads;

    // True while a worker thread is draining the queue.
    private bool _active;
    private Thread _thread;
    #endregion

    #region Construction
    /// <summary>Creates an idle manager with an empty queue.</summary>
    public DownloadManager()
    {
        _queue = new Queue<DownloadItem>();
        _activeDownloads = new List<DownloadItem>();
    }
    #endregion

    #region Methods
    /// <summary>Enqueues <paramref name="item"/> and makes sure the worker thread is running.</summary>
    /// <param name="item">Download to schedule; null entries are skipped by the worker.</param>
    public void AddToQueue(DownloadItem item)
    {
        lock (_sync)
        {
            _queue.Enqueue(item);
        }

        StartManager();
    }

    /// <summary>
    /// Drops all queued downloads and forgets the active ones. The worker thread notices the
    /// empty queue and ends by itself, so the manager can be reused afterwards.
    /// </summary>
    /// <remarks>
    /// Thread.Abort is deliberately not used: it is unsupported on current .NET runtimes and
    /// left the manager permanently marked as active.
    /// NOTE(review): transfers that are already running are not cancelled here (the original
    /// did not cancel them either) - confirm whether StopDownload should be called on them.
    /// </remarks>
    public void Abort()
    {
        lock (_sync)
        {
            _queue.Clear();
            _activeDownloads.Clear();
        }
    }

    /// <summary>Starts the worker thread unless one is already draining the queue.</summary>
    private void StartManager()
    {
        lock (_sync)
        {
            if (_active)
            {
                return;
            }

            _active = true;
            _thread = new Thread(ProcessQueue);
            _thread.Start();
        }
    }

    /// <summary>Worker loop: starts queued downloads while respecting the parallel limit.</summary>
    private void ProcessQueue()
    {
        while (true)
        {
            DownloadItem next = null;
            bool atCapacity;

            lock (_sync)
            {
                if (_queue.Count == 0)
                {
                    // Reset under the same lock AddToQueue/StartManager use, so an item
                    // enqueued right now either is seen here or starts a fresh worker.
                    _active = false;
                    return;
                }

                atCapacity = _activeDownloads.Count >= Properties.Settings.Default.ParallelDownloads;
                if (!atCapacity)
                {
                    next = _queue.Dequeue();
                    if (next != null)
                    {
                        _activeDownloads.Add(next);
                    }
                }
            }

            if (atCapacity)
            {
                // Poll until a running download completes and frees a slot.
                Thread.Sleep(10);
                continue;
            }

            if (next != null)
            {
                BeginDownload(next);
            }
        }
    }

    /// <summary>Subscribes to the item's completion event and starts it on the thread pool.</summary>
    private void BeginDownload(DownloadItem item)
    {
        item.DownloadItemDownloadCompleted += OnItemDownloadCompleted;

        Task.Run(() =>
        {
            try
            {
                item.StartDownload();
            }
            catch (Exception ex)
            {
                // Without this the item would occupy a download slot forever.
                Logger.Error(ex, "Error downloading track {0}", item.Item.VideoId);
                ReleaseItem(item);
            }
        });
    }

    /// <summary>Logs a failed download and frees its slot.</summary>
    private void OnItemDownloadCompleted(object sender, DownloadCompletedEventArgs args)
    {
        var item = (DownloadItem)sender;

        if (args.Error != null)
        {
            Logger.Error(args.Error, "Error downloading track {0}", item.Item.VideoId);
        }

        ReleaseItem(item);
    }

    /// <summary>Unsubscribes from <paramref name="item"/> and removes it from the active list.</summary>
    private void ReleaseItem(DownloadItem item)
    {
        item.DownloadItemDownloadCompleted -= OnItemDownloadCompleted;

        lock (_sync)
        {
            _activeDownloads.Remove(item);
        }
    }
    #endregion
}

最佳答案

最后,经过数十次分析和内存检查,问题现在得到解决。

正如 @SimonMourier 已经说过的,这个问题与 WebClient 的 UploadFile、DownloadData、DownloadString、DownloadFile 方法有关。查看它们的内部实现,您可以看到它们都在使用 WebClient 类中带有如下签名的私有(private) DownloadBits 方法:

private byte[] DownloadBits(WebRequest request, Stream writeStream, CompletionDelegate completionDelegate, AsyncOperation asyncOp)

关于返回类型,很明显为什么行为就像我发现的那样:使用上述方法时,内容保存在字节数组中。因此,如果文件大小 > 85,000 字节,则不建议使用这些方法,因为这会导致不断填充 LOH,直到达到内存限制。如果文件很小,这可能无关紧要;但随着文件大小的增加,LOH 也会成倍增长。

作为补充,我的最终解决方案:

/// <summary>
/// Downloads the media file of a playlist item by streaming the response into a temporary
/// file through a small fixed-size buffer, then moving it to <see cref="SavePath"/>. The
/// payload is therefore never held in memory as one large array.
/// </summary>
public class DownloadItem : DownloadManagerItem
{
    #region Fields

    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

    // Size of the copy buffer; well below the 85,000-byte Large Object Heap threshold.
    private const int CopyBufferSize = 16 * 1024;

    // Client of the transfer currently in flight; null when no transfer is running.
    private WebClient _webClient;

    #endregion

    #region Properties

    /// <summary>Destination path of the downloaded file.</summary>
    public string SavePath { get; }

    /// <summary>Whether an already existing file at <see cref="SavePath"/> is replaced.</summary>
    public bool Overwrite { get; }

    /// <summary>Format requested for the download.</summary>
    public DownloadFormat DownloadFormat { get; }

    #endregion

    /// <summary>Creates a download job for <paramref name="item"/>.</summary>
    /// <param name="item">Playlist entry to download.</param>
    /// <param name="savePath">Path the file is written to.</param>
    /// <param name="downloadFormat">Format requested for the download.</param>
    /// <param name="overwrite">Replace an existing file instead of skipping the download.</param>
    public DownloadItem(PlaylistItem item, string savePath, DownloadFormat downloadFormat, bool overwrite = false)
        : base(item)
    {
        SavePath = savePath;
        Overwrite = overwrite;
        DownloadFormat = downloadFormat;
    }

    /// <summary>
    /// Resolves the download URL and opens the response stream asynchronously. The outcome is
    /// always reported through the completion event.
    /// </summary>
    public override void StartDownload()
    {
        if (File.Exists(SavePath) && !Overwrite)
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true));
            return;
        }

        OnDownloadItemDownloadProgressChanged(new DownloadProgressChangedEventArgs(1));
        Item.RetreiveDownloadUrl();

        if (string.IsNullOrEmpty(Item.DownloadUrl))
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true,
                new InvalidOperationException("Could not retreive download url")));
            return;
        }

        // The client must outlive this method: OpenReadAsync returns immediately, so it is
        // released by the completion handler instead of a using-block.
        var client = new WebClient();
        _webClient = client;

        try
        {
            client.Headers.Add("user-agent",
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

            client.OpenReadCompleted += WebClientOnOpenReadCompleted;
            client.OpenReadAsync(new Uri(Item.DownloadUrl));
        }
        catch (Exception ex)
        {
            ReleaseClient(client);
            Logger.Warn(ex, "Error downloading track {0}", Item.VideoId);
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
        }
    }

    /// <summary>
    /// Completion handler of <c>OpenReadAsync</c>: copies the response stream to disk and
    /// releases the client afterwards, whatever the outcome.
    /// </summary>
    private void WebClientOnOpenReadCompleted(object sender, OpenReadCompletedEventArgs openReadCompletedEventArgs)
    {
        var client = (WebClient)sender;

        try
        {
            SaveResponse(client, openReadCompletedEventArgs);
        }
        finally
        {
            ReleaseClient(client);
        }
    }

    /// <summary>Streams the response to a temporary file and moves it to <see cref="SavePath"/>.</summary>
    private void SaveResponse(WebClient client, OpenReadCompletedEventArgs args)
    {
        // args.Result throws when the request was cancelled or failed.
        if (args.Cancelled || args.Error != null)
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, args.Error));
            return;
        }

        if (!Overwrite && File.Exists(SavePath))
        {
            // The file appeared while the request was in flight: close the response and
            // report the skip the same way StartDownload does, so listeners are not left waiting.
            args.Result?.Dispose();
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true));
            return;
        }

        // Stays 0 when the header is missing or not a valid int.
        var totalLength = 0;
        var headers = client.ResponseHeaders;
        if (headers != null)
        {
            int.TryParse(headers["Content-Length"], out totalLength);
        }

        string tmpPath = null;
        try
        {
            tmpPath = Path.GetTempFileName();
            long processed = 0;

            using (var stream = args.Result)
            using (var fs = File.Create(tmpPath))
            {
                var buffer = new byte[CopyBufferSize];
                int read;

                while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    fs.Write(buffer, 0, read);

                    processed += read;
                    OnDownloadItemDownloadProgressChanged(new DownloadProgressChangedEventArgs(processed, totalLength));
                }
            }

            // File.Move refuses to replace an existing file; reaching this point with an
            // existing target means Overwrite was requested.
            if (File.Exists(SavePath))
            {
                File.Delete(SavePath);
            }

            File.Move(tmpPath, SavePath);
            tmpPath = null;
        }
        catch (Exception ex)
        {
            DeleteQuietly(tmpPath);
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
            return;
        }

        // Raised outside the try so an exception thrown by a subscriber is not answered with
        // a second completion event that reports a failure.
        OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(false));
    }

    /// <summary>Best-effort removal of a leftover temporary file.</summary>
    private static void DeleteQuietly(string path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return;
        }

        try
        {
            File.Delete(path);
        }
        catch (IOException)
        {
            // The temp file stays behind; the download failure itself is already reported.
        }
        catch (UnauthorizedAccessException)
        {
            // Same as above.
        }
    }

    /// <summary>Unsubscribes from and disposes <paramref name="client"/>, clearing the field if it still points to it.</summary>
    private void ReleaseClient(WebClient client)
    {
        client.OpenReadCompleted -= WebClientOnOpenReadCompleted;

        if (ReferenceEquals(_webClient, client))
        {
            _webClient = null;
        }

        client.Dispose();
    }

    /// <summary>Requests cancellation of a running transfer; does nothing when none is active.</summary>
    public override void StopDownload()
    {
        _webClient?.CancelAsync();
    }

    /// <summary>Releases the client of a transfer that is still in flight, if any.</summary>
    public override void Dispose()
    {
        var client = _webClient;
        if (client != null)
        {
            ReleaseClient(client);
        }
    }

    /// <summary>Hash code of the wrapped playlist item (0 when it is null).</summary>
    public override int GetHashCode()
    {
        return Item?.GetHashCode() ?? 0;
    }

    /// <summary>Two download items are equal when they wrap equal playlist items.</summary>
    public override bool Equals(object obj)
    {
        var other = obj as DownloadItem;

        return other != null && Equals(Item, other.Item);
    }
}

不过感谢您的帮助!

关于C# WebClient - 下载文件后 LOH 大幅增加,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39997931/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com