gpt4 book ai didi

c# - 哪种方式更好?将媒体文件作为字节数组还是字符串保存到 MongoDB?

转载 作者:行者123 更新时间:2023-12-02 00:52:04 24 4
gpt4 key购买 nike

我正在 MongoDB 中将媒体文件(图片、PDF 等)保存为字节数组。我看到人们通过将字节数组编码和解码为字符串来保存它的示例。有什么不同?也许性能上有区别?那么哪种方式更好呢?

我注意到,当文件保存为字节数组时,Mongo Management Studio 打开集合的时间比保存为字符串时的时间长

最佳答案

我假设您想将文件存储在文档中。但是您是否考虑过使用 GridFS 与将文件存储在文档中?

就像 Liam 指出的那样,MongoDB 提供了一篇关于 GridFS 注意事项的博客文章 here

在我目前参与的项目中,使用 GridFS 的优点之一是无需检查文件大小,您只需通过二进制流写入和读取文件即可。

从性能角度来看,以二进制形式保存和检索文件比先将其序列化为字符串要快。在针对 MongoDB 3.2 数据库运行的测试程序中,以二进制形式在文档中保存文件比以字符串序列化形式保存文件快 3 倍。这是可以理解的,因为字符串序列化形式需要保存或读取的字节“更多”。

在同一个测试程序中,还针对 GridFS 做了一个快速测试,不过要获得最佳性能,您确实需要反复尝试不同的 chunk(块)大小。

下面是一个非常粗糙的测试程序的代码转储(请注意,您必须自己提供合适的 example.jpg,并且数据库连接信息是硬编码的)。

/// <summary>
/// Rough console benchmark that stores the same file many times in MongoDB in three ways
/// (binary field in a document, string field in a document, GridFS) and prints how long
/// saving and retrieving takes for each. The file name, document count and connection
/// settings are hard-coded.
/// </summary>
class Program
{
    // Names of the collections / GridFS bucket used by the benchmark.
    private const string BinaryCollectionName = "binaryPics";
    private const string StringCollectionName = "stringBasedPics";
    private const string GridFsBucketName = "pictures";

    // GridFS chunk sizes: files up to 1 MiB are uploaded with 50 KiB chunks, larger files with 1 MiB chunks.
    private const int LargeFileThresholdBytes = 1048576;
    private const int SmallFileChunkSizeBytes = 51200;
    private const int LargeFileChunkSizeBytes = 1048576;

    // Written from the Ctrl+C handler and polled by Main, so it must not be cached per thread.
    static volatile bool keepRunning;
    static string fileName = "example.jpg";
    static int numDocs = 571;
    static IMongoDatabase mongoDb;

    /// <summary>
    /// Runs the save/retrieve benchmarks in sequence, prints the timings and then idles
    /// until <see cref="keepRunning"/> is cleared.
    /// </summary>
    /// <param name="args">Command line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.CancelKeyPress += delegate
        {
            Exit();
        };

        keepRunning = true;

        SetupMongoDb();

        var fileBytes = File.ReadAllBytes(fileName);
        Console.WriteLine($"Picturesize in bytes: {fileBytes.Length}");

        ClearCollections();

        Console.WriteLine($"Saving {numDocs} pictures to the database.");

        Console.WriteLine("\nStart Saving in Binary Mode.");
        Stopwatch binaryStopWatch = Stopwatch.StartNew();
        SaveBinaryBased(numDocs, fileBytes);
        binaryStopWatch.Stop();
        Console.WriteLine("Done Saving in Binary Mode.");

        Console.WriteLine("\nStart Saving in String-based Mode.");
        Stopwatch stringStopWatch = Stopwatch.StartNew();
        SaveStringBased(numDocs, fileBytes);
        stringStopWatch.Stop();
        Console.WriteLine("Done Saving in String-based Mode.");

        Console.WriteLine("\nTime Report Saving");
        Console.WriteLine($" * Total Time Binary for {numDocs} records: {binaryStopWatch.ElapsedMilliseconds} ms.");
        Console.WriteLine($" * Total Time String for {numDocs} records: {stringStopWatch.ElapsedMilliseconds} ms.");

        Console.WriteLine("\nCollection Statistics:");
        Statistics(BinaryCollectionName);
        Statistics(StringCollectionName);

        Console.WriteLine("\nTest Retrieval:");
        Console.WriteLine("\nStart Retrieving from binary collection.");
        binaryStopWatch.Restart();
        RetrieveBinary();
        binaryStopWatch.Stop();
        Console.WriteLine("Done Retrieving from binary collection.");

        Console.WriteLine("\nStart Retrieving from string-based collection.");
        stringStopWatch.Restart();
        RetrieveString();
        stringStopWatch.Stop();
        Console.WriteLine("Done Retrieving from string-based collection.");

        Console.WriteLine("\nTime Report Retrieving:");
        Console.WriteLine($" * Total Time Binary for retrieving {numDocs} records: {binaryStopWatch.ElapsedMilliseconds} ms.");
        Console.WriteLine($" * Total Time String for retrieving {numDocs} records: {stringStopWatch.ElapsedMilliseconds} ms.");

        // The binary stopwatch is reused for the GridFS measurements.
        ClearGridFS();
        Console.WriteLine($"\nStart saving {numDocs} files to GridFS:");
        binaryStopWatch.Restart();
        SaveFilesToGridFS(numDocs, fileBytes);
        binaryStopWatch.Stop();
        Console.WriteLine($"Saved {numDocs} files to GridFS in {binaryStopWatch.ElapsedMilliseconds} ms.");

        Console.WriteLine($"\nStart retrieving {numDocs} files from GridFS:");
        binaryStopWatch.Restart();
        RetrieveFromGridFS();
        binaryStopWatch.Stop();
        Console.WriteLine($"Retrieved {numDocs} files from GridFS in {binaryStopWatch.ElapsedMilliseconds} ms.");

        // Keep the console open so the report stays readable until Ctrl+C is pressed.
        while (keepRunning)
        {
            Thread.Sleep(500);
        }
    }

    /// <summary>Signals the idle loop in <see cref="Main"/> to stop.</summary>
    private static void Exit()
    {
        keepRunning = false;
    }

    /// <summary>Deletes every document from the binary and the string-based collection.</summary>
    private static void ClearCollections()
    {
        var matchAll = new BsonDocument();

        mongoDb.GetCollection<BsonDocument>(BinaryCollectionName).DeleteMany(matchAll);
        mongoDb.GetCollection<BsonDocument>(StringCollectionName).DeleteMany(matchAll);
    }

    /// <summary>
    /// Creates the client for the hard-coded server (localhost:27017) and stores the
    /// "exampleSerialization" database handle in <see cref="mongoDb"/>.
    /// </summary>
    private static void SetupMongoDb()
    {
        string hostName = "localhost";
        int portNumber = 27017;
        string databaseName = "exampleSerialization";

        var clientSettings = new MongoClientSettings()
        {
            Server = new MongoServerAddress(hostName, portNumber),
            MinConnectionPoolSize = 1,
            MaxConnectionPoolSize = 1500,
            ConnectTimeout = new TimeSpan(0, 0, 30),
            SocketTimeout = new TimeSpan(0, 1, 30),
            WaitQueueTimeout = new TimeSpan(0, 1, 0)
        };

        mongoDb = new MongoClient(clientSettings).GetDatabase(databaseName);
    }

    /// <summary>
    /// Inserts <paramref name="numDocuments"/> documents that carry the file as a binary field.
    /// </summary>
    /// <param name="numDocuments">Number of documents to insert.</param>
    /// <param name="content">Raw file bytes stored in the "jpgContent" field.</param>
    private static void SaveBinaryBased(int numDocuments, byte[] content)
    {
        var collection = mongoDb.GetCollection<BsonDocument>(BinaryCollectionName);

        // The payload element is set once; only the per-document elements change in the loop.
        BsonDocument baseDoc = new BsonDocument();
        baseDoc.SetElement(new BsonElement("jpgContent", content));

        // Honor the parameter rather than the static numDocs field.
        for (int i = 0; i < numDocuments; ++i)
        {
            baseDoc.SetElement(new BsonElement("_id", Guid.NewGuid()));
            baseDoc.SetElement(new BsonElement("filename", fileName));
            baseDoc.SetElement(new BsonElement("title", $"picture number {i}"));
            collection.InsertOne(baseDoc);
        }
    }

    /// <summary>
    /// Inserts <paramref name="numDocuments"/> documents that carry the file as a string field.
    /// </summary>
    /// <param name="numDocuments">Number of documents to insert.</param>
    /// <param name="content">Raw file bytes; stored Base64-encoded in the "jpgStringContent" field.</param>
    private static void SaveStringBased(int numDocuments, byte[] content)
    {
        var collection = mongoDb.GetCollection<BsonDocument>(StringCollectionName);

        // Base64 keeps the bytes recoverable via Convert.FromBase64String. Decoding arbitrary
        // binary data as UTF-8 text is lossy because invalid sequences get replaced.
        string encodedContent = Convert.ToBase64String(content);

        BsonDocument baseDoc = new BsonDocument();
        baseDoc.SetElement(new BsonElement("jpgStringContent", encodedContent));

        // Honor the parameter rather than the static numDocs field.
        for (int i = 0; i < numDocuments; ++i)
        {
            baseDoc.SetElement(new BsonElement("_id", Guid.NewGuid()));
            baseDoc.SetElement(new BsonElement("filename", fileName));
            baseDoc.SetElement(new BsonElement("title", $"picture number {i}"));
            collection.InsertOne(baseDoc);
        }
    }

    /// <summary>
    /// Runs the "collstats" command for a collection and prints document count,
    /// average document size and storage size.
    /// </summary>
    /// <param name="collectionName">Name of the collection to report on.</param>
    private static void Statistics(string collectionName)
    {
        var command = new BsonDocumentCommand<BsonDocument>(new BsonDocument { { "collstats", collectionName } });
        var stats = mongoDb.RunCommand(command);

        // ToInt64 accepts Int32, Int64 and Double values, whereas AsInt32 throws for anything
        // but Int32. Missing fields (e.g. no average for an empty collection) fall back to 0.
        long count = stats.GetValue("count", 0).ToInt64();
        long averageSize = stats.GetValue("avgObjSize", 0).ToInt64();
        long storageSize = stats.GetValue("storageSize", 0).ToInt64();

        Console.WriteLine($" * Collection : {collectionName}");
        Console.WriteLine($" * Count : {count} documents");
        Console.WriteLine($" * Average Doc Size: {averageSize} bytes");
        Console.WriteLine($" * Total Storage : {storageSize} bytes");
        Console.WriteLine("\n");
    }

    /// <summary>Reads every document of the binary collection and extracts its byte array.</summary>
    private static void RetrieveBinary()
    {
        var collection = mongoDb.GetCollection<BsonDocument>(BinaryCollectionName);
        var docs = collection.Find(new BsonDocument()).ToEnumerable();

        foreach (var doc in docs)
        {
            byte[] fileArray = doc.GetElement("jpgContent").Value.AsByteArray;
            // we can simulate that we do something with the results but that's not the purpose of this experiment
            fileArray = null;
        }
    }

    /// <summary>Reads every document of the string-based collection and extracts its string payload.</summary>
    private static void RetrieveString()
    {
        var collection = mongoDb.GetCollection<BsonDocument>(StringCollectionName);
        var docs = collection.Find(new BsonDocument()).ToEnumerable();

        foreach (var doc in docs)
        {
            // Simply get the string, we don't want to hit the performance test
            // with a conversion to a byte array
            string result = doc.GetElement("jpgStringContent").Value.AsString;
        }
    }

    /// <summary>
    /// Uploads the file <paramref name="numFiles"/> times to the GridFS bucket, naming each
    /// copy after <see cref="fileName"/> with the copy index inserted before ".jpg".
    /// </summary>
    /// <param name="numFiles">Number of copies to upload.</param>
    /// <param name="content">Raw file bytes to upload.</param>
    private static void SaveFilesToGridFS(int numFiles, byte[] content)
    {
        var bucket = new GridFSBucket(mongoDb, new GridFSBucketOptions
        {
            BucketName = GridFsBucketName
        });

        // Loop-invariant values: base name without the ".jpg" suffix and the chunk size for this payload.
        string baseName = fileName.Substring(0, fileName.Length - ".jpg".Length);
        int chunkSize = content.Length <= LargeFileThresholdBytes ? SmallFileChunkSizeBytes : LargeFileChunkSizeBytes;

        for (int i = 0; i < numFiles; ++i)
        {
            string targetFileName = $"{baseName}{i}.jpg";
            bucket.UploadFromBytes(targetFileName, content, new GridFSUploadOptions { ChunkSizeBytes = chunkSize });
        }
    }

    /// <summary>Drops the GridFS bucket used by the benchmark.</summary>
    private static void ClearGridFS()
    {
        var bucket = new GridFSBucket(mongoDb, new GridFSBucketOptions { BucketName = GridFsBucketName });
        bucket.Drop();
    }

    /// <summary>Downloads every file listed in the bucket's files collection as a byte array.</summary>
    private static void RetrieveFromGridFS()
    {
        var bucket = new GridFSBucket(mongoDb, new GridFSBucketOptions { BucketName = GridFsBucketName });
        var fileDocs = mongoDb.GetCollection<BsonDocument>(GridFsBucketName + ".files").Find(new BsonDocument()).ToEnumerable();

        foreach (var fileDoc in fileDocs)
        {
            // Files are uploaded through GridFSBucket.UploadFromBytes above, so their ids are
            // expected to be ObjectIds; call the ObjectId overload explicitly.
            var fileBytes = bucket.DownloadAsBytes(fileDoc.GetElement("_id").Value.AsObjectId);
            fileBytes = null;
        }
    }
}

关于c# - 哪种方式更好?将媒体文件作为字节数组还是字符串保存到 MongoDB?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41375192/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com