gpt4 book ai didi

json - Swift:解析 10k 行数据的最高效方法?

转载 作者:行者123 更新时间:2023-11-28 13:50:15 28 4
gpt4 key购买 nike

我的应用目前会把约 1 万行数据一次性读入一个变量,然后用 SwiftyJSON 逐行解析并写入 Realm。

来源: https://github.com/skishore/makemeahanzi/blob/master/graphics.txt https://github.com/skishore/makemeahanzi/blob/master/dictionary.txt

问题:耗时太长,需要 2 分 28 秒,而且还占用了 400 MB 内存!

问题:如何让它更快?你们当中有人用过 FlatBuffers 或 Protobuf 吗?

非常欢迎提供帮助!

干杯,多姆


这是代码:

/// Parses the two bundled source files line by line and hands the resulting
/// objects to `realmActions.writeCharsToRealm` in batches.
///
/// Line `i` of the graphics file is paired with line `i` of the dictionary file.
/// When the two files do not have the same number of lines, or have one line or
/// fewer, nothing is parsed and a message is printed instead.
func parseToRealm() {

    // each of these files have 9500+ lines of data
    // (basically dictionaries with word definitions)
    let graphicsFileContents = readFile_Graphics()
    let dictFileContents = readFile_Dict()

    // The counts must be equal, so a single `> 1` check covers both files.
    guard graphicsFileContents.count == dictFileContents.count,
          graphicsFileContents.count > 1 else {
        print ("two files have different counts of lines. aborting...")
        return
    }

    let lastIndex = graphicsFileContents.count - 1

    // Start with a genuinely empty batch: seeding it with `Characters()` would
    // put one blank object into every write.
    var characterArr = [Characters]()

    // loop through two files to get all chars
    for (i, jsonString) in graphicsFileContents.enumerated() {
        // parse data from string into json
        // (`Data(_.utf8)` cannot fail, so no force-unwrap is needed)
        let singleCharJson = try? JSON(data: Data(jsonString.utf8))


        // parse stuff from file1
        // ... deleted lines for legal reasons


        // DICT information
        let singleDictJson = try? JSON(data: Data(dictFileContents[i].utf8))

        // parse stuff from that dictionary
        // ... deleted lines for legal reasons

        // NOTE(review): `Character` is the placeholder left by the elided lines;
        // presumably it stands for the object built from the two JSON values above.
        characterArr.append(Character)

        // Every 150 lines, and once more on the very last line, write the batch
        // to the DB. Comparing against `lastIndex` (not `count`) is what makes
        // the final partial batch get written.
        if i % 150 == 0 || i == lastIndex {
            realmActions.writeCharsToRealm(characterArr: characterArr)
            print("Writing \(i)-\(i + 150)")
            // reset the batch to save memory
            characterArr.removeAll(keepingCapacity: true)
        }
    } // end loop file contents
}


/// Reads the bundled `graphics.txt` and returns its contents split into lines.
///
/// - Returns: The pieces produced by splitting the file on newline characters
///   (a file that ends with a newline therefore yields a trailing empty string),
///   or an empty array when the resource is missing or cannot be read.
func readFile_Graphics () -> [String] {
    guard let resourcePath = Bundle.main.path(forResource: "graphics", ofType: "txt") else {
        return []
    }
    do {
        let contents = try String(contentsOfFile: resourcePath, encoding: .utf8)
        return contents.components(separatedBy: .newlines)
    } catch {
        print("cannot get file graphics.txt. Error message:")
        print(error)
        return []
    }
}



/// Reads the bundled `dictionary.txt` and returns its contents split into lines.
///
/// - Returns: The pieces produced by splitting the file on newline characters
///   (a file that ends with a newline therefore yields a trailing empty string),
///   or an empty array when the resource is missing or cannot be read. The
///   failure value is an empty array rather than `[""]`, so a failed read is
///   never mistaken for a file containing one blank line.
func readFile_Dict () -> [String]{
    guard let path = Bundle.main.path(forResource: "dictionary", ofType: "txt") else {
        return []
    }
    do {
        let data = try String(contentsOfFile: path, encoding: .utf8)
        return data.components(separatedBy: .newlines)
    } catch {
        print("cannot get file dictionary.txt. Error message:")
        print(error)
        return []
    }
}

最佳答案

// Streams graphics.txt line by line on a background queue, parses each line as
// JSON, and prints how many whole minutes the pass took.
DispatchQueue.global(qos: .background).async {
    guard let path = Bundle.main.path(forResource: "graphics", ofType: "txt") else {
        print("Dang! File wasn't found!")
        return
    }
    let cal = Calendar.current
    let d1 = Date()

    guard let streamReader = StreamReader(path: path) else {
        print("Dang! StreamReader couldn't be created!")
        return
    }
    var counter = 0
    // `nextLine()` returns nil once the file is exhausted, so it drives the loop
    // directly. Testing `atEof` first reports a bogus early end for any file
    // that finishes with a newline, because `atEof` only flips on the read
    // *after* the last line.
    while let nextLine = streamReader.nextLine() {
        // Parse the line; the result is discarded here because this snippet only measures throughput.
        _ = JSON(parseJSON: nextLine)

        counter += 1
        print("\(counter): \(nextLine)")
    }
    let d2 = Date()
    // Measure from start to end; the reverse order yields a non-positive value.
    let components = cal.dateComponents([.minute], from: d1, to: d2)
    print("Diff: \(components.minute ?? 0)")
}
}

Stream Reader class

import Foundation

/// Reads a text file one delimiter-separated line at a time, pulling the file
/// in fixed-size chunks instead of loading it all at once.
class StreamReader {

    /// Encoding used for the delimiter and for decoding each line.
    let encoding: String.Encoding
    /// Number of bytes requested from the file per read.
    let chunkSize: Int
    /// Open handle on the file; set to nil by `close()`.
    var fileHandle: FileHandle!
    /// The delimiter encoded as bytes.
    let delimData: Data
    /// Bytes read from the file that have not been returned as a line yet.
    var buffer: Data
    /// Becomes true once a read returns no data.
    var atEof: Bool

    /// Opens the file at `path` for reading.
    ///
    /// - Parameters:
    ///   - path: Path of the file to read.
    ///   - delimiter: Line separator; defaults to `"\n"`.
    ///   - encoding: Text encoding; defaults to UTF-8.
    ///   - chunkSize: Bytes per read; defaults to 4096.
    /// - Returns: nil when the file cannot be opened or the delimiter cannot be
    ///   represented in `encoding`.
    init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
          chunkSize: Int = 4096) {
        guard let handle = FileHandle(forReadingAtPath: path) else {
            return nil
        }
        guard let delimiterBytes = delimiter.data(using: encoding) else {
            return nil
        }
        self.encoding = encoding
        self.chunkSize = chunkSize
        self.fileHandle = handle
        self.delimData = delimiterBytes
        self.buffer = Data(capacity: chunkSize)
        self.atEof = false
    }

    deinit {
        close()
    }

    /// Return next line, or nil on EOF.
    ///
    /// nil is also returned when the bytes of a line cannot be decoded with
    /// `encoding`. Must not be called after `close()`.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")

        while !atEof {
            // A delimiter in the buffer means a complete line is ready.
            if let delimiterRange = buffer.range(of: delimData) {
                let lineBytes = buffer.subdata(in: 0..<delimiterRange.lowerBound)
                // Drop the line together with its delimiter.
                buffer.removeSubrange(0..<delimiterRange.upperBound)
                return String(data: lineBytes, encoding: encoding)
            }

            // No delimiter yet: pull the next chunk and search again.
            let chunk = fileHandle.readData(ofLength: chunkSize)
            guard chunk.isEmpty else {
                buffer.append(chunk)
                continue
            }

            // EOF or read error.
            atEof = true
            if buffer.isEmpty {
                break
            }
            // Whatever is left is the last line, which had no trailing delimiter.
            let lastLine = String(data: buffer, encoding: encoding)
            buffer.removeAll()
            return lastLine
        }
        return nil
    }

    /// Start reading from the beginning of file.
    func rewind() {
        fileHandle.seek(toFileOffset: 0)
        buffer.removeAll()
        atEof = false
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() {
        if let handle = fileHandle {
            handle.closeFile()
        }
        fileHandle = nil
    }
}

// MARK: - Sequence

extension StreamReader: Sequence {
    /// Returns an iterator whose every step calls `nextLine()` on this reader,
    /// so a `for line in reader` loop ends when `nextLine()` returns nil.
    /// The iterator keeps a strong reference to the reader.
    func makeIterator() -> AnyIterator<String> {
        let pullLine: () -> String? = { self.nextLine() }
        return AnyIterator(pullLine)
    }
}

StreamReader 类逐行读取文本文件,因此无需一次把整个文件读入内存。上面第一个代码块演示了如何用它读取文件内容。请试试上面的代码,这应该可以解决您的问题。请注意,我使用了后台线程,而据我所知(AFAIK)Realm 不适用于后台线程。如果有帮助,请告诉我。

关于json - Swift:解析 10k 行数据的最高效方法?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/54742249/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com