gpt4 book ai didi

c# - 如何在 .NET 中获取 xml 元素的流位置

转载 作者:太空宇宙 更新时间:2023-11-03 16:52:01 26 4
gpt4 key购买 nike

如何以合理有效的方式获取 .NET 4 中 XElement 的流位置?

          1         2         3         4         5         6         7         8
01234567890123456789012345678901234567890123456789012345678901234567890123456789012
<root><group id="0" combiner="or"><filter id="1" /><filter id="2" /></group></root>

我想从上面创建一个到段的映射

{ { "/root",                  Segment(0 , 82) },
{ "/root/group-0", Segment(6 , 75) },
{ "/root/group-0/filter-1", Segment(34, 50) },
{ "/root/group-0/filter-2", Segment(51, 67) } }

注意事项

  • 段的第二个字段可以是长度而不是结束索引
  • 方法可以更通用/扩展到其他字节表示

关于我的答案的博文和内存分析屏幕截图

http://corsis.posterous.com/xml-keyvalue-cache-optimizations

奖金

  • 使用一种紧凑的存储形式:内存中只保留整个文档的一份副本(任何子元素都不会被重复存储),同时允许以 O(1) 的时间访问任意元素

奖励示例

store["/root"].Decompress()         **O(1)**
store["/root/group-0"].Decompress() **O(1)**

最佳答案

这是我最初的尝试:

using System;
using System.Collections.Generic;
using System.Collections.Concurrent;
using System.Linq;
using System.IO;
using System.Xml;
using System.Xml.Linq;
using System.Text;

namespace XMLTest
{
/// <summary>
/// A start index paired with a length, identifying a contiguous slice of a buffer.
/// </summary>
public struct Segment
{
    /// <summary>Offset at which the slice begins.</summary>
    public long Index;

    /// <summary>Number of units the slice spans.</summary>
    public long Length;

    /// <summary>Creates a segment from its start offset and its length.</summary>
    /// <param name="index">Offset at which the slice begins.</param>
    /// <param name="length">Number of units the slice spans.</param>
    public Segment(long index, long length)
    {
        this.Index = index;
        this.Length = length;
    }

    /// <summary>Renders the segment as <c>Segment(index, length)</c>.</summary>
    public override string ToString()
    {
        const string template = "Segment({0}, {1})";
        return string.Format(template, this.Index, this.Length);
    }
}

/// <summary>
/// Extension helpers for slicing a string by a <c>Segment</c> and for copying
/// the contents of a stream into a byte array.
/// </summary>
public static class GeneralSerializationExtensions
{
    /// <summary>
    /// Returns the part of <paramref name="buffer"/> described by <paramref name="segment"/>.
    /// </summary>
    /// <param name="buffer">Text to slice.</param>
    /// <param name="segment">Start index and length of the slice, in characters.</param>
    /// <returns>The selected substring.</returns>
    public static string Segment(this string buffer, Segment segment)
    {
        // Substring only takes int arguments, so the long fields are narrowed here.
        return buffer.Substring((int)segment.Index, (int)segment.Length);
    }

    /// <summary>
    /// Reads the stream from <paramref name="startIndex"/> to its end into a new array.
    /// </summary>
    /// <param name="stream">Stream to read; it must be seekable and readable to yield data.</param>
    /// <param name="startIndex">Absolute offset (from the beginning) at which reading starts.</param>
    /// <param name="setBack">When true, the stream position is restored to its value on entry.</param>
    /// <returns>
    /// The bytes from <paramref name="startIndex"/> to the end of the stream. When the stream
    /// cannot both seek and read, a zero-filled array of <c>stream.Length</c> bytes is returned.
    /// </returns>
    public static byte[] Bytes(this Stream stream, int startIndex = 0, bool setBack = false)
    {
        if (!(stream.CanSeek && stream.CanRead))
        {
            // Fallback kept from the original implementation: nothing is read.
            // Note that Length itself throws NotSupportedException on non-seekable streams.
            return new byte[stream.Length];
        }

        var position = stream.Position;
        try
        {
            stream.Seek(startIndex, SeekOrigin.Begin);

            // Size the buffer to what is actually left after startIndex, so the result
            // carries no zero padding at its end.
            var bytes = new byte[Math.Max(0L, stream.Length - startIndex)];

            // Stream.Read may deliver fewer bytes than requested; keep reading until
            // the buffer is full or the stream reports end-of-data.
            var filled = 0;
            while (filled < bytes.Length)
            {
                var read = stream.Read(bytes, filled, bytes.Length - filled);
                if (read == 0)
                {
                    break;
                }
                filled += read;
            }

            if (filled < bytes.Length)
            {
                // The stream ended early; trim the unread tail.
                Array.Resize(ref bytes, filled);
            }

            return bytes;
        }
        finally
        {
            // Restore the caller's position even when reading fails.
            if (setBack)
            {
                stream.Position = position;
            }
        }
    }
}

/// <summary>
/// Console demo: indexes a sample XML element by path and prints every path together
/// with its segment and the text that segment selects.
/// </summary>
class Program
{
    /// <summary>Entry point; <paramref name="args"/> is not used.</summary>
    static void Main(string[] args)
    {
        // The unused MemoryStream local of the original was removed: it was never
        // read, written or disposed.
        var element = XElement.Parse(@"<root><group id=""0"" combiner=""or""><filter id=""1"" /><filter id=""2"" /></group></root>");
        // Alternative sample input with text nodes and nested elements:
        //var element = XElement.Parse("<a>i<b id='1' o='2' p=''/><b id='2'><c /></b><b id='3' /><b id='4' o='u'>2</b></a>");

        var pie = new PathIndexedXElement(element);

        foreach (var path in pie.Paths.OrderBy(p => p))
        {
            var s = pie.store[path];   // raw segment (index, length)
            var t = pie[path];         // text selected by that segment
            Console.WriteLine("> {2,-30} {0,-20} {1}", s, t, path);
        }
    }
}

/// <summary>
/// Holds the serialized text of an element together with a path-to-segment index,
/// so the text of any indexed element can be cut out of the single shared buffer.
/// </summary>
public class PathIndexedXElement
{
    // Serialized element text, produced by XmlPathSegmenter.StringBuffer.
    internal string buffer;

    // Path -> segment of `buffer`, produced by the PathSegments extension.
    internal ConcurrentDictionary<string, Segment> store;

    /// <summary>Serializes <paramref name="element"/> and builds its path index.</summary>
    /// <param name="element">Element to index.</param>
    public PathIndexedXElement(XElement element)
    {
        this.buffer = XmlPathSegmenter.StringBuffer(element);
        this.store = element.PathSegments();
    }

    /// <summary>All paths present in the index.</summary>
    public IEnumerable<string> Paths
    {
        get
        {
            return this.store.Keys;
        }
    }

    /// <summary>
    /// Text of the element stored under <paramref name="path"/>. The dictionary
    /// indexer throws when the path is not present.
    /// </summary>
    public string this[string path]
    {
        get
        {
            Segment located = this.store[path];
            return this.buffer.Segment(located);
        }
    }

    /// <summary>Non-throwing lookup of the text stored under <paramref name="path"/>.</summary>
    /// <param name="path">Path to look up.</param>
    /// <param name="xelement">The element text when found; otherwise null.</param>
    /// <returns>True when the path is indexed.</returns>
    public bool TryGetValue(string path, out string xelement)
    {
        Segment located;
        bool known = this.store.TryGetValue(path, out located);
        xelement = known ? this.buffer.Segment(located) : null;
        return known;
    }
}

public static class XmlPathSegmenter
{
/// <summary>
/// Creates an XML writer over <paramref name="stream"/> that emits UTF-8 without
/// indentation, without an XML declaration and without new-line normalization.
/// </summary>
/// <param name="stream">Destination stream.</param>
/// <returns>The configured writer.</returns>
public static XmlWriter CreateWriter(Stream stream)
{
    var options = new XmlWriterSettings();
    options.Encoding = Encoding.UTF8;
    options.Indent = false;
    options.OmitXmlDeclaration = true;
    options.NewLineHandling = NewLineHandling.None;

    return XmlWriter.Create(stream, options);
}

/// <summary>
/// Serializes <paramref name="element"/> through <c>CreateWriter</c> into a new in-memory stream.
/// </summary>
/// <param name="element">Element to serialize.</param>
/// <returns>The stream holding the serialized bytes, rewound to position 0. The caller owns it.</returns>
public static MemoryStream MemoryBuffer(XElement element)
{
    var stream = new MemoryStream();

    // Dispose the writer deterministically. CreateWriter leaves CloseOutput at its
    // default (false), so disposing the writer does not close `stream`.
    using (var writer = CreateWriter(stream))
    {
        element.Save(writer);
        writer.Flush();
    }

    stream.Position = 0;
    return stream;
}

/// <summary>
/// Serializes <paramref name="element"/> via <c>MemoryBuffer</c> and returns the result as text.
/// </summary>
/// <param name="element">Element to serialize.</param>
/// <returns>The serialized element, without a leading byte-order mark.</returns>
public static string StringBuffer(XElement element)
{
    using (var stream = MemoryBuffer(element))
    {
        var text = Encoding.UTF8.GetString(stream.ToArray());

        // The original dropped the first character unconditionally, which is only
        // correct while the writer emits a UTF-8 byte-order mark (decoded as U+FEFF).
        // Strip it only when it is really there, so the leading '<' can never be lost
        // and an empty result no longer throws.
        if (text.Length > 0 && text[0] == '\uFEFF')
        {
            return text.Substring(1);
        }

        return text;
    }
}

/// <summary>
/// Parses <paramref name="xmlElement"/> as XML and indexes the resulting element.
/// </summary>
/// <param name="xmlElement">XML text of a single element.</param>
/// <param name="store">Optional dictionary to pass on to the element overload.</param>
/// <returns>The dictionary returned by the element overload.</returns>
public static ConcurrentDictionary<string, Segment> PathSegments(string xmlElement, ConcurrentDictionary<string, Segment> store = null)
{
    XElement parsed = XElement.Parse(xmlElement);
    return PathSegments(parsed, store);
}

/// <summary>
/// Serializes <paramref name="element"/> and indexes the serialized form by path.
/// </summary>
/// <param name="element">Element to index.</param>
/// <param name="store">Optional dictionary to pass on to the stream overload.</param>
/// <returns>The dictionary returned by the stream overload.</returns>
public static ConcurrentDictionary<string, Segment> PathSegments(this XElement element, ConcurrentDictionary<string, Segment> store = null)
{
    // Reuse MemoryBuffer instead of repeating its serialization steps, and release
    // the temporary stream once the (synchronous) indexing pass has finished.
    using (var stream = MemoryBuffer(element))
    {
        return PathSegments(stream, store);
    }
}

/// <summary>
/// Indexes the XML read from <paramref name="stream"/> by element path.
/// </summary>
/// <param name="stream">Stream positioned at the start of the XML.</param>
/// <param name="store">Dictionary to fill; a new one is created when null.</param>
/// <returns>The dictionary that was filled.</returns>
public static ConcurrentDictionary<string, Segment> PathSegments(Stream stream, ConcurrentDictionary<string, Segment> store = null)
{
    var target = store ?? new ConcurrentDictionary<string, Segment>();
    var openElements = new ConcurrentStack<KeyValuePair<string, int>>();

    PathSegments(stream, openElements, target);
    return target;
}

/// <summary>
/// Reads the XML in <paramref name="stream"/> node by node and records, for every
/// element, a segment (start offset, length) under the element's path.
/// </summary>
/// <param name="stream">Stream positioned at the start of the XML.</param>
/// <param name="stack">Holds (path, start offset) for the elements that are currently open.</param>
/// <param name="store">Receives path -> segment; an already present path is not overwritten.</param>
/// <remarks>
/// Offsets are derived from <c>IXmlLineInfo.LinePosition</c>, which is relative to the
/// current line, so they are only meaningful for XML that sits on a single line.
/// </remarks>
static void PathSegments(Stream stream, ConcurrentStack<KeyValuePair<string, int>> stack, ConcurrentDictionary<string, Segment> store)
{
    // The reader keeps CloseInput at its default (false): disposing it leaves `stream` open.
    using (var reader = XmlReader.Create(stream, new XmlReaderSettings() { }))
    {
        var line = reader as IXmlLineInfo;
        if (line == null)
        {
            // Fail with a clear message instead of a NullReferenceException further down.
            throw new InvalidOperationException("The XML reader does not expose line information.");
        }

        while (reader.Read())
        {
            KeyValuePair<string, int> ep;
        ok:
            if (reader.IsStartElement())
            {
                // LinePosition is 1-based and points at the element name, i.e. one
                // character after '<'; subtracting 2 gives the 0-based offset of '<'.
                stack.TryPeek(out ep);
                stack.Push(new KeyValuePair<string, int>(ep.Key + Path(reader), line.LinePosition - 2));
            }

            if (reader.IsEmptyElement)
            {
                var name = reader.LocalName;
                var d = reader.Depth;

                // An empty element has no end tag, so its end is inferred from where
                // the following node starts.
                // NOTE(review): the arithmetic below matches a following start tag
                // ("<") or end tag ("</", hence the extra -1 when depth decreases);
                // confirm the result when text follows, or when this is the last node.
                reader.Read();
                if (stack.TryPop(out ep))
                {
                    var length = line.LinePosition - 2 - ep.Value - (d > reader.Depth ? 1 : 0);
                    Console.WriteLine("/{3}|{0} : {1} -> {2}", name, ep.Value, length, line.LineNumber);

                    store.TryAdd(ep.Key, new Segment(ep.Value, length));
                }

                // The node just read has not been examined yet: re-run the checks on it.
                goto ok;
            }

            if (reader.NodeType == XmlNodeType.EndElement)
            {
                if (stack.TryPop(out ep))
                {
                    // LinePosition points at the name inside "</name>". The tag text
                    // contains the qualified name, so measure reader.Name rather than
                    // LocalName; otherwise a prefixed tag such as "</ns:item>" comes
                    // out too short by the prefix and the colon.
                    var length = line.LinePosition + reader.Name.Length - ep.Value;
                    Console.WriteLine("|{3}|{0} : {1} -> {2}", reader.LocalName, ep.Value, length, line.LineNumber);

                    store.TryAdd(ep.Key, new Segment(ep.Value, length));
                }
            }
        }
    }
}

/// <summary>
/// Builds the path step for the element the reader is positioned on:
/// <c>/name</c>, or <c>/name-id</c> when the element carries an <c>id</c> attribute.
/// </summary>
/// <param name="element">Reader to inspect.</param>
/// <returns>The path step, or null when the reader is not on a start or empty element.</returns>
public static string Path(XmlReader element)
{
    if (!element.IsStartElement() && !element.IsEmptyElement)
    {
        return null;
    }

    // Only look for an id when the element has attributes at all.
    var id = element.HasAttributes ? element.GetAttribute("id") : null;
    var template = id == null ? "/{0}" : "/{0}-{1}";

    return string.Format(template, element.LocalName, id);
}
}
}

输出:

/1|filter : 34 -> 17
/1|filter : 51 -> 17
|1|group : 6 -> 70
|1|root : 0 -> 83
> /root Segment(0, 83) <root><group id="0" combiner="or"><filter id="1" /><filter id="2" /></group></root>
> /root/group-0 Segment(6, 70) <group id="0" combiner="or"><filter id="1" /><filter id="2" /></group>
> /root/group-0/filter-1 Segment(34, 17) <filter id="1" />
> /root/group-0/filter-2 Segment(51, 17) <filter id="2" />

关键在于发现 XmlReader 的具体实现类显式实现了 IXmlLineInfo 接口(interface),而这是一条很难找到的信息。

注意事项

在收到关于 this question 的各种评论之后,我在这里先做几点预防性的说明 :) :

  • 在此示例中,集合的并发版本并没有实际用处。我知道这一点,但仍乐于使用它们 :)
  • 路径方案可以很容易地推广,但这涵盖了我的所有需求。
  • 我知道 id 通常用作文档范围内的唯一标识符,我很高兴在这个特定的上下文中使用它们。
  • 可以很容易地扩展 Segment:增加另一个长度属性,指向开始标记的结束符号 >,这样就可以只提取文档树中任意给定元素的开始标记(及其属性),从而为任意目标元素重建其上下文。对于较浅的树,这应该能以很小的常数开销同时访问目标元素及其上下文信息。
  • 我完全清楚,所有这些尝试可能值得,也可能不值得:我还没有针对自己场景的任何实测数据。我只是想开发一种方法并与大家分享。

关于c# - 如何在 .NET 中获取 xml 元素的流位置,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/3613713/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com