gpt4 book ai didi

c - 使用 libxml2 解析多文档 RELAX-NG 模式

转载 作者:行者123 更新时间:2023-11-30 17:01:44 28 4
gpt4 key购买 nike

我想将 RELAX-NG 架构转换为 schemaInfo 对象,以便可以在 codemirror 中使用它来完成 xml 补全。

https://codemirror.net/demo/xmlcomplete.html

xmllint 用法

像下面这样用 xmllint 验证文档时,libxml2 已经支持由多个文件组成的 RELAX NG 模式:

xmllint --relaxng myschema.rng mydoc.xml

问题

libxml2也可以用于解析多文档模式文件吗?

以下是多文档架构的示例:

这是一些我不理解的 libxml2 功能,但可能会有所帮助:

假设

我认为我必须使用以下工具将多文档架构转换为单个文档架构:https://github.com/h4l/rnginline/tree/master/rnginline

直接使用libxml2会很棒,因为这样我就可以支持模式而无需预处理。

更新2016年5月3日

如下所示,用下面的程序解析 RELAX NG 模式时只会输出顶层文件的内容,并不会展开主文件中通过 include 指令引入的任何文件(注意:RELAX NG 模式可以拆分为多个文件)。

<!-- XHTML Basic -->

<grammar ns="http://www.w3.org/1999/xhtml"
xmlns="http://relaxng.org/ns/structure/1.0">

<include href="modules/datatypes.rng"/>
<include href="modules/attribs.rng"/>
<include href="modules/struct.rng"/>
<include href="modules/text.rng"/>
<include href="modules/hypertext.rng"/>
<include href="modules/list.rng"/>
<include href="modules/basic-form.rng"/>
<include href="modules/basic-table.rng"/>
<include href="modules/image.rng"/>
<include href="modules/param.rng"/>
<include href="modules/object.rng"/>
<include href="modules/meta.rng"/>
<include href="modules/link.rng"/>
<include href="modules/base.rng"/>

</grammar>

源代码

/**
* section: Tree
* synopsis: Navigates a tree to print element names
* purpose: Parse a file to a tree, use xmlDocGetRootElement() to
* get the root element, then walk the document and print
* all the element name in document order.
* usage: tree1 filename_or_URL
* test: tree1 test2.xml > tree1.tmp && diff tree1.tmp $(srcdir)/tree1.res
* author: Dodji Seketeli
* copy: see Copyright for the status of this software.
*/
#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

#ifdef LIBXML_TREE_ENABLED


#define ANSI_COLOR_RED "\x1b[31m"
#define ANSI_COLOR_GREEN "\x1b[32m"
#define ANSI_COLOR_YELLOW "\x1b[33m"
#define ANSI_COLOR_BLUE "\x1b[34m"
#define ANSI_COLOR_MAGENTA "\x1b[35m"
#define ANSI_COLOR_CYAN "\x1b[36m"
#define ANSI_COLOR_RESET "\x1b[0m"


/*
*To compile this file using gcc you can type
*gcc `xml2-config --cflags --libs` -o xmlexample libxml2-example.c
*/

/**
* print_element_names:
* @a_node: the initial xml node to consider.
*
* Prints the names of the all the xml elements
* that are siblings or children of a given xml node.
*/

/**
 * pad:
 * @depth: nesting level of the node being printed.
 *
 * Builds an indentation string made of (depth + 1) spaces.
 *
 * The string lives in a static buffer, so it remains valid after the
 * call returns, but it is overwritten by the next call and the function
 * is not reentrant. A negative depth yields an empty string and a very
 * large depth is clamped so the buffer can never overflow.
 *
 * Returns a pointer to the NUL-terminated indentation string.
 */
char* pad(int depth) {
    static char str[2000];
    const int max_spaces = (int)sizeof(str) - 1;
    int spaces;

    if (depth < 0)
        spaces = 0;
    else if (depth >= max_spaces)
        spaces = max_spaces;
    else
        spaces = depth + 1;

    for (int i = 0; i < spaces; i++) {
        str[i] = ' ';
    }
    str[spaces] = '\0';
    return str;
}

/**
 * print_element_names:
 * @a_node: first node of the sibling list to walk (may be NULL).
 * @depth: nesting level of @a_node, used to indent the output.
 *
 * Walks @a_node, its following siblings and, recursively, all of their
 * children. For every element node the element name is printed, followed
 * by one line per attribute (in magenta) with the attribute name and value.
 */
static void
print_element_names(xmlNode * a_node, int depth)
{
    xmlNode *cur_node = NULL;

    for (cur_node = a_node; cur_node; cur_node = cur_node->next) {
        if (cur_node->type == XML_ELEMENT_NODE) {
            printf("%s %s\n", pad(depth), cur_node->name);
            for (xmlAttrPtr attr = cur_node->properties; NULL != attr; attr = attr->next)
            {
                /* The returned string is allocated by libxml2 and must be
                 * released with xmlFree(); it is NULL when there is no value. */
                xmlChar* value = xmlNodeListGetString(cur_node->doc, attr->children, 1);

                printf("%s", ANSI_COLOR_MAGENTA);
                printf("%s %s: ", pad(depth), attr->name);
                /* Never hand a NULL pointer to %s. */
                printf("%s \n", value != NULL ? (const char *)value : "");
                printf("%s", ANSI_COLOR_RESET);

                if (value != NULL)
                    xmlFree(value);
            }
        }

        /* Descend for every node type; nodes without children yield NULL. */
        print_element_names(cur_node->children, depth+1);
    }
}


/**
 * Parses the XML file (or URL) named on the command line, walks down
 * the DOM and prints the name of every element node together with its
 * attributes.
 *
 * Returns 0 on success, 1 on a usage error or when the file cannot be
 * parsed.
 */
int
main(int argc, char **argv)
{
    xmlDoc *doc = NULL;
    xmlNode *root_element = NULL;

    if (argc != 2) {
        fprintf(stderr, "usage: %s filename_or_URL\n",
                argc > 0 ? argv[0] : "tree1");
        return(1);
    }

    /*
     * this initialize the library and check potential ABI mismatches
     * between the version it was compiled for and the actual shared
     * library used.
     */
    LIBXML_TEST_VERSION

    /*parse the file and get the DOM */
    doc = xmlReadFile(argv[1], NULL, 0);

    if (doc == NULL) {
        /* Report the failure to the caller instead of exiting with 0. */
        fprintf(stderr, "error: could not parse file %s\n", argv[1]);
        xmlCleanupParser();
        return(1);
    }

    /*Get the root element node */
    root_element = xmlDocGetRootElement(doc);

    print_element_names(root_element, 0);

    /*free the document */
    xmlFreeDoc(doc);

    /*
     *Free the global variables that may
     *have been allocated by the parser.
     */
    xmlCleanupParser();

    return 0;
}
#else
/* Fallback entry point used when libxml2 was built without tree support. */
int main(void) {
    fprintf(stderr, "Tree support not compiled in\n");
    /* Return instead of calling exit(): this file does not include <stdlib.h>. */
    return 1;
}
#endif

示例用法

[nix-shell:~/Desktop/projects/nlnet/nlnet]$ ./tree1 html5-rng/xhtml-basic.rng
grammar
ns: http://www.w3.org/1999/xhtml
include
href: modules/datatypes.rng
include
href: modules/attribs.rng
include
href: modules/struct.rng
include
href: modules/text.rng
include
href: modules/hypertext.rng
include
href: modules/list.rng
include
href: modules/basic-form.rng
include
href: modules/basic-table.rng
include
href: modules/image.rng
include
href: modules/param.rng
include
href: modules/object.rng
include
href: modules/meta.rng
include
href: modules/link.rng
include
href: modules/base.rng

最佳答案

虽然问题写得有些冗长,但所问的内容很清楚。截至 2.9.14 版本,libxml2 似乎只能通过解析 URL 或访问文件系统来处理 include(大概是在当前目录中查找 href 属性所指定的文件名),除此之外没有别的方式。这或许已经回答了问题,但如果必须从内存缓冲区加载模式,这样就不够了。比较干净的做法是提供一个回调来解析 rng:include 指令,但 libxml2 似乎没有提供这样的 API。另一种做法是递归地把外部模式合并成一个不含 include 指令的单一模式,这样实际运行时反而更高效。以下代码在我合并一个中等复杂度的模式(8 个文件)时运行正常,只需相应地修改路径和文件名即可。

#include <cstdio>
#include <filesystem>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unordered_set>
#include <vector>

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlsave.h>

using namespace std;
namespace fs = std::filesystem;

// Owning handle for a libxml2 document: xmlFreeDoc is invoked when the handle is destroyed.
using DocPtr = std::unique_ptr<xmlDoc, decltype(&xmlFreeDoc)>;

// Directory of the top-level schema; the href of every include is resolved against it as well.
constexpr const char* SchemaBasePath = R"(D:\Schemas)";
// File name of the top-level RELAX NG schema, appended to SchemaBasePath.
constexpr const char* RngSchemaFilename = "Schema.rng";
// Path the merged schema is written to.
constexpr const char* MergedSchemaSavePath = R"(D:\Schemas\Schema_Merged.rng)";
// Prefix and URI used to declare the RELAX NG namespace on the merged grammar element.
constexpr const char* RngNS = "rng";
constexpr const char* RngNSHref = "http://relaxng.org/ns/structure/1.0";

// A namespace declaration or an attribute gathered from the root element of
// a schema document, kept so it can be recreated on the merged grammar element.
struct Qualifier
{
    // true:  namespace declaration (Name is the prefix, Value is the URI).
    // false: attribute (Name is the attribute name, Value is its content).
    // Initialized so a default-constructed Qualifier never holds an indeterminate flag.
    bool IsNamespace = false;
    std::string Name;
    std::string Value;
};

// Forward declarations of the helpers defined after main().

// Parses the XML file at `filepath` into a document handle.
static DocPtr readDoc(const string_view& filepath);
// Collects the namespaces and attributes of the root element of `doc`, then walks its children.
static void followDoc(xmlDocPtr doc, vector<xmlNodePtr>& nodes, vector<Qualifier>& qualifiers);
// Walks the element children of `root`: includes are followed, other elements are copied into `nodes`.
static void followDoc(xmlNodePtr root, vector<xmlNodePtr>& nodes, vector<Qualifier>& qualifiers);
// NOTE(review): removeNode is declared and defined but not called anywhere in this file.
static void removeNode(xmlNodePtr element);
// Returns the content of the "href" attribute of `element`, or an empty string.
static string findHRef(const xmlNodePtr element);
// Returns the content of `attr` as a std::string.
static string getAttributeContent(const xmlAttrPtr attr);
// Serializes `doc` to the file at `filepath`.
static void saveDocToFile(xmlDocPtr doc, const string_view& filepath);
// Record a namespace / an attribute in `qualifiers` unless one with the same name is already there.
static void addNamespaceTo(vector<Qualifier>& qualifiers, xmlNsPtr ns);
static void addAttributeTo(vector<Qualifier>& qualifiers, xmlAttrPtr attr);

// hrefs of the included schemas that have already been read, so that a file
// referenced by several include elements is only merged once.
unordered_set<string> s_schemas;

int main()
{
LIBXML_TEST_VERSION;
auto packetRngPath = fs::u8path(SchemaBasePath) / RngSchemaFilename;
auto packetRngDoc = readDoc(packetRngPath.u8string());

vector<xmlNodePtr> nodes;
vector<Qualifier> qualifiers;
followDoc(packetRngDoc.get(), nodes, qualifiers);

auto newDoc = DocPtr(xmlNewDoc(nullptr), &xmlFreeDoc);
auto grammarNode = xmlNewChild((xmlNodePtr)newDoc.get(), nullptr, (const xmlChar*) "grammar", nullptr);
if (grammarNode == nullptr)
throw runtime_error("Can't create rng:grammar node");

auto rngNs = xmlNewNs(grammarNode, (const xmlChar*)RngNSHref, (const xmlChar*)RngNS);
if (rngNs == nullptr)
throw runtime_error("Can't find or create rng namespace");
xmlSetNs(grammarNode, rngNs);

for (auto qualifier : qualifiers)
{
// Recreate the gathered namespaces and attributes
if (qualifier.IsNamespace)
{
xmlNewNs(grammarNode, (const xmlChar*)qualifier.Value.data(),
(const xmlChar*)qualifier.Name.data());
}
else
{
xmlNewProp(grammarNode, (const xmlChar*)qualifier.Name.data(),
(const xmlChar*)qualifier.Value.data());
}
}

for (auto node : nodes)
{
if (xmlAddChild(grammarNode, node) == nullptr)
throw runtime_error("Can't add child node to grammar");
}

// This actually fixes the copied namespaces
// to share just one instance
if (xmlReconciliateNs(newDoc.get(), grammarNode) == -1)
throw runtime_error("Can't reconciliate namespaces");

saveDocToFile(newDoc.get(), MergedSchemaSavePath);

return 0;
}

// Parses the XML file at `filepath` with the XML_PARSE_NOBLANKS option.
//
// Returns an owning handle. It is empty (nullptr) when xmlReadFile fails,
// so callers must check it before using the document.
DocPtr readDoc(const string_view& filepath)
{
    // string_view::data() is not guaranteed to be NUL-terminated while
    // xmlReadFile expects a C string, so go through an owning std::string.
    const string path(filepath);
    return DocPtr(xmlReadFile(path.c_str(), nullptr, XML_PARSE_NOBLANKS), &xmlFreeDoc);
}

// Processes one schema document: records the namespaces in scope on its root
// element and the root's attributes in `qualifiers`, then walks the root's
// children (see the xmlNodePtr overload).
//
// Throws std::runtime_error when `doc` is null or has no root element.
void followDoc(xmlDocPtr doc, vector<xmlNodePtr>& nodes, vector<Qualifier>& qualifiers)
{
    auto root = (doc != nullptr) ? xmlDocGetRootElement(doc) : nullptr;
    if (root == nullptr)
        throw runtime_error("Can't find the root element of a schema document");

    // Fetch namespaces. xmlGetNsList returns a NULL-terminated array that the
    // caller must free, or NULL when no namespace is defined; the guard frees
    // the array even if recording a namespace throws.
    {
        unique_ptr<xmlNsPtr, decltype(xmlFree)> namespaces(xmlGetNsList(doc, root), xmlFree);
        if (namespaces)
        {
            for (auto ns = namespaces.get(); *ns != nullptr; ++ns)
                addNamespaceTo(qualifiers, *ns);
        }
    }

    // Fetch attributes
    for (xmlAttrPtr attribute = root->properties; attribute; attribute = attribute->next)
        addAttributeTo(qualifiers, attribute);

    followDoc(root, nodes, qualifiers);
}

// Walks the element children of `root`.
//
// A RELAX NG include element carrying a non-empty href is replaced by the
// content of the referenced schema: the file is read relative to
// SchemaBasePath and processed recursively, once per distinct href. The
// include element itself, and anything nested inside it, is not copied.
// Every other element is deep-copied and appended to `nodes`.
//
// Throws std::runtime_error when an included schema can't be read or a node
// can't be copied.
void followDoc(xmlNodePtr root, vector<xmlNodePtr>& nodes, vector<Qualifier>& qualifiers)
{
    for (auto child = xmlFirstElementChild(root); child; child = xmlNextElementSibling(child))
    {
        // Identify the element by namespace URI rather than by prefix: the
        // prefix is arbitrary and is NULL when the schema uses a default
        // namespace, which must not be turned into a string_view.
        const bool isRngInclude = child->ns != nullptr
            && child->ns->href != nullptr
            && string_view((const char*)child->ns->href) == RngNSHref
            && string_view((const char*)child->name) == "include";

        const string href = isRngInclude ? findHRef(child) : string();
        if (!href.empty())
        {
            // insert() reports whether the href is new: one lookup instead of two.
            if (s_schemas.insert(href).second)
            {
                const auto schemaPath = fs::u8path(SchemaBasePath) / href;
                auto doc = readDoc(schemaPath.u8string());
                if (doc == nullptr)
                    throw runtime_error("Can't read included schema " + href);

                // NOTE(review): `doc` is freed at the end of this scope, so the
                // copies gathered from it must not reference it - confirm
                // against the xmlCopyNode documentation.
                followDoc(doc.get(), nodes, qualifiers);
            }

            continue;
        }

        auto copied = xmlCopyNode(child, 1);
        if (copied == nullptr)
            throw runtime_error("Can't copy child node");

        nodes.push_back(copied);
    }
}

// Records the namespace declaration `xmlNs` in `qualifiers` unless a
// namespace with the same prefix has already been recorded.
//
// A declaration without a prefix (default namespace) or without a URI is
// skipped: it can't be stored as a prefix/URI pair, and building a
// std::string from a null pointer is undefined behaviour.
void addNamespaceTo(vector<Qualifier>& qualifiers, xmlNsPtr xmlNs)
{
    if (xmlNs == nullptr || xmlNs->prefix == nullptr || xmlNs->href == nullptr)
        return;

    const string_view prefix((const char*)xmlNs->prefix);
    for (const auto& known : qualifiers)
    {
        // Ensure the namespace has not yet been added first
        if (known.IsNamespace && known.Name == prefix)
            return;
    }
    qualifiers.push_back({ true, string(prefix), (const char*)xmlNs->href });
}

// Records the attribute `xmlAttr` (name and content) in `qualifiers` unless
// an attribute with the same name has already been recorded; the first
// value seen for a given name wins. Attributes are compared by name only.
void addAttributeTo(vector<Qualifier>& qualifiers, xmlAttrPtr xmlAttr)
{
    const string_view name((const char*)xmlAttr->name);
    // Iterate by reference: copying a Qualifier copies two strings.
    for (const auto& known : qualifiers)
    {
        // Ensure the attribute has not yet been added first
        if (!known.IsNamespace && known.Name == name)
            return;
    }
    qualifiers.push_back({ false, string(name), getAttributeContent(xmlAttr) });
}

// Detaches `element` from the tree it belongs to and frees it.
// The node is unlinked first, then freed.
// NOTE(review): this helper is not called anywhere in this file.
void removeNode(xmlNodePtr element)
{
// Unlink the node from its parent and siblings, then release it
xmlUnlinkNode(element);
xmlFreeNode(element);
}

// Looks through the attributes of `element` for one named "href" and returns
// its content; an empty string is returned when there is no such attribute.
string findHRef(const xmlNodePtr element)
{
    xmlAttrPtr candidate = element->properties;
    while (candidate != nullptr)
    {
        const string_view attributeName((const char*)candidate->name);
        if (attributeName == "href")
            return getAttributeContent(candidate);

        candidate = candidate->next;
    }

    return string();
}

// Returns the content of `attr` as a std::string, or an empty string when
// libxml2 reports no content. The buffer handed out by xmlNodeGetContent is
// released with xmlFree once it has been copied.
string getAttributeContent(const xmlAttrPtr attr)
{
    unique_ptr<xmlChar, decltype(xmlFree)> text(xmlNodeGetContent((const xmlNode*)attr), xmlFree);
    if (!text)
        return string();

    return string((const char*)text.get());
}

// Serializes `doc` to the file at `filepath`, UTF-8 encoded and formatted
// (XML_SAVE_FORMAT).
//
// Throws std::runtime_error when the file can't be opened, written or closed.
void saveDocToFile(xmlDocPtr doc, const string_view& filepath)
{
    // string_view::data() is not guaranteed to be NUL-terminated.
    const string path(filepath);

    auto ctx = xmlSaveToFilename(path.c_str(), "utf-8", XML_SAVE_FORMAT);
    if (ctx == nullptr)
        throw runtime_error("Can't save XML document");

    // Always close the context, even when writing failed, so it is never leaked.
    const bool written = xmlSaveDoc(ctx, doc) != -1;
    const bool closed = xmlSaveClose(ctx) != -1;
    if (!written || !closed)
        throw runtime_error("Can't save XML document");
}

关于c - 使用 libxml2 解析多文档 RELAX-NG 模式,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36850163/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com