gpt4 book ai didi

Python eTree Parser 没有附加元素

转载 作者:行者123 更新时间:2023-11-28 18:48:35 26 4
gpt4 key购买 nike

请看我的日志:从 Postgres 返回的那一行数据已经由字符串转换成了元素(我依次打印了该字符串、该元素,以及 iselement 的布尔值!)。然而,当我尝试把它 append 到树中时,却报错说它不是一个 Element。唉。

import sys
from HTMLParser import HTMLParser
from xml.etree import cElementTree as etree
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, tostring
import psycopg2
import psycopg2.extras

def main():
    """Fill an HTML template from each row of the ``landingpagedata`` table.

    Connects to Postgres, fetches every row of ``landingpagedata`` as a
    dict-like record (``RealDictCursor``), and for each row parses
    ``template.html`` into an element tree and overwrites the text of the
    ``<title>``, first ``<h1>`` and ``<p class="intro">`` elements with the
    matching columns of the row.
    """
    # Connect to an existing database
    conn = psycopg2.connect(dbname="**", user="**", password="**", host="/tmp/", port="**")

    # Open a cursor to perform database operations
    cur = conn.cursor(cursor_factory = psycopg2.extras.RealDictCursor)

    cur.execute("SELECT * FROM landingpagedata;")
    rows = cur.fetchall()

    class LinksParser(HTMLParser):
        """HTML parser that forwards parse events to a ``TreeBuilder``.

        ``close()`` returns the root element of the tree that was built.
        """

        def __init__(self):
            HTMLParser.__init__(self)
            # NOTE: ``etree`` is the cElementTree alias from the imports, so
            # every element of the resulting tree comes from that
            # implementation.
            self.tb = etree.TreeBuilder()

        def handle_starttag(self, tag, attributes):
            # ``attributes`` arrives as (name, value) pairs; the builder
            # wants a mapping.
            self.tb.start(tag, dict(attributes))

        def handle_endtag(self, tag):
            self.tb.end(tag)

        def handle_data(self, data):
            self.tb.data(data)

        def close(self):
            # Finish the HTML parser first, then return the builder's root.
            HTMLParser.close(self)
            return self.tb.close()

    template = 'template.html'



    # parser.feed(open('landingIndex.html').read()) #for testing
    # root = parser.close()

    for row in rows:
        # A fresh parser (and therefore a fresh tree) for every row.
        parser = LinksParser()

        parser.feed(open(template).read())
        root = parser.close()




        #title
        title = root.find(".//title")
        title.text = row['title']

        #headline
        h1_id_headline = root.find(".//h1")
        h1_id_headline.text = row['h1_id_headline']
        # print row['h1_id_headline']

        #intro
        p_class_intro = root.find(".//p[@class='intro']")
        p_class_intro.text = row['p_class_intro']
        # print row['p_class_intro']

问题就出在这里!

        #recommended
p_class_recommendedbackground = root.find(".//div[@class='recommended_background_div']")
print p_class_recommendedbackground
p_class_recommendedbackground.clear()
newElement = ET.fromstring(row['p_class_recommendedbackground'])
print row['p_class_recommendedbackground']
print ET.iselement(newElement)
p_class_recommendedbackground.append(newElement)

html = tostring(root)
f = open(row['page_name'], 'w').close()
f = open(row['page_name'], 'w')
f.write(html)
f.close()
# f = ''
# html = ''
parser.reset()
root = ''

# Close communication with the database
cur.close()
conn.close()

# Run the page builder only when this file is executed as a script.
if __name__ == "__main__":
    main()

我的日志是这样的:

{background: url(/images/courses/azRealEstate.png) center no-repeat;}
<Element 'div' at 0x10a999720>
<p class="recommended_background">Materials are are aimed to all aspiring real estate sales associates who wish to obtain the Arizona Real Estate Salesperson license, which is provided by the <a href="http://www.re.state.az.us/" style="text-decoration: underline;">Arizona Department of Real Estate</a>.</p>
True
Traceback (most recent call last):
File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 108, in <module> main()
File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 84, in main
p_class_recommendedbackground.append(newElement)
TypeError: must be Element, not Element
[Finished in 0.1s with exit code 1]

最佳答案

我可以这样重现错误消息:

from xml.etree import cElementTree as etree
import xml.etree.ElementTree as ET

# Parent element created through the cElementTree alias (``etree``).
croot = etree.Element('root')
# Child element created through xml.etree.ElementTree (``ET``).
child = ET.Element('child')
# Appending an element from one implementation to an element from the other
# raises the error shown on the next line.
croot.append(child)
# TypeError: must be Element, not Element

问题的根本原因在于:我们把 ElementTree 的 cElementTree 实现与 ElementTree 的 xml.etree.ElementTree 实现混在了一起使用。这两者绝不应该混用。

所以解决方法就是只选用其中一个(比如 etree),并把代码中所有用到另一个的地方都替换掉(例如,把 ET 全部替换为 etree)。

关于Python eTree Parser 没有附加元素,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/16467640/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com