- c - 在位数组中找到第一个零
- linux - Unix 显示有关匹配两种模式之一的文件的信息
- 正则表达式替换多个文件
- linux - 隐藏来自 xtrace 的命令
我正在试验 http://robobrowser.readthedocs.org/en/latest/readme.html ,一个基于漂亮汤库的新 python 库。我试图通过打开一个 html 页面并将其返回到 django 应用程序中来对其进行测试,但我无法完成这个最简单的任务。我的 Django 应用包含:
def index(request):
p=str(request.POST.get('p', False)) # p='https://www.yahoo.com/'
browser = RoboBrowser(history=True)
postedmessage = browser.open(p)
return HttpResponse(postedmessage)
如何返回页面的所有 HTML?
最佳答案
您可以尝试使用 parsed
属性。
代码:
from robobrowser import RoboBrowser
url = "http://www.google.com"
br = RoboBrowser(history=True)
br.open(url)
print br.parsed
结果:
<!DOCTYPE html>
<html itemscope="" itemtype="http://schema.org/WebPage" lang="en-PH"><head><meta content="/images/google_favicon_128.png" itemprop="image"/><title>Google</title><script>(function(){
window.google={kEI:"-RFgU9LgJsq6uATKqYGoDg",getEI:function(a){for(var b;a&&(!a.getAttribute||!(b=a.getAttribute("eid")));)a=a.parentNode;return b||google.kEI},https:function(){return"https:"==window.location.protocol},kEXPI:"17259,4000116,4007661,4007830,4008142,4009033,4009641,4010806,4010858,4010899,4011228,4011258,4011679,4012373,4012504,4012860,4013374,4013414,4013591,4013723,4013758,4013787,4013823,4013941,4013967,4013979,4014016,4014033,4014093,4014431,4014515,4014636,4014671,4014810,4014813,4014828,4014991,4015119,4015155,4015234,4015260,4015519,4015550,4015587,4015635,4015638,4015639,4015772,4015901,4016005,4016042,4016127,4016309,4016363,4016367,4016371,4016391,4016425,4016452,4016466,4016479,4016487,4016638,4016703,4016730,4016786,4016800,4016824,4016855,4016933,4016969,4016978,4017042,4017079,4017177,4017202,4017206,4017280,4017285,4017294,4017301,4017336,4017556,8300015,8300017,8500165,8500223,8500240,8500252,8500255,8500283,8500306,8500313,8500325,8500332,8500349,10200002,10200012,10200029,10200038,10200040,10200048,10200053,10200055,10200066,10200083,10200120,10200134,10200136,10200155,10200157,10200159,10200164,10200195,10200211,10200215,10200221,10200224,10200231,10200236,10200242,10200246,10200252",kCSI:{e:"17259,4000116,4007661,4007830,4008142,4009033,4009641,4010806,4010858,4010899,4011228,4011258,4011679,4012373,4012504,4012860,4013374,4013414,4013591,4013723,4013758,4013787,4013823,4013941,4013967,4013979,4014016,4014033,4014093,4014431,4014515,4014636,4014671,4014810,4014813,4014828,4014991,4015119,4015155,4015234,4015260,4015519,4015550,4015587,4015635,4015638,4015639,4015772,4015901,4016005,4016042,4016127,4016309,4016363,4016367,4016371,4016391,4016425,4016452,4016466,4016479,4016487,4016638,4016703,4016730,4016786,4016800,4016824,4016855,4016933,4016969,4016978,4017042,4017079,4017177,4017202,4017206,4017280,4017285,4017294,4017301,4017336,4017556,8300015,8300017,8500165,8500223,8500240,8500252,8500255,8500283,8500306,8500313,8500325,8500332,8500349,10200002,10200012,10200029,10200038,10200040,10200048,10200053,10200055,10200066,10200083,10200120,10200134,10200136,10200155,10200157,10200159,10200164,10200195,10200211,10200215,10200221,10200224,10200231,10200236,10200242,10200246,10200252",ei:"-RFgU9LgJsq6uATKqYGoDg"},authuser:0,ml:function(){},kHL:"en",time:function(){return(new Date).getTime()},log:function(a,b,c,h,k){var d=
new Image,f=google.lc,e=google.li,g="";d.onerror=d.onload=d.onabort=function(){delete f[e]};f[e]=d;c||-1!=b.search("&ei=")||(g="&ei="+google.getEI(h));c=c||"/"+(k||"gen_204")+"?atyp=i&ct="+a+"&cad="+b+g+"&zx="+google.time();a=/^http:/i;a.test(c)&&google.https()?(google.ml(Error("GLMM"),!1,{src:c}),delete f[e]):(d.src=c,google.li=e+1)},lc:[],li:0,y:{},x:function(a,b){google.y[a.id]=[a,b];return!1},load:function(a,b,c){google.x({id:a+l++},function(){google.load(a,b,c)})}};var l=0;})();
(function(){google.sn="webhp";google.timers={};google.startTick=function(a,b){var f=google.time();google.timers[a]={t:{start:f},bfr:!!b};};google.tick=function(a,b,f){google.timers[a]||google.startTick(a);google.timers[a].t[b]=f||google.time()};google.startTick("load",!0);
try{}catch(d){}})();
var _gjwl=location;function _gjuc(){var a=_gjwl.href.indexOf("#");if(0<=a&&(a=_gjwl.href.substring(a),0<a.indexOf("&q=")||0<=a.indexOf("#q="))&&(a=a.substring(1),-1==a.indexOf("#"))){for(var d=0;d<a.length;){var b=d;"&"==a.charAt(b)&&++b;var c=a.indexOf("&",b);-1==c&&(c=a.length);b=a.substring(b,c);if(0==b.indexOf("fp="))a=a.substring(0,d)+a.substring(c,a.length),c=d;else if("cad=h"==b)return 0;d=c}_gjwl.href="/search?"+a+"&cad=h";return 1}return 0}
function _gjh(){!_gjuc()&&window.google&&google.x&&google.x({id:"GJH"},function(){google.nav&&google.nav.gjh&&google.nav.gjh()})};
window._gjh&&_gjh();</script><style>#gbar,#guser{font-size:13px;padding-top:1px !important;}#gbar{height:22px}#guser{padding-bottom:7px !important;text-align:right}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline !important}a.gb1,a.gb4{color:#00c !important}.gbi .gb4{color:#dd8e27 !important}.gbf .gb4{color:#900 !important}</style><style>body,td,a,p,.h{font-family:arial,sans-serif}body{margin:0;overflow-y:scroll}#gog{padding:3px 8px 0}td{line-height:.8em}.gac_m td{line-height:17px}form{margin-bottom:20px}.h{color:#36c}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}em{font-weight:bold;font-style:normal}.lst{height:25px;width:496px}.gsfi,.lst{font:18px arial,sans-serif}.gsfs{font:17px arial,sans-serif}.ds{display:inline-box;display:inline-block;margin:3px 0 4px;margin-left:4px}input{font-family:inherit}a.gb1,a.gb2,a.gb3,a.gb4{color:#11c !important}body{background:#fff;color:black}a{color:#11c;text-decoration:none}a:hover,a:active{text-decoration:underline}.fl a{color:#36c}a:visited{color:#551a8b}a.gb1,a.gb4{text-decoration:underline}a.gb3:hover{text-decoration:none}#ghead a.gb2:hover{color:#fff !important}.sblc{padding-top:5px}.sblc a{display:block;margin:2px 0;margin-left:13px;font-size:11px}.lsbb{background:#eee;border:solid 1px;border-color:#ccc #999 #999 #ccc;height:30px}.lsbb{display:block}.ftl,#fll a{display:inline-block;margin:0 12px}.lsb{background:url(/images/srpr/nav_logo80.png) 0 -258px repeat-x;border:none;color:#000;cursor:pointer;height:30px;margin:0;outline:0;font:15px arial,sans-serif;vertical-align:top}.lsb:active{background:#ccc}.lst:focus{outline:none}#addlang a{padding:0 3px}</style><script></script></head><body bgcolor="#fff"><script>(function(){var src='/images/nav_logo176.png';var iesg=false;document.body.onload = function(){window.n && window.n();if (document.images){new Image().src=src;}
if (!iesg){document.f&&document.f.q.focus();document.gbqf&&document.gbqf.q.focus();}
}
})();</script><textarea id="csi" style="display:none"></textarea><div id="mngb"> <div id="gbar"><nobr><b class="gb1">Search</b> <a class="gb1" href="http://www.google.com.ph/imghp?hl=en&tab=wi">Images</a> <a class="gb1" href="http://maps.google.com.ph/maps?hl=en&tab=wl">Maps</a> <a class="gb1" href="https://play.google.com/?hl=en&tab=w8">Play</a> <a class="gb1" href="http://www.youtube.com/?gl=PH&tab=w1">YouTube</a> <a class="gb1" href="http://news.google.com.ph/nwshp?hl=en&tab=wn">News</a> <a class="gb1" href="https://mail.google.com/mail/?tab=wm">Gmail</a> <a class="gb1" href="https://drive.google.com/?tab=wo">Drive</a> <a class="gb1" href="http://www.google.com.ph/intl/en/options/" style="text-decoration:none"><u>More</u> »</a></nobr></div><div id="guser" width="100%"><nobr><span class="gbi" id="gbn"></span><span class="gbf" id="gbf"></span><span id="gbe"></span><a class="gb4" href="http://www.google.com.ph/history/optout?hl=en">Web History</a> | <a class="gb4" href="/preferences?hl=en">Settings</a> | <a class="gb4" href="https://accounts.google.com/ServiceLogin?hl=en&continue=http://www.google.com.ph/%3Fgfe_rd%3Dcr%26ei%3D-BFgU62INOmNiAeYnYDoAg" id="gb_70" target="_top">Sign in</a></nobr></div><div class="gbh" style="left:0"></div><div class="gbh" style="right:0"></div> </div><center><br clear="all" id="lgpd"/><div id="lga"><div style="padding:28px 0 3px"><div align="left" id="hplogo" onload="window.lol&&lol()" style="height:110px;width:276px;background:url(/images/srpr/logo9w.png) no-repeat" title="Google"><div nowrap="" style="color:#777;font-size:16px;font-weight:bold;position:relative;top:70px;left:218px">Philippines</div></div></div><br/></div><form action="/search" name="f"><table cellpadding="0" cellspacing="0"><tr valign="top"><td width="25%"> </td><td align="center" nowrap=""><input name="ie" type="hidden" value="ISO-8859-1"/><input name="hl" type="hidden" value="en-PH"/><input name="source" type="hidden" value="hp"/><div class="ds" style="height:32px;margin:4px 0"><input autocomplete="off" class="lst" maxlength="2048" name="q" size="57" style="color:#000;margin:0;padding:5px 8px 0 6px;vertical-align:top" title="Google Search" value=""/></div><br style="line-height:0"/><span class="ds"><span class="lsbb"><input class="lsb" name="btnG" type="submit" value="Google Search"/></span></span><span class="ds"><span class="lsbb"><input class="lsb" name="btnI" onclick="if(this.form.q.value)this.checked=1; else top.location='/doodles/'" type="submit" value="I'm Feeling Lucky"/></span></span></td><td align="left" class="fl sblc" nowrap="" width="25%"><a href="/advanced_search?hl=en-PH&authuser=0">Advanced search</a><a href="/language_tools?hl=en-PH&authuser=0">Language tools</a></td></tr></table><input id="gbv" name="gbv" type="hidden" value="1"/></form><div id="gac_scont"></div><div style="font-size:83%;min-height:3.5em"><br/><div id="als"><font id="addlang" size="-1">Google.com.ph offered in: <a href="http://www.google.com.ph/setprefs?sig=0_SbhOaVIheKTw2jFHRcEg8o-Evng%3D&hl=tl&source=homepage">Filipino</a> <a href="http://www.google.com.ph/setprefs?sig=0_SbhOaVIheKTw2jFHRcEg8o-Evng%3D&hl=ceb&source=homepage">Cebuano</a></font><br/><br/></div></div><span id="footer"><div style="font-size:10pt"><div id="fll" style="margin:19px auto;text-align:center"><a href="/intl/en/ads/">Advertising Programs</a><a href="http://www.google.com.ph/intl/en/services/">Business Solutions</a><a href="/intl/en/about.html">About Google</a><a href="http://www.google.com.ph/setprefdomain?prefdom=US&sig=0_dQ2pwXFotFQfDlj9qmDCkzdxCdA%3D" id="fehl">Google.com</a></div></div><p style="color:#767676;font-size:8pt">© 2013 - <a href="/intl/en/policies/">Privacy & Terms</a></p></span></center><div id="xjsd"></div><div data-jiis="bp" id="xjsi"><script>if(google.y)google.y.first=[];(function(){function b(a){window.setTimeout(function(){var c=document.createElement("script");c.src=a;document.getElementById("xjsd").appendChild(c)},0)}google.dljp=function(a){google.xjsu=a;b(a)};google.dlj=b;})();
if(!google.xjs){window._=window._||{};window._._DumpException=function(e){throw e};if(google.timers&&google.timers.load.t){google.timers.load.t.xjsls=new Date().getTime();}google.dljp('/xjs/_/js/k\x3dxjs.hp.en_US.RLLpSOAzMFM.O/m\x3dsb_he,pcc/rt\x3dj/d\x3d1/sv\x3d1/rs\x3dAItRSTOBXfxSyWrXjOGBi9e9cIs5cEBO6A');google.xjs=1;}google.pmc={"sb_he":{"agen":true,"cgen":true,"client":"heirloom-hp","dh":true,"ds":"","eqch":true,"fl":true,"host":"google.com.ph","jam":0,"jsonp":true,"msgs":{"cibl":"Clear Search","dym":"Did you mean:","lcky":"I\u0026#39;m Feeling Lucky","lml":"Learn more","oskt":"Input tools","psrc":"This search was removed from your \u003Ca href=\"/history\"\u003EWeb History\u003C/a\u003E","psrl":"Remove","sbit":"Search by image","srch":"Google Search"},"ovr":{},"pq":"","qcpw":false,"scd":10,"sce":5,"stok":"QGTPqfOgiEZ_AI3e5vphR6-NOmw"},"pcc":{}};google.y.first.push(function(){if(google.med){google.med('init');google.initHistory();google.med('history');}});if(google.j&&google.j.en&&google.j.xi){window.setTimeout(google.j.xi,0);}</script></div><script>(function(){if(google.timers&&google.timers.load.t){var b,c,d,e,g=function(a,f){a.removeEventListener?(a.removeEventListener("load",f,!1),a.removeEventListener("error",f,!1)):(a.detachEvent("onload",f),a.detachEvent("onerror",f))},h=function(a){e=(new Date).getTime();++c;a=a||window.event;a=a.target||a.srcElement;g(a,h)},k=document.getElementsByTagName("img");b=k.length;for(var l=c=0,m;l<b;++l)m=k[l],m.complete||"string"!=typeof m.src||!m.src?++c:m.addEventListener?(m.addEventListener("load",h,!1),m.addEventListener("error",
h,!1)):(m.attachEvent("onload",h),m.attachEvent("onerror",h));d=b-c;var n=function(){if(google.timers.load.t){google.timers.load.t.ol=(new Date).getTime();google.timers.load.t.iml=e;google.kCSI.imc=c;google.kCSI.imn=b;google.kCSI.imp=d;void 0!==google.stt&&(google.kCSI.stt=google.stt);google.csiReport&&google.csiReport()}};window.addEventListener?window.addEventListener("load",n,!1):window.attachEvent&&
window.attachEvent("onload",n);google.timers.load.t.prt=e=(new Date).getTime()};})();
</script></body></html>
[Finished in 4.3s]
不过,如何抓取取决于您。在任何情况下,如果您在了解此类新库时遇到困难,请始终阅读文档。
来自网站本身:
parsed
Lazily parse response content, using HTML parser specified by the browser.
或者您可以在 br
上使用 dir
并获得以下内容:
['__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_build_url', '_cursor', '_maxlen', '_states', '_traverse', '_update_state', 'back', 'find', 'find_all', 'follow_link', 'forward', 'get_form', 'get_forms', 'get_link', 'get_links', 'history', 'open', 'parsed', 'parser', 'response', 'select', 'session', 'state', 'submit_form', 'timeout', 'url']
[Finished in 5.3s]
如您所见,在最后显示了parsed
。
希望这对您有所帮助。
关于python - 如何使用 robobrowser 返回页面的 html,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23372311/
我正在处理一组标记为 160 个组的 173k 点。我想通过合并最接近的(到 9 或 10 个组)来减少组/集群的数量。我搜索过 sklearn 或类似的库,但没有成功。 我猜它只是通过 knn 聚类
我有一个扁平数字列表,这些数字逻辑上以 3 为一组,其中每个三元组是 (number, __ignored, flag[0 or 1]),例如: [7,56,1, 8,0,0, 2,0,0, 6,1,
我正在使用 pipenv 来管理我的包。我想编写一个 python 脚本来调用另一个使用不同虚拟环境(VE)的 python 脚本。 如何运行使用 VE1 的 python 脚本 1 并调用另一个 p
假设我有一个文件 script.py 位于 path = "foo/bar/script.py"。我正在寻找一种在 Python 中通过函数 execute_script() 从我的主要 Python
这听起来像是谜语或笑话,但实际上我还没有找到这个问题的答案。 问题到底是什么? 我想运行 2 个脚本。在第一个脚本中,我调用另一个脚本,但我希望它们继续并行,而不是在两个单独的线程中。主要是我不希望第
我有一个带有 python 2.5.5 的软件。我想发送一个命令,该命令将在 python 2.7.5 中启动一个脚本,然后继续执行该脚本。 我试过用 #!python2.7.5 和http://re
我在 python 命令行(使用 python 2.7)中,并尝试运行 Python 脚本。我的操作系统是 Windows 7。我已将我的目录设置为包含我所有脚本的文件夹,使用: os.chdir("
剧透:部分解决(见最后)。 以下是使用 Python 嵌入的代码示例: #include int main(int argc, char** argv) { Py_SetPythonHome
假设我有以下列表,对应于及时的股票价格: prices = [1, 3, 7, 10, 9, 8, 5, 3, 6, 8, 12, 9, 6, 10, 13, 8, 4, 11] 我想确定以下总体上最
所以我试图在选择某个单选按钮时更改此框架的背景。 我的框架位于一个类中,并且单选按钮的功能位于该类之外。 (这样我就可以在所有其他框架上调用它们。) 问题是每当我选择单选按钮时都会出现以下错误: co
我正在尝试将字符串与 python 中的正则表达式进行比较,如下所示, #!/usr/bin/env python3 import re str1 = "Expecting property name
考虑以下原型(prototype) Boost.Python 模块,该模块从单独的 C++ 头文件中引入类“D”。 /* file: a/b.cpp */ BOOST_PYTHON_MODULE(c)
如何编写一个程序来“识别函数调用的行号?” python 检查模块提供了定位行号的选项,但是, def di(): return inspect.currentframe().f_back.f_l
我已经使用 macports 安装了 Python 2.7,并且由于我的 $PATH 变量,这就是我输入 $ python 时得到的变量。然而,virtualenv 默认使用 Python 2.6,除
我只想问如何加快 python 上的 re.search 速度。 我有一个很长的字符串行,长度为 176861(即带有一些符号的字母数字字符),我使用此函数测试了该行以进行研究: def getExe
list1= [u'%app%%General%%Council%', u'%people%', u'%people%%Regional%%Council%%Mandate%', u'%ppp%%Ge
这个问题在这里已经有了答案: Is it Pythonic to use list comprehensions for just side effects? (7 个答案) 关闭 4 个月前。 告
我想用 Python 将两个列表组合成一个列表,方法如下: a = [1,1,1,2,2,2,3,3,3,3] b= ["Sun", "is", "bright", "June","and" ,"Ju
我正在运行带有最新 Boost 发行版 (1.55.0) 的 Mac OS X 10.8.4 (Darwin 12.4.0)。我正在按照说明 here构建包含在我的发行版中的教程 Boost-Pyth
学习 Python,我正在尝试制作一个没有任何第 3 方库的网络抓取工具,这样过程对我来说并没有简化,而且我知道我在做什么。我浏览了一些在线资源,但所有这些都让我对某些事情感到困惑。 html 看起来
我是一名优秀的程序员,十分优秀!