gpt4 book ai didi

javascript - 从内联 JavaScript 中提取数据

转载 作者:行者123 更新时间:2023-12-03 04:03:59 24 4
gpt4 key购买 nike

我正在抓取的网页通过内联 JavaScript 生成数据,并把电话号码填入相应的 div。网页源代码中不直接显示这些号码,因此我无法使用 XPath、Beautiful Soup 等工具抓取数据。在分析页面源代码时,我发现这些号码只出现在页面源代码的脚本内部,除非把页面源代码保存下来,否则它们是不可见的。这是具体的脚本:

<script>
QuidditaEnvironment.CurrentContactData={"ContactInfoName":null,"PhoneNumber1":"064/005-3708","PhoneNumber2":null,"City":null,"Address":"","Email":"srdjanmilosevickiseli@gmail.com","ShowOtherContactData":false,"ShowContactPhone":true,"ShowMyAdsPage":false,"Advertiser":{"DisplayName":"korisnik-404772","PartyType":1,"Comment":null,"CreatedAt":"2014-10-27T10:31:01","LastModifiedAt":"2014-10-27T10:31:01","ActivatedAt":"2014-10-27T10:32:13","IsDeleted":false,"IsHost":false,"ProfileDescription":null,"AccountBalance":0.0,"AccountBalanceString":null,"AvatarImageURL":null,"IsAvatarValid":false,"AvatarImages":null,"ContactInfos":[{"Name":"Glavni","Address":"Beograd,Zage Malivuk 2 008","ZipCode":null,"City":null,"Country":null,"Phone1":null,"Phone2":null,"Longitude":null,"Latitude":null,"Email":"srdjanmilosevickiseli@gmail.com","CreatedAt":"2014-10-27T10:31:01","LastModifiedAt":"2014-10-27T10:32:13","IsPrimary":true,"IsActive":true,"ShowOnMyAdsPage":true,"SequenceNumber":1,"ForHost":false,"Id":404772,"Version":3}],"StateName":"Aktivan","StateId":2,"IsInvestor":false,"IsSchool":false,"PartyGroupIds":[],"PartyGroupNames":[],"Address":null,"AdvertisingParty":true,"BlockAds":false,"NumberOfCVs":0,"EnforceIdentityUniqueness":true,"Slug":"korisnik-404772","Email":null,"SuppressAutomaticVerification":false,"Id":404772,"Version":4},"Latitudes":[],"Longitudes":[],"InvestorRoute":"nekretnine/novogradnja/korisnik-404772/projekti","UserAdsRoute":null,"SchoolRoute":"oglasi/korisnik-404772/posao/kursevi-i-obuke-pretraga","AvatarUrl":null,"NumberInRegister":null,"WebAddress":null,"Id":5425435147525,"Version":225,"Guid":"d93c6d04-5c96-4c80-aefb-fa644b1a1351"};QuidditaEnvironment.IsCurrentAdActive=true;
QuidditaEnvironment.CurrentClassified={"RelativeUrl":"/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525","HasAutomaticRenewal":false,"ValidToProlonged":null,"ShowInUnifiedAdvertiserAdList":true,"IsUsedMoveToTop":false,"ExpiresWithin48Hours":false,"UniqueId":"5425435147525_4","Id":"5425435147525","AdKindId":"4","IsPromoted":false,"IsInterestingInternal":false,"IsInterestingExternal":false,"InterestingEntryDate":null,"AdKindCode":"Premium","AdKindPosition":1,"StateId":101,"StoppageReasonIds":null,"StoppageReasonDescription":null,"Version":0,"Stamp":"2017-06-19T14:33:50.655Z","AdvertiserId":"404772","Title":"DIREKTNA PRODAJA","Text":"Hrastov parket,PVC stolarija,sigurnosna vrata,hodnik,spavaca soba,kupatilo,kuhinja sa prirodnom ventilacijom,dnevni boravak..klima..","TextHtml":"<p>Hrastov parket,PVC stolarija,sigurnosna vrata,hodnik,spavaca soba,kupatilo,kuhinja sa prirodnom ventilacijom,dnevni boravak..klima..</p>","PrintText":"","ContactInfoName":null,"PhoneNumber1":"+381640053708","PhoneNumber2":null,"Email":"True","Address":"","City":null,"ValidFrom":"2017-06-16T13:10:11Z","ValidFromForDisplay":"2017-06-16T13:10:11Z","ValidFromProlonged":null,"ValidTo":"2017-06-23T13:10:11Z","LastPublished":"2017-06-16T13:10:11Z","IsFirstOfKind":false,"CreatedAt":"2015-12-28T15:58:21Z","LastModifiedAt":"2017-06-16T13:09:42Z","IsArchived":false,"GeoLocationRPT":"44.801593,20.527653","ImageCount":6,"ImageURLs":["/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653263.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653261.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653262.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653264.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653265.jpg","/slike/oglasi/Originals/160228/direktna-prodaja-uknjizen-stan-5425435147525-71779653266.jpg"],"ImageText
s":["","","","","",""],"CategoryIds":[1,2,2001,12],"CategoryId":12,"CategoryHierarchyId":"1.2.2001.12.","CategoryNames":["Nekretnine","Stambeni prostor","Prodaja","Stan"],"CategoryFullName":"Nekretnine>Stambeni prostor>Prodaja>Stan","AdvertiserLogoUrlInternal":null,"AdvertiserLogoUrl":null,"VideoUrl":null,"CreatedByUserId":404772,"DeclarationId":404772,"EnclosureFilePath":null,"ListHTML":"&lt;div class=&quot;col-md-12 col-sm-12 col-xs-12 col-lg-12&quot;&gt;&lt;div class=&quot;row&quot;&gt;&lt;div class=&quot;product-item product-list-item Premium real-estates my-ad-placeholder&quot; data-id=&quot;5425435147525&quot; id=&quot;5425435147525&quot;&gt;&lt;div class=&quot;my-ad-sticker&quot;&gt;&lt;/div&gt;&lt;div class=&quot;small-arrow-map-ad&quot;&gt;&lt;/div&gt;&lt;div class=&quot;central-feature&quot;&gt;&lt;span data-value=&quot;31.000&quot;&gt;&lt;i&gt;31.000&amp;nbsp;€&lt;/i&gt;&lt;/span&gt;&lt;div class=&quot;price-shadow&quot;&gt;&lt;img src=&quot;/Content/assets/frontend/layout/img/price-shadow.png&quot; style=&quot;width:100%; height:10px;&quot; /&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class=&quot;col-md-4 col-sm-5 col-xs-4 col-lg-4&quot;&gt;&lt;div class=&quot;product-type&quot;&gt;&lt;/div&gt;&lt;figure class=&quot;pi-img-wrapper&quot;&gt;&lt;a class=&quot;a-images&quot; href=&quot;/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525&quot;&gt;&lt;img src=&#39;https://img.halooglasi.com//slike/oglasi/Thumbs/160228/m/direktna-prodaja-uknjizen-stan-5425435147525-71779653263.jpg&#39; class=&quot;&quot; alt=&quot;DIREKTNA PRODAJA&quot; onError=&quot;this.onerror = null; this.src = &amp;#39;/Content/Quiddita/Widgets/Product/Stylesheets/img/no-image.jpg&amp;#39;&quot;&gt;&lt;/a&gt;&lt;/figure&gt;&lt;div class=&quot;pi-img-wrapper-under&quot;&gt;&lt;span class=&quot;publish-date&quot;&gt;16.06.2017&lt;/span&gt;&lt;span class=&quot;basic-info&quot;&gt;&lt;span data-field-name=&#39;oglasivac_nekretnine_s&#39; 
data-field-value=&#39;vlasnik&#39;&gt;Vlasnik&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class=&quot;col-md-6 col-sm-5 col-xs-6 col-lg-6 sm-margin&quot;&gt;&lt;h3 class=&quot;ad-title&quot;&gt;&lt;a href=&quot;/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525&quot;&gt;DIREKTNA PRODAJA&lt;/a&gt;&lt;/h3&gt;&lt;ul class=&quot;subtitle-places&quot;&gt;&lt;li&gt;Beograd&amp;nbsp;&lt;/li&gt;&lt;li&gt;Opština Zvezdara&amp;nbsp;&lt;/li&gt;&lt;li&gt;Mirijevo&amp;nbsp;&lt;/li&gt;&lt;/ul&gt;&lt;ul class=&quot;ad-features &quot;&gt;&lt;li class=&#39;col-p-1-3&#39;&gt;&lt;div class=&#39;value-wrapper&#39;&gt;Stan&amp;nbsp;&lt;span class=&#39;legend&#39;&gt;Tip nekretnine&lt;/span&gt;&lt;/div&gt;&lt;/li&gt;&lt;li class=&#39;col-p-1-3&#39;&gt;&lt;div class=&#39;value-wrapper&#39;&gt;39&amp;nbsp;m&lt;sup&gt;2&lt;/sup&gt;&lt;span class=&#39;legend&#39;&gt;Kvadratura&lt;/span&gt;&lt;/div&gt;&lt;/li&gt;&lt;li class=&#39;col-p-1-3&#39;&gt;&lt;div class=&#39;value-wrapper&#39;&gt;2.0&amp;nbsp;&lt;span class=&#39;legend&#39;&gt;Broj soba&lt;/span&gt;&lt;/div&gt;&lt;/li&gt;&lt;/ul&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;p class=&quot;text-description-list ad-description short-desc&quot;&gt;Hrastov parket,PVC stolarija,sigurnosna vrata,hodnik,spavaca soba,kupatilo,kuhinja sa prirodnom ventilacijom,dnevni boravak..klima..&lt;/p&gt;&lt;/div&gt;&lt;div class=&quot;btns&quot;&gt;&lt;button type=&quot;button&quot; class=&quot;btn btn-circle btn-fav-ad-star fav-cmd favorite-ad-holder&quot; data-id=&quot;5425435147525&quot;&gt;&lt;/button&gt;&lt;span class=&quot;on-map&quot;&gt;&lt;/span&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;","GridHTML":"&lt;div class=&quot;col-md-6 col-sm-6 col-xs-12&quot;&gt;&lt;div class=&quot;product-item product-grid-item Premium real-estates my-ad-placeholder&quot;&gt;&lt;figure class=&quot;pi-img-wrapper&quot;&gt;&lt;a 
class=&quot;a-images&quot; href=&quot;/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525&quot;&gt;&lt;img src=&#39;https://img.halooglasi.com//slike/oglasi/Thumbs/160228/m/direktna-prodaja-uknjizen-stan-5425435147525-71779653263.jpg&#39; class=&quot;&quot; alt=&quot;DIREKTNA PRODAJA&quot; onError=&quot;this.onerror = null; this.src = &amp;#39;/Content/Quiddita/Widgets/Product/Stylesheets/img/no-image.jpg&amp;#39;&quot;&gt;&lt;/a&gt;&lt;span class=&quot;on-map&quot;&gt;&lt;/span&gt;&lt;div class=&quot;wrap-btn-fav&quot;&gt;&lt;div class=&quot;btn-group btn-group-solid&quot;&gt;&lt;button type=&quot;button&quot; class=&quot;btn btn-circle btn-fav-ad-star fav-cmd favorite-ad-holder&quot; data-id=&quot;5425435147525&quot;&gt;&lt;/button&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class=&quot;product-type&quot;&gt;&lt;/div&gt;&lt;/figure&gt;&lt;div class=&quot;central-feature&quot;&gt;&lt;span data-value=&quot;31.000&quot;&gt;&lt;i&gt;31.000&amp;nbsp;€&lt;/i&gt;&lt;/span&gt;&lt;div class=&quot;price-shadow&quot;&gt;&lt;img src=&quot;/Content/assets/frontend/layout/img/price-shadow.png&quot; style=&quot;width:100%; height:10px;&quot; /&gt;&lt;/div&gt;&lt;/div&gt;&lt;h3 class=&quot;ad-title&quot;&gt;&lt;a href=&quot;/nekretnine/prodaja-stanova/direktna-prodaja/5425435147525&quot;&gt;DIREKTNA PRODAJA&lt;/a&gt;&lt;/h3&gt;&lt;ul class=&quot;subtitle-places&quot;&gt;&lt;li&gt;Beograd&amp;nbsp;&lt;/li&gt;&lt;li&gt;Opština Zvezdara&amp;nbsp;&lt;/li&gt;&lt;li&gt;Mirijevo&amp;nbsp;&lt;/li&gt;&lt;/ul&gt;&lt;p class=&quot;ad-description&quot;&gt;Hrastov parket,PVC stolarija,sigurnosna vrata,hodnik,spavaca soba,kupatilo,kuhinja sa prirodnom ventilacijom,dnevni boravak..klima..&lt;/p&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;div class=&quot;clear&quot;&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;","DoNotShowContactButton":null,"ContactButtonLink":null,"OtherFields":{"broj_soba_s":"2.0","grejanje_s":"EG","grad_s":"Beograd","lokacija_s":"Opština 
Zvezdara","mikrolokacija_s":"Mirijevo","kvadratura_d":39.0,"oglasivac_nekretnine_s":"Vlasnik","stanje_objekta_s":"Izvorno stanje","tip_nekretnine_s":"Stan","cena_d":31000.0,"dodatno_ss":["Odmah useljiv","Uknjižen"],"ostalo_ss":["Klima","Telefon","KATV","Internet","Interfon","Parking","Francuski balkon"],"tip_objekta_s":"Novogradnja","sprat_s":"3","sprat_od_s":"3","broj_soba_id_l":401,"grejanje_id_l":1543,"grad_id_l":35112,"lokacija_id_l":40788,"mikrolokacija_id_l":531542,"oglasivac_nekretnine_id_l":387237,"stanje_objekta_id_l":260581,"tip_nekretnine_id_l":8100000,"dodatno_id_ls":[12000001,12000004],"ostalo_id_ls":[12100002,12100010,12100011,12100012,12100013,12100017,12100018],"tip_objekta_id_l":387235,"sprat_id_l":12441300,"sprat_od_id_l":12441350,"broj_soba_order_i":4,"sprat_order_i":13,"kvadratura_d_unit_s":"m2","cena_d_unit_s":"EUR","defaultunit_kvadratura_d":39.0,"defaultunit_cena_d":31000.0,"_version_":1570636437658796032},"IsVerificationPending":false,"VerificationStateId":2,"InfoMessage":null,"TotalViews":21098,"TopCategoryCSSClass":null,"JobApplicationCount":0,"ShowAdvertiserAdsLink":false,"ShowMyAvatar":true,"IsOwnedByCurrentUser":false,"ThreeDTourExists":false,"UseRaiffeisenCreditCalculator":false,"CreditInstalment":null,"CreditTotalAmount":null}; for (var i in QuidditaEnvironment.CurrentClassified.OtherFields) { QuidditaEnvironment.CurrentClassified[i] = QuidditaEnvironment.CurrentClassified.OtherFields[i]; };
QuidditaEnvironment.IsUserOperator=false

QuidditaEnvironment.CurrentClassifiedInstances=[{"RelativeUrl":null,"HasAutomaticRenewal":false,"ValidToProlonged":null,"ShowInUnifiedAdvertiserAdList":false,"IsUsedMoveToTop":false,"ExpiresWithin48Hours":false,"UniqueId":null,"Id":null,"AdKindId":"4","IsPromoted":false,"IsInterestingInternal":false,"IsInterestingExternal":false,"InterestingEntryDate":null,"AdKindCode":"Premium","AdKindPosition":1,"StateId":101,"StoppageReasonIds":null,"StoppageReasonDescription":null,"Version":0,"Stamp":"0001-01-01T00:00:00","AdvertiserId":null,"Title":"DIREKTNA PRODAJA","Text":null,"TextHtml":null,"PrintText":null,"ContactInfoName":null,"PhoneNumber1":null,"PhoneNumber2":null,"Email":null,"Address":null,"City":null,"ValidFrom":"2017-06-16T13:10:11Z","ValidFromForDisplay":null,"ValidFromProlonged":null,"ValidTo":"2017-06-23T13:10:11Z","LastPublished":"2017-06-16T13:10:11Z","IsFirstOfKind":false,"CreatedAt":"0001-01-01T00:00:00","LastModifiedAt":null,"IsArchived":false,"GeoLocationRPT":null,"ImageCount":null,"ImageURLs":null,"ImageTexts":null,"CategoryIds":null,"CategoryId":0,"CategoryHierarchyId":null,"CategoryNames":["Nekretnine","Stambeni prostor","Prodaja","Stan"],"CategoryFullName":null,"AdvertiserLogoUrlInternal":null,"AdvertiserLogoUrl":null,"VideoUrl":null,"CreatedByUserId":0,"DeclarationId":null,"EnclosureFilePath":null,"ListHTML":null,"GridHTML":null,"DoNotShowContactButton":null,"ContactButtonLink":null,"OtherFields":null,"IsVerificationPending":false,"VerificationStateId":0,"InfoMessage":null,"TotalViews":0,"TopCategoryCSSClass":null,"JobApplicationCount":0,"ShowAdvertiserAdsLink":false,"ShowMyAvatar":false,"IsOwnedByCurrentUser":false,"ThreeDTourExists":false,"UseRaiffeisenCreditCalculator":false,"CreditInstalment":null,"CreditTotalAmount":null}</script>]

我目前使用 Python、Selenium 和 PhantomJS 抓取数据,但耗时太长。是否可以通过解析源代码中的这段内联 JavaScript 来提取数据?

最佳答案

有可能,是的。

会不会极其痛苦?也是的。

在简单的情况下,您只需做一次简单的正则表达式匹配即可:获取 <script> 标签的内容,然后查找类似 /[a-z][a-z0-9\.]+\s*=\s*(.*);/ 的内容。但是,该正则表达式肯定无法处理所有情况。

在此基础上,你还必须编写代码来处理其他情况,例如识别和解析 JSON 等。

当您完成时,您几乎已经编写了整个 JS 解析器。

除了 Selenium 和 PhantomJS 之外,另一种可能的途径是尝试把这段脚本交给 Node 进程执行,然后查看脚本设置的全局变量。但是,就像示例代码片段那样,脚本可能(并且会)抛出一些错误,因此这种方法可能无法正常工作。

老实说,你最好的选择是继续使用 Selenium 或 PhantomJS 之类的工具,并想办法让它们运行得足够快。

关于javascript - 从内联 JavaScript 中提取数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44642374/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com