gpt4 book ai didi

Python Requests:如何获取空白隐藏输入(Hidden Input)的值

转载 作者:行者123 更新时间:2023-12-02 02:06:49 25 4
gpt4 key购买 nike

我正在尝试抓取此网站:https://case.occ.ok.gov/ords/f?p=1004:203

拼图中缺少的部分是弄清楚如何在发出最终请求之前“获取”数据负载中的 p_request 参数。查看“主”页面时,此字段显示为空,因此无法使用它传递到我的 POST 请求。

下面的代码不起作用,因为我的有效负载中的 p_request 参数是空白的;不过我通过开发者控制台测试知道,只要我能够获取 p_request 字段的值,它就会起作用。

# Query the main search page; its hidden <input> fields supply values for the payload.
# NOTE: `requests`, `BeautifulSoup` and `letter` are used below but are not
# imported/defined anywhere in this snippet.
url = 'https://case.occ.ok.gov/ords/f?p=1004:203'
r = requests.get(url)
soup = BeautifulSoup(r.text,'lxml')

# Copy the two named cookies from the GET response so they can be sent
# explicitly with the POST below (no requests.Session is used here).
cookies = {}
cookdat = r.cookies
cookies['ORA_WWV_APP_1004'] = cookdat.get('ORA_WWV_APP_1004')
cookies['X-Oracle-BMC-LBS-Route'] = cookdat.get('X-Oracle-BMC-LBS-Route')

# Build an id -> value map of every <input> on the page ('' when the element
# has no value attribute). i['id'] raises KeyError for an <input> without an id.
inputs = soup.select('input')
d_inputs = {i['id']:i.get('value','') for i in inputs}

# Form payload as a list of (key, value) tuples rather than a dict, because
# the keys 'f01' and 'f02' are each sent several times.
data = [
('p_flow_id', '1004'),
('p_flow_step_id', '203'),
('p_instance', '%s'%d_inputs['pInstance']),
('p_debug', ''),
# p_request is sent empty here; according to the accompanying question text
# this blank value is the reason the request does not work.
('p_request', ''),
('p_widget_name', 'worksheet'),
('p_widget_mod', 'PULL'),
('p_widget_action', ''),
('p_widget_num_return', '100000'),
# Hard-coded identifiers - presumably copied from the browser's developer
# console; TODO confirm what x01/x02 denote.
('x01', '8980043036046866'),
('x02', '8985720770049096'),
('f01', 'R8980010866046866_column_search_current_column'),
('f01', 'R8980010866046866_search_field'),
('f01', 'R8980010866046866_row_select'),
('f02', ''),
('f02', ''),
('f02', '50'),
# JSON document assembled with %-formatting from `letter` (the last-name search
# value) and the scraped pPageItemsProtected / pSalt hidden-field values.
('p_json', '{"pageItems":{"itemsToSubmit":[{"n":"P203_LASTNAME","v":"%s"},{"n":"P203_FIRSTNAME","v":""},{"n":"P203_SEARCH_CRITERIA","v":"1"}],"protected":"%s","rowVersion":"","formRegionChecksums":[]},"salt":"%s"}'%(letter,d_inputs['pPageItemsProtected'],d_inputs['pSalt'])),
]

# POST the payload to the ajax endpoint and dump the raw response body.
r = requests.post('https://case.occ.ok.gov/ords/wwv_flow.ajax', cookies=cookies, data=data)
print(r.text)

在开发者控制台中,我在进行我想要的提交类型时看到此字段出现,即使它在主页中为空白:

screenshot of dev console

如何“检索”此字段,该字段是请求正常工作所必需的?

最佳答案

这对我有用

import requests
import json
from bs4 import BeautifulSoup

# Scrape search results from the OCC case site:
#   1. GET the search page and read its hidden <input> fields.
#   2. POST the search form to wwv_flow.accept with p_request='Search'.
#   3. GET the result page and pull the report's ajaxIdentifier out of the
#      inline `interactiveReport(...)` call.
#   4. POST to wwv_flow.ajax to request the next page of rows and parse them.

# globals
users = []    # one {'name': ..., 'surname': ...} dict per parsed table row
letter = "A"  # value submitted for the P203_LASTNAME search item


def _collect_inputs(soup):
    """Return an id -> value map of the <input> elements in *soup*.

    Only inputs that carry an ``id`` attribute are included (an <input>
    without one would otherwise raise KeyError); a missing ``value``
    attribute maps to ''.
    """
    return {tag['id']: tag.get('value', '') for tag in soup.select('input[id]')}


# session: a single Session keeps the site's cookies across all requests
session = requests.Session()

# get page
auth = session.get('https://case.occ.ok.gov/ords/f?p=1004:203')
auth.raise_for_status()
soup = BeautifulSoup(auth.text, 'html.parser')
d_inputs = _collect_inputs(soup)

# create params
params = {
    'p_flow_id': d_inputs['pFlowId'],
    'p_flow_step_id': d_inputs['pFlowStepId'],
    'p_instance': d_inputs['pInstance'],
    'p_debug': '',
    # The hidden p_request field is blank in the page HTML; the value is
    # supplied here explicitly.
    'p_request': 'Search',
    'p_reload_on_submit': d_inputs['pReloadOnSubmit'],
    'p_page_submission_id': d_inputs['pPageSubmissionId'],
    'p_json': json.dumps({
        "pageItems": {
            "itemsToSubmit": [
                {"n": "P203_LASTNAME", "v": letter},
                {"n": "P203_FIRSTNAME", "v": ""},
                {"n": "P203_SEARCH_CRITERIA", "v": "1"},
            ],
            "protected": d_inputs['pPageItemsProtected'],
            "rowVersion": "",
            "formRegionChecksums": [],
        },
        "salt": d_inputs['pSalt'],
    }),
}

# Send request to APEX (submits the search form)
accept_response = session.post(
    'https://case.occ.ok.gov/ords/wwv_flow.accept', data=params
)
accept_response.raise_for_status()

# get page with data (first)
data_page = session.get(
    'https://case.occ.ok.gov/ords/f?p=1004:203:{}::NO:::'.format(
        d_inputs['pInstance']
    )
)
data_page.raise_for_status()

table_soup = BeautifulSoup(data_page.text, 'html.parser')

# new params: re-read the hidden inputs, now taken from the result page
d_inputs = _collect_inputs(table_soup)

# The result page contains an inline `interactiveReport({...});})();` call;
# the JSON object passed to it carries the ajaxIdentifier needed below.
# Raises IndexError if the marker is not present in the page.
json_ajax_data = json.loads(
    data_page.text.split('interactiveReport(')[1].split(');})();')[0]
)

# get data for next pages
params_news = {
    'p_flow_id': params['p_flow_id'],
    'p_flow_step_id': params['p_flow_step_id'],
    'p_instance': params['p_instance'],
    'p_debug': '',
    'p_request': 'PLUGIN={}'.format(json_ajax_data['ajaxIdentifier']),
    'p_widget_name': 'worksheet',
    'p_widget_mod': 'ACTION',
    'p_widget_action': 'PAGE',
    # Pagination spec, sent verbatim: start at row 51, 50 rows per page.
    'p_widget_action_mod': 'pgR_min_row=51max_rows=50rows_fetched=50',
    'p_widget_num_return': 50,
    # NOTE(review): both x01 and x02 receive the worksheet id, whereas the
    # payload shown in the question used two different ids - confirm whether
    # x02 should be a separate report id.
    'x01': d_inputs['R8980010866046866_worksheet_id'],
    'x02': d_inputs['R8980010866046866_worksheet_id'],
    'p_json': params['p_json'],
}

# get next page data
next_page = session.post(
    'https://case.occ.ok.gov/ords/wwv_flow.ajax', data=params_news
)
next_page.raise_for_status()

# Parse the response of the ajax call. The original looked the table up in
# `table_soup` (the first page) and never used `next_page_soup`.
next_page_soup = BeautifulSoup(next_page.text, 'html.parser')
next_page_table_with_data = next_page_soup.find('table', {'class': 'a-IRR-table'})
# find() returns None when the response holds no report table; treat that
# as "no rows" instead of failing with AttributeError.
if next_page_table_with_data is None:
    next_page_rows = []
else:
    next_page_rows = next_page_table_with_data.find_all('tr')

# parse rows
for row_next_page in next_page_rows:
    cells_next_page = row_next_page.find_all('td')
    # Skip rows without <td> cells and any row too short to hold both
    # columns read below.
    if len(cells_next_page) < 2:
        continue
    # NOTE(review): the sample output suggests column 0 holds the last name
    # and column 1 the first name(s); key names are kept as in the original.
    users.append(
        {
            'name': cells_next_page[0].text,
            'surname': cells_next_page[1].text,
        }
    )

print(users)
[
{'name': 'ANDERSON', 'surname': 'MICHAEL L AND KAREN'},
{'name': 'ALVAREZ', 'surname': 'PETRA'},
...
]

关于Python 请求 : How to get value of Blank Hidden Input,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68322689/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com