import requests
import os
import json
import certifi

# Export certifi's CA bundle path through REQUESTS_CA_BUNDLE, the environment
# variable `requests` consults when verifying HTTPS certificates.
os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()
#url = ‘https://we.51job.com/api/job/search-pc?api_key=51job&timestamp=1735109432&keyword=python%E7%88%AC%E8%99%AB&searchType=2&function=&industry=&jobArea=000000&jobArea2=&landmark=&metro=&salary=&workYear=&degree=&companyType=&companySize=&jobType=&issueDate=&sortType=0&pageNum=1&requestId=&pageSize=20&source=1&accountId=217570594&pageCode=sou%7Csou%7Csoulb&scene=7’

# Base URL, with the original pageNum parameter removed.

# Search endpoint with every query parameter except pageNum; the page number
# is appended per request. NOTE(review): `timestamp` and `accountId` are
# hard-coded values captured from a browser session - confirm whether the
# server still accepts them.
base_url = (
    'https://we.51job.com/api/job/search-pc?api_key=51job&timestamp=1735109432'
    '&keyword=python%E7%88%AC%E8%99%AB&searchType=2&function=&industry='
    '&jobArea=000000&jobArea2=&landmark=&metro=&salary=&workYear=&degree='
    '&companyType=&companySize=&jobType=&issueDate=&sortType=0&requestId='
    '&pageSize=20&source=1&accountId=217570594'
    '&pageCode=sou%7Csou%7Csoulb&scene=7'
)

# Cookie captured from a logged-in browser session. It is session-bound and
# expires, so it has to be refreshed periodically.
# NOTE(review): this is a live credential embedded in source; prefer supplying
# it through the JOB51_COOKIE environment variable (see below) and avoid
# committing real values.
_DEFAULT_COOKIE = (
    "guid=a39a45bebcd3beb5e30c402b643815bc; ps=needv%3D0; sensor=createDate%3D2022-10-21%26%7C%26identityType%3D1; nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22217570594%22%2C%22first_id%22%3A%22193f677f656a9-01c79a4d65a09f9-4c657b58-655360-193f677f657623%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTkzZjY3N2Y2NTZhOS0wMWM3OWE0ZDY1YTA5ZjktNGM2NTdiNTgtNjU1MzYwLTE5M2Y2NzdmNjU3NjIzIiwiJGlkZW50aXR5X2xvZ2luX2lkIjoiMjE3NTcwNTk0In0%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22217570594%22%7D%2C%22%24device_id%22%3A%22193f677f656a9-01c79a4d65a09f9-4c657b58-655360-193f677f657623%22%7D; partner=SEM_pcbingpz_02; slife=lowbrowser%3Dnot%26%7C%26lastlogindate%3D20241225%26%7C%26; 51job=cuid%3D217570594%26%7C%26cusername%3D5A%252FqMpj9ez9tY9J3CrfQpFiX42NHUDXtEKDnWGNs8os%253D%26%7C%26cpassword%3D%26%7C%26cname%3DVeJutepWmSe61aTXMh0UtQ%253D%253D%26%7C%26cemail%3D5KeCgONl9DxjAYYJtjU%252B1nQ6PZhyh640BzGmNjc983o%253D%26%7C%26cemailstatus%3D0%26%7C%26cnickname%3D%26%7C%26ccry%3D.0vuORyJTH4uQ%26%7C%26cconfirmkey%3D2806qjteGgR4Q%26%7C%26cautologin%3D1%26%7C%26cenglish%3D0%26%7C%26sex%3D0%26%7C%26cnamekey%3D28.I0RusXfUQU%26%7C%26to%3D93c08ae46279873b2d6a0674746981e8676b6929%26%7C%26; "
    "tfstk=fk9x3ljeT40D-5E8w5GlSmGVjtooKdK2VE-QINb01ULJ5HQMmVRMW1QRvitGnfRRWUTRlZXi0CLJyUo4iNvMyNTHWGvDInt1WtJkiRbmuOK6-O3n-vDH0nWOC20nvgg7v9sQhiaGl3a5XQtGxvDH0nZccDxSKK0t6f_C5O1fGua5zGEfCST1P_sCxr665OiRNGjThow1l365zG615F6skjGAWo_LBCmnWZ8zW1e_CKMNMwUhPgSJe25f-nt_CRiCDsQBca3UFwWhNh59QlyTdnOe7MTblVQwe39XOFgzewOpVUCHM2evJES11ZJjjS7XnQO5hQuLtgxDXLBXZ2FO3ItBe6OQl2QBiEJV3d03sTAHuB1eaVFdRBxFHa8xd-K5TZ6RVpigTavRoF99lmwlqebD0KO-bztk-ZBf6K0Q6NORz1OHH4w6R3xH3_JtJz_wiMW5XdoQ6NC14TpHpFy3-wIgG0n8_55f4Nx23sUA6n9C2wmP45PNNgSR-0n8_55f4gQn4yPa__sP.; Hm_lvt_1370a11171bd6f2d9b1fe98951541941=1735006877,1735010265,1735085136,1735093043; search=jobarea%7E%60%7C%21recentSearch0%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FApython%C5%C0%B3%E6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%C5%C0%B3%E6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21; JSESSIONID=B7ACABB372D4578948041FF324338A14; acw_tc=ac11000117351114877896002e0099a94d3aaa7dbb571641b218a4f2699cfc; ssxmod_itna=eq0xgD97GQD=0=qq0LP0PDQ9cClDuDWuDOqxx7unyT5DsqhTDSxGKidDqxBnn6oI37Fe4TAiU3/Eu0KUinDwLNrTU7hdExgDB3DEx06NCBDYA1Dt4DTD34DYDixibGxi5GRD0KDFCqAoDzqD2l3XMCwtlRqtDm4GWCLxyD7fMDitXUqGgCw=D0qDBjCTtuEKXDauMfu=DQ4G0DA64xYXBludDKubDceiDQwcCDnbio/cSl4i0CMKyB4D+G9NyK0+xGe8XK=yS/D8q/n=DjTTD/Squ206WrHZ/l4GuDGQDfd9ghR+gudN51x=D4i2uiY4QgR42orS2ra0ot74khDTDoqBqP0DrgqiEqe7Q=gYWCxYGYqDxqEqrQp7DDi6ratDxD==; "
    "ssxmod_itna2=eq0xgD97GQD=0=qq0LP0PDQ9cClDuDWuDOqxx7unyqik6dNhDl=GnDj4R4zQ1/DCsKWkxKChQGwe6PliGLx6wD8oAW4uj+w3N4=IZxy2xhiQdyrrdf=UyEOltK4TMwe4Z7FYyjjwwD+BT=948db7qf68Atqa859f3AN/Bj6YLx04+j58BwOYTQItwQmmrdzS8E5fufWggwjUWAF5Ya2fWd/A3vN7u34B8bZAb2oo+f1qRQ6Qm5kcL37VAt2jSxhY35LBwROWfdjZ1IGERyjkxG2fqGcDiQQeD===; acw_sc__v2=676bb356cf354f12b83dd090f7f5def6caae9603"
)

# Request headers sent with every search call.
he = {
    'referer': "https://we.51job.com/pc/search?keyword=python%E7%88%AC%E8%99%AB",
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
    # A fresh cookie can be supplied via JOB51_COOKIE without editing this
    # file; when the variable is unset the captured value above is used.
    "cookie": os.environ.get('JOB51_COOKIE', _DEFAULT_COOKIE),
}

# Define the function that fetches the data for a given page number.

def get_page_data(page_num, timeout=10):
    """Fetch one page of search results and return the response body as text.

    Args:
        page_num: Page number to request; appended to ``base_url`` as the
            ``pageNum`` query parameter.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled connection would block indefinitely.

    Returns:
        The response body decoded as UTF-8. The HTTP status is not checked,
        so an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    url = base_url + '&pageNum=' + str(page_num)
    res = requests.get(url=url, headers=he, timeout=timeout)
    # Force UTF-8 decoding to avoid mojibake; adjust if the server differs.
    res.encoding = 'utf-8'
    return res.text

# Make sure the output directory "k" exists before any page is written.
# exist_ok=True replaces the exists()-then-mkdir() pair, which could fail if
# the directory appeared between the check and the creation.
os.makedirs('k', exist_ok=True)

# Save each page's content to its own file, named after the page number.

# Use a for loop to walk through the page numbers, fetching and saving multiple pages.

# Fetch pages 1 through 10 and write each response to k/<page_num>.html.
# Adjust the range to scrape a different set of pages.
for page_num in range(1, 11):
    page_data = get_page_data(page_num)
    with open(f"k/{page_num}.html", 'w', encoding='utf-8') as f:
        f.write(page_data)
    # Progress message, printed once per saved page.
    print(f"第{page_num}页文件保存完成!")

注意:cookie 的值有时效性,失效后需要重新获取并更新。
操作步骤如下图所示:

在这里插入图片描述

在这里插入图片描述

Logo

GitCode 天启AI是一款由 GitCode 团队打造的智能助手,基于先进的LLM(大语言模型)与多智能体 Agent 技术构建,致力于为用户提供高效、智能、多模态的创作与开发支持。它不仅支持自然语言对话,还具备处理文件、生成 PPT、撰写分析报告、开发 Web 应用等多项能力,真正做到“一句话,让 AI 帮你完成复杂任务”。

更多推荐