网站后端模板,网站弹出广告gif出处,网站项目设计说明书,地方旅游网站建设方案 通过以下代码可以爬取两大图片网站（百度和搜狗）的图片，对于人工智能、深度学习中图片数据的搜集很有帮助！
一、爬取百度图片 该代码可以爬取任意百度图片中自定义的图片：
import requests
import re
import time
imp…通过以下代码可以爬取两大图片网站（百度和搜狗）的图片，对于人工智能、深度学习中图片数据的搜集很有帮助！
一、爬取百度图片 该代码可以爬取任意百度图片中自定义的图片：
import requests
import re
import time
import os

# Request headers sent with every Baidu search call.
# NOTE(review): the Cookie below is the session captured in the original
# article; it has almost certainly expired, and any base64 "=" padding was
# lost when the page was scraped. Replace it with a cookie from your own
# browser session if Baidu rejects the request.
_BAIDU_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
    ),
    'Referer': (
        'https://image.baidu.com/search/index?tn=baiduimage&ipn=r'
        '&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1'
        '&fmq=1637758492843_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright='
        '&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2'
        '&dyTabStr=MCwzLDYsMiw0LDEsNSw4LDcsOQ%3D%3D&ie=utf-8&sid=&word=hello'
    ),
    'Cookie': (
        'BDqhfp=hello%26%26-10-1undefined%26%2628989%26%2635; '
        'BAIDUID=0C2336F5F3D356371C46DF079632E0C8:FG=1; '
        'BAIDUID_BFESS=0C2336F5F3D356371C46DF079632E0C8:FG=1; '
        'BIDUPSID=0C2336F5F3D356371C46DF079632E0C8; '
        '__yjs_duid=1_32693704d239fea9266064fc8a3d25631637737833661; '
        'PSTM=1637737880; '
        'BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; '
        'BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; '
        'userFrom=null; '
        'BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; '
        'delPer=0; '
        'PSINO=6; '
        '__yjs_st=2_ZGU4ODA5ZTdmNzczMzgxNzRiZWZhNTdkODVkY2E5MzQ3NzM3Nzc2MzZlNjYzZmRiMWVjOTlmNWQzZDA3NWY1MzM2M2NkNjNmMjMzZWVlYzQxNGQ2ODIzYjlkNTdhYTUyZjdhNWQwNjQxZWE1YTI0MWZiNzQ1NTE0N2NlNTgwNjZjODlkNWVlZWI2ZDBkNjUzNmNiZDE3NzUyYTA4ZjkxYjI1NzNhODBjOGZhZTBmMzZkY2IwOWJmNjMxNjEzNmUxYjQxZmZhM2M1ODUzYTFkNTM4NTE5MzZjZjRkODliMTE1MmRmMDY1MjI4OGJiM2I3ZGMzMDdiNjI4MWE3NDgxZV83XzQyODU3N2M0; '
        'H_PS_PSSID=35295_34446_35104_31254_35237_35049_34584_34505_35245_34578_34872_26350_35210_35145_22160; '
        'indexPageSugList=%5B%22hello%22%2C%22bello%22%2C%22hello%20%22%5D; '
        'cleanHistoryStatus=0; '
        'ab_sr=1.0.1_MTJmNTIwNGNlNmI5NDg2YmZiZTI1OTM1MGZhNTJhZTZlMzVmODE2NmEwZjg5MjNlZWZjZWY1YTY3ZjQ2Yzc2MWZiNGRlODY2ZDJjOGE3N2RhMzg2NjcxZjEzY2ZiMDQ4ODNjYzgyZTZlNWM2NGQ4YjlhMzBlMWE1ZjU0ZTY2NzAxYmM0ZGRkOTM0MGI3NzUwOWZjODY2ODE5NmU1N2E1Yw'
    ),
}


def saveImg(imgurlList, imgOs):
    """Download every image URL in *imgurlList* and save it under *imgOs*.

    Args:
        imgurlList: Iterable of image URLs (the second element returned by
            ``get_asjson``).
        imgOs: Path prefix that is concatenated verbatim with the file name,
            so it must end with a path separator when it names a directory.

    A URL whose download fails prints ``error!`` and is skipped; the
    remaining URLs are still processed.
    """
    for i in imgurlList:
        try:
            response = requests.get(url=i, timeout=10).content
        except requests.RequestException:
            # Best-effort crawl: one bad URL must not abort the whole batch.
            print('error!')
        else:
            # The file name is characters 28..35 of the URL.
            # NOTE(review): two URLs sharing that slice overwrite each other.
            imgName = i[28:36]
            with open(imgOs + imgName + '.jpg', 'wb') as file:
                file.write(response)
            print(i + ' 下载完成!')


def get_asjson(page, gsm, word):
    """Fetch one page of Baidu image-search results.

    Args:
        page: Page index; the request asks for 30 results starting at
            offset ``30 * int(page)``.
        gsm: The ``gsm`` token to send with this request (the value returned
            by the previous call, or a seed value for the first call).
        word: Search keyword, inserted into the query string as-is.

    Returns:
        A ``(gsm, data)`` tuple: the ``gsm`` token found in the response
        (or the *gsm* argument unchanged when the response contains none)
        and the list of ``hoverURL`` image URLs found in the response.
    """
    url = (
        'https://image.baidu.com/search/acjson?'
        'tn=resultjson_com&logid=9123806616981181340&ipn=rj&ct=201326592'
        f'&is=&fp=result&fr=&word={word}&queryWord={word}'
        '&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest='
        '&copyright=&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1'
        '&expermode=&nojc=&isAsync='
        f'&pn={str(30 * int(page))}&rn=30&gsm={gsm}'
        # The final parameter is the current time in milliseconds.
        f'&{str(int(time.time() * 1000))}='
    )
    response = requests.get(url=url, headers=_BAIDU_HEADERS, timeout=10).text
    # NOTE: the scraped source had a stray "1111" token at this point; it
    # could not be mapped to any statement and has been dropped.
    gsm_matches = re.findall(r'"gsm":"(.*?)",', response)
    if gsm_matches:
        # Keep the caller's token when the response carries none, instead of
        # raising IndexError on an unexpected or blocked response.
        gsm = gsm_matches[0]
    data = re.findall(r'"hoverURL":"(.*?)",', response)
    return gsm, data


if __name__ == "__main__":
    a = '1e'  # seed gsm token for the first request
    key_word = '阳台'  # change this to the keyword you want to crawl
    # Trailing separator is required because saveImg concatenates the prefix
    # and the file name directly.
    img = os.path.join(key_word + '_img', '')
    os.makedirs(img, exist_ok=True)
    last_page = 2  # raise this number to crawl more pages
    for i in range(1, last_page + 1):
        # Each response's gsm token is fed into the next request.
        a, page_urls = get_asjson(i, a, key_word)
        saveImg(page_urls, img)
二、爬取搜狗图片 该代码可以爬取任意搜狗图片中自定义的图片：
from urllib.parse import quote
import requests# 填入需要搜索的内容
# Search keyword, percent-encoded for use in the query string.
key_word = quote('阳台')

# `page` is the number of result pages to crawl; each page holds 48 images.
page = 50

# Change this to the directory where the images should be saved. The
# directory must already exist and the value must end with a separator.
save_dir = 'C:/Users/wbl/Desktop/AI/pc/L/'

# Crawl pages 1..page inclusive so that `page` really is the page count.
for page_no in range(1, page + 1):
    startN = (page_no - 1) * 48
    url = 'https://pic.sogou.com/napi/pc/searchList?mode=1&start={}&xml_len=48&query={}'.format(startN, key_word)
    response = requests.get(url, timeout=5)
    json_data = response.json()
    allData = json_data['data']['items']
    img_urls = [item['thumbUrl'] for item in allData]
    for num, img_url in enumerate(img_urls):
        data = requests.get(img_url, timeout=5).content
        # Files are named page<page number>-<index within page>.jpg
        with open(save_dir + 'page' + str(page_no) + '-' + str(num) + '.jpg', 'wb') as file:
            file.write(data)
        print(num, '下载完成')