I've been learning Python recently. As everyone knows, Python is widely used for web scraping. Below is a simple Python program that crawls images from the web: it can batch-download the images recently posted on a website, and it uses proxy IPs along the way.
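The key technique is the proxy handling: urllib lets you install an opener that routes every request through a randomly chosen proxy, so the target server sees different source IPs. A minimal sketch of just that idea, with placeholder proxy addresses and a placeholder target URL (neither is a working value):

import random
import urllib.request

# Placeholder proxies -- replace with real, working HTTP proxies
proxies = ['203.0.113.10:8000', '203.0.113.11:8000']

proxy_support = urllib.request.ProxyHandler({'http': random.choice(proxies)})
opener = urllib.request.build_opener(proxy_support)
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)

# Every later urlopen() call now goes through the chosen proxy
html = urllib.request.urlopen('http://example.com').read()

The full program is below: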
import urllib.request
import os
import random
def url_open(url):
    req = urllib.request.Request(url)
    # Set a User-Agent header so the request looks more like a human browser
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0')
    # Rotate through proxy IPs so requests come from different addresses,
    # which helps avoid being blocked by the server
    iplist = ['1.193.162.123:8000', '1.193.162.91:8000', '1.193.163.32:8000']
    proxy_support = urllib.request.ProxyHandler({'http': random.choice(iplist)})
    opener = urllib.request.build_opener(proxy_support)
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.154 Safari/537.36 LBBROWSER')]
    urllib.request.install_opener(opener)
    response = urllib.request.urlopen(req)
    html = response.read()
    return html


def get_page(url):
    # Read the number of the newest comment page out of the HTML
    html = url_open(url).decode('utf-8')
    a = html.find('current-comment-page') + 23
    b = html.find(']', a)
    # print(html[a:b])
    return html[a:b]


def find_imgs(url):
    # Collect the addresses of all .jpg images on the page
    html = url_open(url).decode('utf-8')
    img_addrs = []
    a = html.find('img src=')
    while a != -1:
        b = html.find('.jpg', a, a + 140)
        if b != -1:
            # Protocol-relative addresses (starting with '//') get an 'http:' prefix
            if html[a+9] != 'h':
                img_addrs.append('http:' + html[a+9:b+4])
            else:
                img_addrs.append(html[a+9:b+4])
        else:
            b = a + 9
        a = html.find('img src=', b)
    for each in img_addrs:
        print(each)  # debug: print each collected image address
    return img_addrs


def save_imgs(folder, img_addrs):
    # Download each image and save it under its original file name
    for each in img_addrs:
        filename = each.split('/')[-1]
        with open(filename, 'wb') as f:
            img = url_open(each)
            f.write(img)


def download_mm(folder='ooxx', pages=10):
    os.mkdir(folder)
    os.chdir(folder)
    url = 'http://jandan.net/ooxx/'
    # Start from the current newest page and work backwards
    page_num = int(get_page(url))
    for i in range(pages):
        page_num = page_num - 1
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)


if __name__ == '__main__':
    download_mm()
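Two practical caveats if you run this: free proxy IPs like the ones in iplist go stale quickly, and os.mkdir raises an error when the folder already exists. Below is a hedged sketch of a more tolerant download loop; download_mm_safe is a hypothetical variant that reuses get_page, find_imgs and save_imgs from above and is not part of the original program.

import os
import urllib.error

def download_mm_safe(folder='ooxx', pages=10):
    # Create the folder only if it does not already exist
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    url = 'http://jandan.net/ooxx/'
    page_num = int(get_page(url))
    for i in range(pages):
        page_num -= 1
        page_url = url + 'page-' + str(page_num) + '#comments'
        try:
            img_addrs = find_imgs(page_url)
            save_imgs(folder, img_addrs)
        except (urllib.error.URLError, OSError) as e:
            # A dead proxy or a network hiccup should not abort the whole run
            print('skipping', page_url, 'because of:', e)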
Done.

Run result: