返回

一个爬取图片的小脚本

发布时间:2023-01-14 23:05:10 233
# html# python
#!/usr/bin/python3
import requests
import os
from lxml import etree

# Crawl settings. The site serves GBK-encoded pages; listing page 1 is
# "index.html" and page N (N >= 2) is "index_N.html".
BASE_URL = "http://pic.netbian.com"
CATEGORY = "4kmeinv"
FIRST_PAGE = 1
LAST_PAGE = 124
OUTPUT_DIR = "pic_libs"
PAGE_ENCODING = "gbk"
REQUEST_TIMEOUT = 15  # seconds; without a timeout a stalled server hangs the crawl
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13"
}
# NOTE(review): hard-coded proxy carried over from the original script, which
# applied it to the first request only. It is now applied to every request;
# set to None to connect directly.
PROXIES = {"http": "209.127.191.180:9279"}

ITEM_XPATH = '/html/body/div[2]/div/div[3]/ul/li'
IMAGE_XPATH = '/html/body/div[2]/div[1]/div[2]/div[1]/div[2]/a/img'

# Characters that are path separators or are rejected by common file systems,
# plus ASCII control characters, all mapped to "_".
_UNSAFE_CHARS = {ord(ch): "_" for ch in '\\/:*?"<>|'}
_UNSAFE_CHARS.update({code: "_" for code in range(32)})


def listing_url(page: int) -> str:
    """Return the URL of listing page *page* (1-based) of the category."""
    suffix = "index.html" if page == FIRST_PAGE else "index_" + str(page) + ".html"
    return BASE_URL + "/" + CATEGORY + "/" + suffix


def safe_file_name(name: str, fallback: str = "unnamed") -> str:
    """Return *name* with characters unsafe in a file name replaced by "_".

    Leading/trailing spaces and dots are stripped; if nothing is left,
    *fallback* is returned instead.
    """
    cleaned = name.translate(_UNSAFE_CHARS).strip(" .")
    return cleaned or fallback


def first_match(tree, xpath: str):
    """Return the first result of *xpath* on *tree*, or None if there is none.

    *tree* may itself be None (no parsed document), which also yields None.
    """
    if tree is None:
        return None
    matches = tree.xpath(xpath)
    return matches[0] if matches else None


def fetch_tree(session: "requests.Session", url: str):
    """Download *url* and return it parsed by ``etree.HTML``.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Decode explicitly instead of relying on requests' guessed encoding and
    # re-encoding the extracted text afterwards.
    response.encoding = PAGE_ENCODING
    return etree.HTML(response.text)


def download_picture(session: "requests.Session", detail_url: str, out_dir: str):
    """Save the picture shown on the detail page *detail_url* into *out_dir*.

    Returns:
        The file name written, or None when the page does not contain the
        expected ``<img>`` element.

    Raises:
        requests.RequestException: if a page or the image cannot be fetched.
        OSError: if the file cannot be written.
    """
    tree = fetch_tree(session, detail_url)
    src = first_match(tree, IMAGE_XPATH + "/@src")
    alt = first_match(tree, IMAGE_XPATH + "/@alt")
    if src is None or alt is None:
        return None

    # Fetch the image before opening the file so a failed download does not
    # leave an empty file behind.
    image = session.get(BASE_URL + src, timeout=REQUEST_TIMEOUT)
    image.raise_for_status()

    file_name = safe_file_name(alt) + ".jpg"
    with open(os.path.join(out_dir, file_name), "wb") as fp:
        fp.write(image.content)
    return file_name


def crawl_page(session: "requests.Session", page: int, out_dir: str) -> None:
    """Download every picture linked from listing page *page*.

    Failures are reported and skipped so one bad page or picture does not
    abort the whole crawl.
    """
    url = listing_url(page)
    try:
        tree = fetch_tree(session, url)
    except requests.RequestException as exc:
        print(url, "列表页获取失败:", exc)
        return
    if tree is None:
        return

    for item in tree.xpath(ITEM_XPATH):
        href = first_match(item, './a/@href')
        if href is None:
            continue
        detail_url = BASE_URL + href
        try:
            file_name = download_picture(session, detail_url, out_dir)
        except (requests.RequestException, OSError) as exc:
            print(detail_url, "下载失败:", exc)
            continue
        if file_name is None:
            print(detail_url, "未找到图片,已跳过")
            continue
        # The original script printed no page number for the first page;
        # keep that output unchanged.
        if page == FIRST_PAGE:
            print(file_name, "下载成功!")
        else:
            print(file_name, page, "下载成功!")


def main() -> None:
    """Crawl listing pages FIRST_PAGE..LAST_PAGE and save pictures to OUTPUT_DIR."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with requests.Session() as session:
        session.headers.update(HEADERS)
        if PROXIES:
            session.proxies.update(PROXIES)
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            crawl_page(session, page, OUTPUT_DIR)


if __name__ == '__main__':
    main()


特别声明:以上内容(图片及文字)均为互联网收集或者用户上传发布,本站仅提供信息存储服务!如有侵权或涉及法律问题,请联系我们。
举报
评论区(0)
按点赞数排序
用户头像
精选文章
thumb 中国研究员首次曝光美国国安局顶级后门—“方程式组织”
thumb 俄乌线上战争,网络攻击弥漫着数字硝烟
thumb 从网络安全角度了解俄罗斯入侵乌克兰的相关事件时间线
下一篇
新手如何完成python视频爬虫 2023-01-14 22:54:44