网站:http://www.lovewzly.com/jiaoyou.html
工具:Python3、PyCharm、火狐浏览器自带的F12开发者工具
目标:获取妹子信息,将图片与个人信息分开保存在不同文件夹下
步骤:分为以下四步:
- 设置条件
- 解析网页
- 下载图片
- 保存信息
# coding=utf-8
"""Search lovewzly.com for profiles matching user-entered filters.

For every profile returned by the search API the avatar is stored under
``image/`` and a one-line text summary under ``info/``.

Example of a search request issued by this script:
http://www.lovewzly.com/api/user/pc/list/search?startage=21&endage=30&gender=2&cityid=60&startheight=151&endheight=160&marry=1&salary=2&page=1
"""
import os

SEARCH_URL = 'http://www.lovewzly.com/api/user/pc/list/search'
REQUEST_HEADERS = {
    'Referer': 'http://www.lovewzly.com/jiaoyou.html',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/61.0',
}
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the script
MAX_RETRIES = 3       # attempts per result page before the page is skipped
PAGE_COUNT = 10       # result pages fetched per search
CITY_ID = 60          # the accompanying notes say the default city is Xiamen

AGE_RANGES = ((21, 30), (31, 40), (41, 50))
HEIGHT_RANGES = ((151, 160), (161, 170), (171, 180), (181, 190))

# Characters that must not reach a file name built from remote data.
_UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'


def _read_int(prompt):
    """Prompt repeatedly until the user types a valid integer; return it."""
    while True:
        raw = input(prompt)
        try:
            return int(raw)
        except ValueError:
            print("请输入整数")


def _safe_filename(name):
    """Return *name* as text with path separators and reserved chars replaced."""
    cleaned = ''.join(
        '_' if ch in _UNSAFE_FILENAME_CHARS else ch for ch in str(name)
    )
    return cleaned or '_'


# ---------------------------------------------------------------- filters
def age_range(age):
    """Map an age to the (startage, endage) bucket used by the search API.

    Ages outside 21-50 return ``(0, 0)``. This mirrors the fallback of
    ``height_range``; presumably the API treats 0 as "no restriction" --
    TODO confirm against the site.
    """
    for low, high in AGE_RANGES:
        if low <= age <= high:
            return low, high
    return 0, 0


def gender_code(sex):
    """Return 1 when *sex* is "男", otherwise 2."""
    return 1 if sex == "男" else 2


def height_range(height):
    """Map a height to its (startheight, endheight) bucket, else (0, 0)."""
    for low, high in HEIGHT_RANGES:
        if low <= height < high + 1:
            return low, high
    return 0, 0


def salary_code(money):
    """Map a salary to the API's salary code (2-5), or 0 below 2000."""
    if money >= 20000:
        return 5
    if money >= 10000:
        return 4
    if money >= 5000:
        return 3
    if money >= 2000:
        return 2
    return 0


def quary_age():
    """Ask for the desired age and return its (startage, endage) bucket."""
    return age_range(_read_int("请输入期望对象年龄(如20):"))


def quary_sex():
    """Ask for the desired gender and return its API code."""
    return gender_code(input("请输入期望对象的性别(如女):"))


def quary_height():
    """Ask for the desired height and return its (start, end) bucket."""
    return height_range(_read_int("请输入期望对象身高(如163):"))


def quary_salary():
    """Ask for the desired salary and return its API code."""
    return salary_code(_read_int("请输入期望对象薪资(如2000):"))


# ---------------------------------------------------------------- search
def search_params(startage, endage, gender, startheight, endheight, salary, page):
    """Build the query parameters for one page of search results.

    Key order matches the query string of the original hand-built URL.
    """
    return {
        'startage': startage,
        'endage': endage,
        'gender': gender,
        'cityid': CITY_ID,
        'startheight': startheight,
        'endheight': endheight,
        'marry': 1,
        'salary': salary,
        'page': page,
    }


def get_one(startage, endage, gender, startheight, endheight, salary, page):
    """Fetch one page of search results and return the decoded JSON.

    Returns ``None`` when the page cannot be fetched or decoded after
    ``MAX_RETRIES`` attempts.
    """
    # Imported here so the pure filter helpers stay importable without
    # the third-party dependency.
    import requests

    params = search_params(
        startage, endage, gender, startheight, endheight, salary, page
    )
    for _ in range(MAX_RETRIES):
        try:
            # headers must be passed by keyword: the second positional
            # argument of requests.get is ``params``.
            response = requests.get(
                SEARCH_URL,
                params=params,
                headers=REQUEST_HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException:
            continue
        if response.status_code != 200:
            continue
        try:
            return response.json()
        except ValueError:
            # Body was not valid JSON; retrying the same page is pointless.
            return None
    return None


# ---------------------------------------------------------------- storage
def save_image(item):
    """Download the avatar of *item* into ``image/<username>.jpg``.

    An avatar that already exists on disk is not downloaded again.
    """
    import requests

    os.makedirs('image', exist_ok=True)
    file_path = os.path.join('image', _safe_filename(item['username']) + '.jpg')
    if os.path.exists(file_path):
        print("已经保存过当前信息")
        return
    try:
        response = requests.get(item['avatar'], timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("获取%s的头像失败: %s" % (item['username'], exc))
        return
    if response.status_code == 200:
        print("正在获取%s的信息" % item['username'])
        with open(file_path, 'wb') as f:
            f.write(response.content)


def save_info(item):
    """Write a one-line summary of *item* to ``info/<username>.txt``.

    Field values are formatted rather than concatenated so that numeric
    values returned by the API do not raise ``TypeError``; fields other
    than ``username`` that are missing are written as empty text.
    """
    os.makedirs('info', exist_ok=True)
    file_path = os.path.join('info', _safe_filename(item['username']) + '.txt')
    summary = '名字:{},城市:{},身高:{},学历:{},个人签名:{},出生年份:{}'.format(
        item['username'],
        item.get('city', ''),
        item.get('height', ''),
        item.get('education', ''),
        item.get('monolog', ''),
        item.get('birthdayyear', ''),
    )
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(summary)


# ---------------------------------------------------------------- driver
def quary_data():
    """Collect the filters interactively, then fetch and store every match."""
    print("请输入你的筛选条件,开始本次姻缘:")
    startage, endage = quary_age()
    gender = quary_sex()
    startheight, endheight = quary_height()
    salary = quary_salary()
    for page in range(1, PAGE_COUNT + 1):
        result = get_one(
            startage, endage, gender, startheight, endheight, salary, page
        )
        if not isinstance(result, dict):
            continue  # page could not be fetched; move on to the next one
        users = (result.get('data') or {}).get('list') or []
        for item in users:
            save_image(item)
            save_info(item)


if __name__ == "__main__":
    quary_data()
效果如下:
说明:可设置的筛选条件还有很多,例如星座、学历、生肖等,这里都没有设定,如有需要可以自行添加。城市默认搜索厦门(对应代码中的 cityid=60);如果想搜索其他城市,可以按F12查看其他城市对应的value,然后自行修改。