# Created by Han Xu
# email:underturrets@163.com
import os
import re
import urllib.parse
import urllib.request
from typing import List, Optional

import requests


class Spider_bing_image():
    """Download the .jpg images that Bing image search returns for a keyword.

    Calling an instance runs the whole pipeline: create the output
    directory, build the result-page URLs, extract the image URLs from those
    pages and download every image.
    """

    # Seconds to wait for a result page before giving up on it.
    REQUEST_TIMEOUT = 10

    # Query string appended to ``self.url``; the two placeholders are the
    # URL-quoted keyword and the value of the ``first`` parameter.
    _QUERY_TEMPLATE = (
        "q={}&first={}&count=35&cw=1233&ch=946&relp=35&datsrc=I"
        "&layout=RowBased_Landscape&apc=0&mmasync=1"
        "&dgState=x*303_y*1355_h*185_c*1_i*36_r*8"
        "&IG=6A228D01DCE044E685557DE143D55D91&SFX=2&iid=images.5554"
    )

    # The dot is escaped so only a literal ".jpg" suffix matches, and quotes,
    # whitespace and angle brackets are excluded so that a match cannot run
    # across unrelated markup between an "http" and a later ".jpg".
    _IMAGE_URL_PATTERN = re.compile(r'http[^%&\s"<>]+\.jpg')

    def __init__(self,
                 path: Optional[str] = None,
                 keyword: Optional[str] = None,
                 paginator: Optional[int] = None) -> None:
        """
        @:brief Collect the settings of the spider. Every argument that is
                left as None is asked for on the console, so calling the
                constructor without arguments prompts for all three values.
        @:param path name of the directory (relative to the current working
                directory) the images are saved in
        @:param keyword the search keywords sent to Bing
        @:param paginator the number of result pages to fetch
        @:return
        """
        if path is None:
            path = input("type in the path where you want to reserve the images:")
        self.path = path
        self.url = 'https://www4.bing.com/images/async?'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48'
        }
        if keyword is None:
            keyword = input("type in the keywords used to search in bing:")
        self.keyword = keyword
        if paginator is None:
            paginator = input("Type in the number of pages you want.Each page has almost 30 images:")
        self.paginator = int(paginator)

    def get_urls(self) -> List[str]:
        """
        @:brief Get the URLs that you need to visit: one per requested page,
                with ``first`` set to 30, 60, ... for page 1, 2, ...
        @:return return a list of the URLs
        """
        keyword = urllib.parse.quote(self.keyword)
        return [
            self.url + self._QUERY_TEMPLATE.format(keyword, 30 * page)
            for page in range(1, self.paginator + 1)
        ]

    def get_path(self) -> str:
        """
        @:brief Create the directory the images are saved in. When
                "./<path>" already exists, "./<path>(1)", "./<path>(2)", ...
                are tried until a new directory can be created.
        @:return the created directory, ending with "/"
        """
        base = "./" + self.path
        candidate = base
        suffix = 0
        while True:
            try:
                # Creating first and reacting to the failure avoids the race
                # between an existence check and the creation itself.
                os.mkdir(candidate)
            except FileExistsError:
                suffix += 1
                candidate = "{}({})".format(base, suffix)
            else:
                return candidate + "/"

    def get_image_url(self, urls: List[str]) -> List[str]:
        """
        @:brief Get the URLs of images. A page that cannot be fetched is
                reported on the console and skipped.
        @:param urls the result-page URLs to scan
        @:return a list of URLs of images, without duplicates, in the order
                 they were first found
        """
        found = []
        for url in urls:
            try:
                page_text = requests.get(
                    url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
                ).text
            except requests.RequestException as error:
                print(error)
                continue
            found.extend(self._IMAGE_URL_PATTERN.findall(page_text))
        # Consecutive pages overlap (35 results requested, offset advanced by
        # 30), so the same image URL can show up more than once.
        return list(dict.fromkeys(found))

    def get_image(self, image_url: List[str]) -> None:
        """
        @:brief Download the images into the directory stored in self.path
                as 1.jpg, 2.jpg, ... A failed download is reported on the
                console and does not consume a file number.
        @:param image_url the URLs of the images to download
        @:return
        """
        m = 1
        for img_url in image_url:
            try:
                print("第{}张图片的URL是{}".format(m, img_url))
                print("保存于{}".format(os.getcwd() + self.path[1:]))
                # urlretrieve() copies the remote resource straight to a
                # local file.
                urllib.request.urlretrieve(img_url, self.path + str(m) + '.jpg')
            except Exception as error:
                # Best effort: one broken link must not stop the run, while
                # KeyboardInterrupt / SystemExit still propagate.
                print(error)
            else:
                # Report the finished download, then move on to the next
                # file number.
                print('**********第' + str(m) + '张图片下载完成********')
                m = m + 1
        print('下载完成!')
        return

    def __call__(self, *args, **kwargs) -> None:
        """
        @:brief Run the spider: create the output directory, collect the
                image URLs and download the images. self.path is replaced by
                the directory that was created.
        @:return
        """
        self.path = self.get_path()
        urls = self.get_urls()
        image_url = self.get_image_url(urls)
        self.get_image(image_url)
        return