Download image files from a website with Python.
The requests module fetches the contents of a web page. We can then use BeautifulSoup to parse the HTML string and collect the elements we are interested in, and finally download the images from the relevant URLs, which our script filters.

import requests, sys, webbrowser, bs4, urllib

PAGE_URL = "http://www.nipic.com/index.html"
MAX_IMAGES = 5
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can wait forever


def image_filename(url, fallback="image"):
    """Return the local file name to use for the image at *url*.

    The name is the last segment of the URL path, so any query string or
    fragment is dropped and names such as ``photo.jpeg`` or ``a.b.jpg``
    are kept whole instead of being cut at a fixed offset after the
    first dot.

    Args:
        url: Absolute URL of the image.
        fallback: Name returned when the URL path has no last segment
            (for example ``http://host/``).

    Returns:
        A non-empty file name without any directory component.
    """
    from urllib.parse import urlsplit

    name = urlsplit(url).path.rsplit("/", 1)[-1]
    return name or fallback


def resolve_image_urls(sources, base_url, limit=MAX_IMAGES):
    """Turn raw ``src`` attribute values into absolute, unique image URLs.

    Args:
        sources: Iterable of ``src`` values in document order; entries may
            be ``None`` or empty when an ``<img>`` tag has no ``src``.
        base_url: URL of the page the values came from; relative and
            protocol-relative sources are resolved against it.
        limit: Maximum number of URLs to return.

    Returns:
        A list of at most *limit* absolute http(s) URLs, in first-seen
        order, without duplicates.
    """
    from urllib.parse import urljoin

    seen = set()
    urls = []
    for src in sources:
        if len(urls) >= limit:
            break
        if not src:
            continue
        url = urljoin(base_url, src.strip())
        # Inline "data:" images and other schemes cannot be fetched over HTTP.
        if not url.startswith(("http://", "https://")):
            continue
        if url in seen:
            continue
        seen.add(url)
        urls.append(url)
    return urls


if __name__ == "__main__":
    # Fetch the page and collect the src of every <img> in document order.
    res = requests.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, features='html.parser')
    sources = [img.get('src') for img in soup.select('img')]

    # res.url is the final URL after redirects, the right base for relative paths.
    for urlStr in resolve_image_urls(sources, res.url):
        file = image_filename(urlStr)
        print("Download: " + urlStr + " => file: " + file)
        try:
            r = requests.get(urlStr, timeout=REQUEST_TIMEOUT)
            r.raise_for_status()
        except requests.RequestException as err:
            # One broken image should not abort the remaining downloads,
            # and an error page must not be saved as if it were an image.
            print("Skipped " + urlStr + ": " + str(err))
            continue

        with open(file, 'wb') as f:
            f.write(r.content)
        print("status code: ", r.status_code)
        print("content-type: " + r.headers.get('content-type', 'unknown'))
        print("encoding: ", r.encoding)



Categories: Python

0 Comments

Leave a Reply

Your email address will not be published. Required fields are marked *

You cannot copy content of this page