Wednesday, February 4, 2026

selenium webdriver – Web Scrape and Download Images with Python: XPath Problem


I'm trying to download all the images that appear in a Google Images search.

I'm using Selenium with Python (PyCharm).

When I inspect the page with the Chrome developer tools to get an XPath, I get the following:

//*[@id="islrg"]/div[1]/div[1]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[24]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[26]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[49]/a[1]/div[1]/img

//*[@id="islrg"]/div[1]/div[51]/div[1]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[24]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[26]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[49]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[51]/a[1]/div[1]/img

//*[@id="islrg"]/div[1]/div[52]/div[20]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[45]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[47]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[70]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[72]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[95]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[53]/div[18]/a[1]/div[1]/img

1. How can I get the XPath dynamically?

Then, when trying to get the actual image URL, I only got encrypted thumbnail URLs, for example:
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTWbVxeA8AzNYZoHKN91jM2UMG_g1pjlWN5kQ&usqp=CAU
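
For question 1, this is the kind of relative locator I am aiming at instead of the absolute paths above. It is only a minimal sketch: the "islrg" id comes from the XPaths above and Google's markup changes often, and the "books" query is just an example.

import time
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://www.google.com/search?q=books&tbm=isch")
time.sleep(2)

# One relative XPath that matches every thumbnail <img> under the results
# container, instead of one absolute path per thumbnail.
thumbnails = driver.find_elements(By.XPATH, '//div[@id="islrg"]//img')
print(len(thumbnails))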

I post my whole code here:

import os
import time
import io

import requests  # Import the requests module
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from PIL import Image

books = []

def get_images_from_google(driver, delay, max_images):
    def scroll_down(driver):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(delay)

    image_urls = set()
    skips = 0

    while len(image_urls) + skips < max_images:
        scroll_down(driver)

        thumbnails = driver.find_elements(By.CSS_SELECTOR, '.rg_i, .Q4LuWd')
        for img in thumbnails[len(image_urls) + skips:max_images]:
            try:
                img.click()
                time.sleep(delay)
            except:
                continue

            images = driver.find_elements(By.CSS_SELECTOR, '.sFlh5c, .pT0Scc, .iPVvYb')
            for image in images:
                if image.get_attribute('src') in image_urls:
                    max_images += 1
                    skips += 1
                    break

                if image.get_attribute('src') and 'http' in image.get_attribute('src'):
                    image_urls.add(image.get_attribute('src'))
                    print(f"Found {len(image_urls)}")

    return image_urls


def download_image(download_path, url, file_name):
    try:
        image_content = requests.get(url).content
        image_file = io.BytesIO(image_content)
        image = Image.open(image_file)
        print(url)

        # Check that the image can be identified and is in a supported format
        if image.format not in ["JPEG", "PNG"]:
            print(f"Skipping image with unsupported format: {url}")
            return

        file_path = os.path.join(download_path, file_name)  # Use os.path.join to build a correct path

        with open(file_path, "wb") as f:
            image.save(f, "JPEG")

        print("Success")
    except Exception as e:
        print('FAILED -', e)


# Ask the user for the search query
search_query = input("Enter your Google Images search query: ")

# Create the 'imgs/' directory if it doesn't exist
download_path = "imgs"
os.makedirs(download_path, exist_ok=True)

# Create a Chrome driver
options = Options()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

# Open the Google Images search page with the provided search query
search_url = f"https://www.google.com/search?q={search_query}&tbm=isch"
driver.get(search_url)

# Perform image scraping and downloading
urls = get_images_from_google(driver, 0.01, 10000)

for i, url in enumerate(urls):
    download_image(download_path, url, str(i) + ".JPEG")
    books.append([url])

df = pd.DataFrame(books, columns=['URL'])
df.to_csv('books.csv')

# Close the driver instance
driver.quit()

2. How can I improve it to get the actual URL of each full-size image?
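
One idea I'm considering is to wait explicitly after clicking a thumbnail until the preview <img> src becomes a real http URL instead of an encrypted-tbn thumbnail or data: URI, since the full-resolution src seems to replace the thumbnail only after a moment. This is a minimal sketch that reuses the preview selectors from the code above (assuming they are still valid); get_full_res_url is just a hypothetical helper name.

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

def get_full_res_url(driver, timeout=5):
    # Return the first preview src that is not an encrypted thumbnail,
    # or None if none appears within `timeout` seconds.
    def src_ready(d):
        # Preview-pane selectors copied from the code above; not guaranteed to stay valid.
        for img in d.find_elements(By.CSS_SELECTOR, '.sFlh5c, .pT0Scc, .iPVvYb'):
            src = img.get_attribute('src')
            if src and src.startswith('http') and 'encrypted-tbn' not in src:
                return src
        return False
    try:
        return WebDriverWait(driver, timeout).until(src_ready)
    except Exception:
        return None

The returned URL (if any) could then be added to image_urls in place of the immediate get_attribute('src') reads.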
