diff --git a/.gitignore b/.gitignore index fbf80db..2760a16 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,5 @@ node_modules/ secret.py inscrawler/bin/* -!inscrawler/bin/.keep \ No newline at end of file +!inscrawler/bin/.keep +output \ No newline at end of file diff --git a/README.md b/README.md index e439a84..0cac2cc 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,8 @@ This crawler could fail due to updates on instagram’s website. If you encounte ## Install 1. Make sure you have Chrome browser installed. -2. Download [chromedriver](https://sites.google.com/a/chromium.org/chromedriver/) and put it into bin folder: `./inscrawler/bin/chromedriver` -3. Install Selenium: `pip3 install -r requirements.txt` -4. `cp inscrawler/secret.py.dist inscrawler/secret.py` +2. Install Selenium: `pip3 install -r requirements.txt` +3. `cp inscrawler/secret.py.dist inscrawler/secret.py` ## User Auth 1. Open `inscrawler/secret.py` file. diff --git a/inscrawler/bin/.keep b/inscrawler/bin/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/inscrawler/browser.py b/inscrawler/browser.py index e139b03..f62e7f8 100644 --- a/inscrawler/browser.py +++ b/inscrawler/browser.py @@ -8,21 +8,24 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.common.keys import Keys +from webdriver_manager.chrome import ChromeDriverManager from .utils import randmized_sleep class Browser: def __init__(self, has_screen): - dir_path = os.path.dirname(os.path.realpath(__file__)) + useragent = "Mozilla/5.0 (X11; Linux i686; rv:77.0) Gecko/20100101 Firefox/77.0" + service_args = ["--ignore-ssl-errors=true"] chrome_options = Options() + chrome_options.add_argument(f'--user-agent={useragent}') if not has_screen: chrome_options.add_argument("--headless") chrome_options.add_argument("--start-maximized") chrome_options.add_argument("--no-sandbox") self.driver = webdriver.Chrome( - executable_path="%s/bin/chromedriver" % dir_path, + ChromeDriverManager().install(), service_args=service_args, chrome_options=chrome_options, ) diff --git a/requirements.txt b/requirements.txt index 0ce9a2f..9bf3963 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ future==0.16.0 selenium==3.9.0 tqdm==4.23.4 pre-commit==1.16.1 -black==19.3b0 \ No newline at end of file +black==19.3b0 +webdriver-manager==3.2.2 \ No newline at end of file