Commit 37113c67 by Prasad Gaikwad

Initial commit

parent 4e3958c6
# Use an official Python runtime as a parent image
FROM python:3.10-slim
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Set work directory
WORKDIR /app
# Install system dependencies required by Playwright's browsers
# Using the combined command to install dependencies for all browsers
# See: https://playwright.dev/docs/docker#install-system-dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
# --- Playwright dependencies ---
libnss3 libnspr4 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 libgbm1 libpango-1.0-0 libcairo2 libasound2 \
# --- Other useful packages ---
curl \
# --- Cleanup ---
&& apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \
&& rm -rf /var/lib/apt/lists/*
# Copy the dependency manifests into the container at /app
COPY requirements.txt setup.py ./
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Register the package in editable mode; its source is copied in below
RUN pip install -e . --no-deps
# Install Playwright browsers
# This command downloads the browser binaries into the image
RUN playwright install --with-deps
# Copy the rest of the application code into the container at /app
COPY . .
# Expose the port the app runs on
EXPOSE 8001
# Define the command to run the application
# Use 0.0.0.0 to make it accessible from outside the container
CMD ["uvicorn", "gmaps_scraper_server.main_api:app", "--host", "0.0.0.0", "--port", "8001"]
MIT License
Copyright (c) 2025 conor-is-my-name
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Google Maps Scraper API

A FastAPI service for scraping Google Maps data based on search queries. Ideal for n8n users.

Very high performance; watch out for rate limiting!

Use variables to substitute the URL parameters, e.g.:

`/scrape-get?query=hotels%20in%2098392&max_places=100&lang=en&headless=true`

If you are using n8n or another automation tool, call the `/scrape-get` endpoint so the results come back in the response.

Simple install: copy the files and run `docker compose up -d`.

Intended to be used with this n8n build:
https://github.com/conor-is-my-name/n8n-autoscaling
## API Endpoints
### POST `/scrape`
Main scraping endpoint (recommended for production)
**Parameters:**

- `query` (required): Search query (e.g., "hotels in 98392")
- `max_places` (optional): Maximum number of results to return
- `lang` (optional, default "en"): Language code for results
- `headless` (optional, default true): Run browser in headless mode
- `lat`, `lng` (optional): Coordinates to center the search around

### GET `/scrape-get`

Alternative GET endpoint with the same parameters, plus `max_distance_km` (optional, default 30.0): the maximum distance in kilometers from (`lat`, `lng`) a place may be and still be returned
### GET `/`
Health check endpoint
## Example Requests
### POST Example

The endpoint declares its parameters with FastAPI's `Query`, so they are read from the URL query string rather than from a JSON body:

```bash
curl -X POST "http://localhost:8001/scrape?query=hotels%20in%2098392&max_places=10&lang=en&headless=true"
```
### GET Example
```bash
curl "http://localhost:8001/scrape-get?query=hotels%20in%2098392&max_places=10&lang=en&headless=true"
```
or, from another container on the same Docker network:
```bash
curl "http://gmaps_scraper_api_service:8001/scrape-get?query=hotels%20in%2098392&max_places=10&lang=en&headless=true"
```
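The same endpoint can be called from a script. A minimal Python sketch using `requests`, mirroring the bundled notebook:

```python
import requests

# Parameters mirror the curl examples above; lat/lng/max_distance_km are optional.
params = {
    "query": "hotels in 98392",
    "max_places": 10,
    "lang": "en",
    "headless": "true",
}
response = requests.get("http://localhost:8001/scrape-get", params=params)
response.raise_for_status()
places = response.json()  # list of dicts, one per scraped place
print(f"Got {len(places)} places")
```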
## Running the Service
### Docker
```bash
docker-compose up --build
```
### Local Development
1. Install dependencies:
```bash
pip install -r requirements.txt
```
2. Run the API:
```bash
uvicorn gmaps_scraper_server.main_api:app --reload --port 8001
```
The API will be available at `http://localhost:8001`, or at `http://gmaps_scraper_api_service:8001` from other containers on the same Docker network.
## Notes
- For production use, consider adding authentication
- The scraping process may take several seconds to minutes depending on the number of results
- Results format depends on the underlying scraper implementation; a representative record is sketched below
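A representative result record, assembled from the fields the extractor currently emits and the sample notebook output (illustrative only, not a guaranteed schema):

```python
# Illustrative record based on fields seen in extractor.py and the sample notebook.
example_place = {
    "name": "Campus Exclusive Store",
    "place_id": "0x3bc2c1c24c39b3d7:0xcccbaf8c3733cc91",
    "address": "Survey No.207, Pheonix Mall pune, Store No, GP...",  # truncated in the sample output
    "rating": 4.5,
    "reviews_count": 76,
    "categories": ["Shoe store", "Sportswear store", "Store"],
    "website": "https://stores.campusshoes.com/maharashtra/pun...",  # truncated in the sample output
    "phone": "9289690432",
    "link": "https://www.google.com/maps/place/Campus+Exclu...",  # truncated in the sample output
    "latitude": 18.562243,
    "longitude": 73.916699,
}
```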
version: '3.8'

services:
  scraper-api:
    build: .  # Build the image from the Dockerfile in the current directory
    container_name: gmaps_scraper_api_service  # Optional: specify a container name
    ports:
      - "8001:8001"  # Map host port 8001 to container port 8001
    restart: unless-stopped  # Restart policy
    volumes:
      - .:/app  # Mount current directory to /app in container
    working_dir: /app  # Set working directory to mounted volume
    networks:
      - shark
    # Optional: Add environment variables if needed for configuration
    # environment:
    #   - HEADLESS_MODE=true
    cpu_shares: 1024  # Add cpu_shares here if not using Swarm mode

# Create the external network first with:
#   docker network create shark
networks:
  shark:
    external: true
from fastapi import FastAPI, HTTPException, Query
from typing import Optional, List, Dict, Any
import logging

# Import the scraper function (adjust path if necessary)
try:
    from gmaps_scraper_server.scraper import scrape_google_maps
except ImportError:
    # Handle case where scraper might be in a different structure later
    logging.error("Could not import scrape_google_maps from scraper.py")

    # Define a dummy function to allow the API to start, but fail on call
    def scrape_google_maps(*args, **kwargs):
        raise ImportError("Scraper function not available.")

# Configure basic logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

app = FastAPI(
    title="Google Maps Scraper API",
    description="API to trigger Google Maps scraping based on a query.",
    version="0.1.0",
)
@app.post("/scrape", response_model=List[Dict[str, Any]])
async def run_scrape(
    query: str = Query(..., description="The search query for Google Maps (e.g., 'restaurants in New York')"),
    max_places: Optional[int] = Query(None, description="Maximum number of places to scrape. Scrapes all found if None."),
    lang: str = Query("en", description="Language code for Google Maps results (e.g., 'en', 'es')."),
    headless: bool = Query(True, description="Run the browser in headless mode (no UI). Set to false for debugging locally."),
    lat: Optional[float] = Query(None, description="Latitude to center the search around."),
    lng: Optional[float] = Query(None, description="Longitude to center the search around.")
):
    """
    Triggers the Google Maps scraping process for the given query.
    """
    logging.info(f"Received scrape request for query: '{query}', max_places: {max_places}, lang: {lang}, headless: {headless}")
    try:
        results = await scrape_google_maps(
            query=query,
            max_places=max_places,
            lang=lang,
            headless=headless,
            lat=lat,
            lng=lng
        )
        logging.info(f"Scraping finished for query: '{query}'. Found {len(results)} results.")
        return results
    except ImportError as e:
        logging.error(f"ImportError during scraping for query '{query}': {e}")
        raise HTTPException(status_code=500, detail="Server configuration error: Scraper not available.")
    except Exception as e:
        logging.error(f"An error occurred during scraping for query '{query}': {e}", exc_info=True)
        # Consider more specific error handling based on scraper exceptions
        raise HTTPException(status_code=500, detail=f"An internal error occurred during scraping: {str(e)}")
@app.get("/scrape-get", response_model=List[Dict[str, Any]])
async def run_scrape_get(
query: str = Query(...),
max_places: Optional[int] = Query(None),
lang: str = Query("en"),
headless: bool = Query(True),
lat: Optional[float] = Query(None),
lng: Optional[float] = Query(None),
max_distance_km: float = Query(30.0, description="Maximum distance in kilometers from (lat, lng)")
):
try:
results = await scrape_google_maps(
query=query,
max_places=max_places,
lang=lang,
headless=headless,
lat=lat,
lng=lng,
max_distance_km=max_distance_km
)
return results
except Exception as e:
logging.error(f"Error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal Server Error")
@app.get("/")
async def read_root():
return {"message": "Google Maps Scraper API is running."}
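A quick smoke test against a running instance (an editorial sketch; it assumes the service is listening on localhost:8001):

```python
import requests

# The root endpoint doubles as a health check.
r = requests.get("http://localhost:8001/")
print(r.json())  # {'message': 'Google Maps Scraper API is running.'}
```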
 import json
 import re
+import csv
+import os

 def safe_get(data, *keys):
     """
@@ -250,6 +252,7 @@ def get_thumbnail(data):
 def extract_place_data(html_content):
     """
     High-level function to orchestrate extraction from HTML content.
+    Saves extracted data into 'extracted_places.csv'.
     """
     json_str = extract_initial_json(html_content)
     if not json_str:
@@ -261,7 +264,16 @@ def extract_place_data(html_content):
         print("Failed to parse JSON data or find expected structure.")
         return None

-    # Now extract individual fields using the helper functions
+    print("Parsed data_blob type:", type(data_blob))
+    if isinstance(data_blob, list):
+        print("data_blob length:", len(data_blob))
+
+    with open("debug_data_blob_per_place.json", "w", encoding="utf-8") as f:
+        json.dump(data_blob, f, indent=2)
+    with open("debug_full_place_page.json", "w", encoding="utf-8") as f:
+        json.dump(data_blob, f, indent=2)
+
+    # Extract individual fields
     place_details = {
         "name": get_main_name(data_blob),
         "place_id": get_place_id(data_blob),
@@ -271,15 +283,40 @@ def extract_place_data(html_content):
         "reviews_count": get_reviews_count(data_blob),
         "categories": get_categories(data_blob),
         "website": get_website(data_blob),
-        "phone": get_phone_number(data_blob), # Needs index verification
-        "thumbnail": get_thumbnail(data_blob), # Needs index verification
-        # Add other fields as needed
+        "phone": get_phone_number(data_blob),
+        "thumbnail": get_thumbnail(data_blob),
     }
-    # Filter out None values if desired
+    # Flatten coordinates into latitude and longitude for CSV
+    coords = place_details.pop("coordinates", None)
+    if coords:
+        place_details["latitude"] = coords.get("latitude")
+        place_details["longitude"] = coords.get("longitude")
+
+    # Filter out None values
     place_details = {k: v for k, v in place_details.items() if v is not None}

-    return place_details if place_details else None
+    # Save to CSV if valid
+    if place_details:
+        save_to_csv(place_details, "extracted_places.csv")
+        return place_details
+    else:
+        print("No valid fields found to save.")
+        return None
+def save_to_csv(data: dict, filename: str):
+    """Appends a single row dictionary to a CSV file, creating it if it doesn't exist."""
+    file_exists = os.path.isfile(filename)
+    with open(filename, mode='a', encoding='utf-8', newline='') as f:
+        writer = csv.DictWriter(f, fieldnames=data.keys())
+        if not file_exists:
+            writer.writeheader()
+        writer.writerow(data)
+    print(f"Saved extracted data to {filename}")
# Example usage (for testing):
if __name__ == '__main__':
......
@@ -33,14 +33,13 @@ async def run_scrape(
     """
     logging.info(f"Received scrape request for query: '{query}', max_places: {max_places}, lang: {lang}, headless: {headless}")
     try:
-        # Run the potentially long-running scraping task
-        # Note: For production, consider running this in a background task queue (e.g., Celery)
-        # to avoid blocking the API server for long durations.
-        results = await scrape_google_maps( # Added await
+        results = await scrape_google_maps(
             query=query,
             max_places=max_places,
             lang=lang,
-            headless=headless # Pass headless option from API
+            headless=headless,
+            lat=lat,
+            lng=lng
         )
         logging.info(f"Scraping finished for query: '{query}'. Found {len(results)} results.")
         return results
@@ -54,42 +53,29 @@ async def run_scrape(
 @app.get("/scrape-get", response_model=List[Dict[str, Any]])
 async def run_scrape_get(
-    query: str = Query(..., description="The search query for Google Maps (e.g., 'restaurants in New York')"),
-    max_places: Optional[int] = Query(None, description="Maximum number of places to scrape. Scrapes all found if None."),
-    lang: str = Query("en", description="Language code for Google Maps results (e.g., 'en', 'es')."),
-    headless: bool = Query(True, description="Run the browser in headless mode (no UI). Set to false for debugging locally.")
+    query: str = Query(...),
+    max_places: Optional[int] = Query(None),
+    lang: str = Query("en"),
+    headless: bool = Query(True),
+    lat: Optional[float] = Query(None),
+    lng: Optional[float] = Query(None),
+    max_distance_km: float = Query(30.0, description="Maximum distance in kilometers from (lat, lng)")
 ):
-    """
-    Triggers the Google Maps scraping process for the given query via GET request.
-    """
-    logging.info(f"Received GET scrape request for query: '{query}', max_places: {max_places}, lang: {lang}, headless: {headless}")
     try:
-        # Run the potentially long-running scraping task
-        # Note: For production, consider running this in a background task queue (e.g., Celery)
-        # to avoid blocking the API server for long durations.
-        results = await scrape_google_maps( # Added await
+        results = await scrape_google_maps(
             query=query,
             max_places=max_places,
             lang=lang,
-            headless=headless # Pass headless option from API
+            headless=headless,
+            lat=lat,
+            lng=lng,
+            max_distance_km=max_distance_km
         )
-        logging.info(f"Scraping finished for query: '{query}'. Found {len(results)} results.")
         return results
-    except ImportError as e:
-        logging.error(f"ImportError during scraping for query '{query}': {e}")
-        raise HTTPException(status_code=500, detail="Server configuration error: Scraper not available.")
     except Exception as e:
-        logging.error(f"An error occurred during scraping for query '{query}': {e}", exc_info=True)
-        # Consider more specific error handling based on scraper exceptions
-        raise HTTPException(status_code=500, detail=f"An internal error occurred during scraping: {str(e)}")
-
-# Basic root endpoint for health check or info
+        logging.error(f"Error: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Internal Server Error")
@app.get("/")
async def read_root():
return {"message": "Google Maps Scraper API is running."}
# Example for running locally (uvicorn main_api:app --reload)
# if __name__ == "__main__":
# import uvicorn
# uvicorn.run(app, host="0.0.0.0", port=8001)
\ No newline at end of file
@@ -3,6 +3,7 @@ import asyncio  # Changed from time
 import re
 from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError  # Changed to async
 from urllib.parse import urlencode
+from math import radians, sin, cos, sqrt, atan2

 # Import the extraction functions from our helper module
 from . import extractor
@@ -13,32 +14,46 @@ DEFAULT_TIMEOUT = 30000  # 30 seconds for navigation and selectors
 SCROLL_PAUSE_TIME = 1.5  # Pause between scrolls
 MAX_SCROLL_ATTEMPTS_WITHOUT_NEW_LINKS = 5  # Stop scrolling if no new links found after this many scrolls

-# --- Helper Functions ---
-def create_search_url(query, lang="en", geo_coordinates=None, zoom=None):
-    """Creates a Google Maps search URL."""
-    params = {'q': query, 'hl': lang}
-    # Note: geo_coordinates and zoom might require different URL structure (/maps/@lat,lng,zoom)
-    # For simplicity, starting with basic query search
-    return BASE_URL + "?" + urlencode(params)
-
-# --- Main Scraping Logic ---
-async def scrape_google_maps(query, max_places=None, lang="en", headless=True):  # Added async
+## Calculate distance
+def haversine(lat1, lon1, lat2, lon2):
+    R = 6371  # Radius of the Earth in kilometers
+    dlat = radians(lat2 - lat1)
+    dlon = radians(lon2 - lon1)
+    a = sin(dlat / 2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2)**2
+    c = 2 * atan2(sqrt(a), sqrt(1 - a))
+    return R * c
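A quick sanity check of `haversine` (an editorial aside, not part of the diff), using the search center and first store coordinates that appear in the sample notebook further down:

```python
# Distance between the notebook's search center (near Nashik) and a Pune store.
d = haversine(19.99113822646553, 73.76191319096492, 18.562243, 73.916699)
print(f"{d:.1f} km")  # roughly 160 km, so well outside a 30 km radius
```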
+# --- Helper Functions ---
+def create_search_url(query, lang="en", geo_coordinates=None, zoom=12):
     """
-    Scrapes Google Maps for places based on a query.
+    Creates a Google Maps search URL centered around specific geo coordinates.

     Args:
-        query (str): The search query (e.g., "restaurants in New York").
-        max_places (int, optional): Maximum number of places to scrape. Defaults to None (scrape all found).
-        lang (str, optional): Language code for Google Maps (e.g., 'en', 'es'). Defaults to "en".
-        headless (bool, optional): Whether to run the browser in headless mode. Defaults to True.
+        query (str): Search query (e.g., "campus shoe store").
+        lang (str): Language code (default: "en").
+        geo_coordinates (tuple): (latitude, longitude) to center the map.
+        zoom (int): Zoom level (default: 12).

     Returns:
-        list: A list of dictionaries, each containing details for a scraped place.
-              Returns an empty list if no places are found or an error occurs.
+        str: Full Google Maps search URL.
     """
+    params = {'hl': lang}
+    encoded_query = query.replace(' ', '+')
+    if geo_coordinates:
+        lat, lng = geo_coordinates
+        return f"{BASE_URL}{encoded_query}/@{lat},{lng},{zoom}z?{urlencode(params)}"
+    return f"{BASE_URL}{encoded_query}?{urlencode(params)}"
+# --- Main Scraping Logic ---
+async def scrape_google_maps(query, max_places=None, lang="en", headless=True, lat=None, lng=None, max_distance_km=30):
     results = []
     place_links = set()
     scroll_attempts_no_new = 0
+    search_url = create_search_url(query, lang, geo_coordinates=(lat, lng))

     async with async_playwright() as p:  # Changed to async
         try:
@@ -50,14 +65,24 @@ async def scrape_google_maps(query, max_places=None, lang="en", headless=True):
                 # Consider setting viewport, locale, timezone if needed
                 locale=lang,
             )
             page = await context.new_page()  # Added await
             if not page:
                 await browser.close()  # Close browser before raising
+                if lat is not None and lng is not None:
+                    results = [
+                        place for place in results
+                        if 'latitude' in place and 'longitude' in place and
+                        haversine(lat, lng, place['latitude'], place['longitude']) <= max_distance_km
+                    ]
+                print(f"\nScraping finished. Found details for {len(results)} places.")
+                return results
                 raise Exception("Failed to create a new browser page (context.new_page() returned None).")

             # Removed problematic: await page.set_default_timeout(DEFAULT_TIMEOUT)
             # Removed associated debug prints
-            search_url = create_search_url(query, lang)
+            search_url = create_search_url(query, lang, geo_coordinates=(lat, lng))
             print(f"Navigating to search URL: {search_url}")
             await page.goto(search_url, wait_until='domcontentloaded')  # Added await
             await asyncio.sleep(2)  # Changed to asyncio.sleep, added await
@@ -100,6 +125,16 @@ async def scrape_google_maps(query, max_places=None, lang="en", headless=True):
                 print(f"Error: Feed element '{feed_selector}' not found. Maybe no results? Taking screenshot.")
                 await page.screenshot(path='feed_not_found_screenshot.png')  # Added await
                 await browser.close()  # Added await
+                if lat is not None and lng is not None:
+                    results = [
+                        place for place in results
+                        if 'latitude' in place and 'longitude' in place and
+                        haversine(lat, lng, place['latitude'], place['longitude']) <= max_distance_km
+                    ]
+                print(f"\nScraping finished. Found details for {len(results)} places.")
+                return results
                 return []  # No results or page structure changed

             if await page.locator(feed_selector).count() > 0:  # Added await
@@ -178,6 +213,15 @@ async def scrape_google_maps(query, max_places=None, lang="en", headless=True):
                     await asyncio.sleep(0.5)  # Changed to asyncio.sleep, added await

             await browser.close()  # Added await
+            if lat is not None and lng is not None:
+                results = [
+                    place for place in results
+                    if 'latitude' in place and 'longitude' in place and
+                    haversine(lat, lng, place['latitude'], place['longitude']) <= max_distance_km
+                ]
+            print(f"\nScraping finished. Found details for {len(results)} places.")
+            return results

         except PlaywrightTimeoutError:
             print(f"Timeout error during scraping process.")
@@ -189,9 +233,15 @@ async def scrape_google_maps(query, max_places=None, lang="en", headless=True):
             # Ensure browser is closed if an error occurred mid-process
             if 'browser' in locals() and browser.is_connected():  # Check if browser exists and is connected
                 await browser.close()  # Added await

-    print(f"\nScraping finished. Found details for {len(results)} places.")
-    return results
+    if lat is not None and lng is not None:
+        results = [
+            place for place in results
+            if 'latitude' in place and 'longitude' in place and
+            haversine(lat, lng, place['latitude'], place['longitude']) <= max_distance_km
+        ]
+    print(f"\nScraping finished. Found details for {len(results)} places.")
+    return results
# --- Example Usage ---
# (Example usage block removed as this script is now intended to be imported as a module)
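Editorial note: the same filter-and-return block now appears at four exit points of `scrape_google_maps`. A possible consolidation (a sketch, not part of the commit) would be a small helper:

```python
def filter_by_distance(results, lat, lng, max_distance_km):
    """Keep only places within max_distance_km of (lat, lng); no-op when no center is given."""
    if lat is None or lng is None:
        return results
    return [
        place for place in results
        if 'latitude' in place and 'longitude' in place
        and haversine(lat, lng, place['latitude'], place['longitude']) <= max_distance_km
    ]
```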
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "dee123a6-7a41-4224-bdfe-abb707987ce5",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3aed9b86-3149-43f0-b80f-332644d2aef4",
"metadata": {},
"outputs": [],
"source": [
"url = \"http://localhost:8000/scrape-get?query=campus%20stores&max_places=5&lang=en&headless=true&lat=19.99113822646553&lng=73.76191319096492&max_distance_km=30\"\n",
"response = requests.get(url)\n",
"data = response.json()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1700a291-3d4d-465d-8351-d7ebf68d6992",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(data)\n",
"\n",
"df[\"latitude\"] = df[\"coordinates\"].apply(lambda x: x[\"latitude\"])\n",
"df[\"longitude\"] = df[\"coordinates\"].apply(lambda x: x[\"longitude\"])\n",
"df.drop(columns=\"coordinates\", inplace=True)\n",
"\n",
"df.to_csv(\"results.csv\", index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2a79a396-b868-4087-aac3-eeea5efe2363",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('results.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ce5b944c-8e50-4544-8524-ef2dddcc1f24",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>place_id</th>\n",
" <th>address</th>\n",
" <th>rating</th>\n",
" <th>reviews_count</th>\n",
" <th>categories</th>\n",
" <th>website</th>\n",
" <th>phone</th>\n",
" <th>link</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x3bc2c1c24c39b3d7:0xcccbaf8c3733cc91</td>\n",
" <td>Survey No.207, Pheonix Mall pune, Store No, GP...</td>\n",
" <td>4.5</td>\n",
" <td>76.0</td>\n",
" <td>['Shoe store', 'Sportswear store', 'Store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/pun...</td>\n",
" <td>9289690432</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>18.562243</td>\n",
" <td>73.916699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x3bc2b99ce8a282a1:0xe1a794aef1ba34ff</td>\n",
" <td>SHOP NO 3, SHAGUN CHOWK, SR.2773/1,PRITAMDAS P...</td>\n",
" <td>4.4</td>\n",
" <td>46.0</td>\n",
" <td>['Shoe store', 'Sportswear store', 'Store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/pun...</td>\n",
" <td>9289148580</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>18.621968</td>\n",
" <td>73.801811</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>My Campus Store</td>\n",
" <td>0x3bc2bf8d7265c9f7:0xd8359f25ee6e29ca</td>\n",
" <td>2, Shridhar Building, Baner Rd, behind Ancient...</td>\n",
" <td>4.8</td>\n",
" <td>78.0</td>\n",
" <td>['Custom t-shirt store', 'E-commerce service',...</td>\n",
" <td>http://www.mycampusstore.in/</td>\n",
" <td>9637066482</td>\n",
" <td>https://www.google.com/maps/place/My+Campus+St...</td>\n",
" <td>18.557775</td>\n",
" <td>73.799953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x3bc2ebdb93a63e61:0xb82636ae4fe6e866</td>\n",
" <td>Shop No. 2, Sr, No- 41, Katraj-Dehu Rd Bypass,...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>['Shoe store', 'Sportswear store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/pun...</td>\n",
" <td>9289018492</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>18.451480</td>\n",
" <td>73.848531</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x395b93d44507f5e5:0x5aff1ce2a3cb1675</td>\n",
" <td>First floor shop no. 14,333, Domestic Airport ...</td>\n",
" <td>4.9</td>\n",
" <td>35.0</td>\n",
" <td>['Shoe store', 'Sportswear store', 'Store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/pun...</td>\n",
" <td>9289925505</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>18.578019</td>\n",
" <td>73.907157</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name place_id \\\n",
"0 Campus Exclusive Store 0x3bc2c1c24c39b3d7:0xcccbaf8c3733cc91 \n",
"1 Campus Exclusive Store 0x3bc2b99ce8a282a1:0xe1a794aef1ba34ff \n",
"2 My Campus Store 0x3bc2bf8d7265c9f7:0xd8359f25ee6e29ca \n",
"3 Campus Exclusive Store 0x3bc2ebdb93a63e61:0xb82636ae4fe6e866 \n",
"4 Campus Exclusive Store 0x395b93d44507f5e5:0x5aff1ce2a3cb1675 \n",
"\n",
" address rating reviews_count \\\n",
"0 Survey No.207, Pheonix Mall pune, Store No, GP... 4.5 76.0 \n",
"1 SHOP NO 3, SHAGUN CHOWK, SR.2773/1,PRITAMDAS P... 4.4 46.0 \n",
"2 2, Shridhar Building, Baner Rd, behind Ancient... 4.8 78.0 \n",
"3 Shop No. 2, Sr, No- 41, Katraj-Dehu Rd Bypass,... NaN NaN \n",
"4 First floor shop no. 14,333, Domestic Airport ... 4.9 35.0 \n",
"\n",
" categories \\\n",
"0 ['Shoe store', 'Sportswear store', 'Store'] \n",
"1 ['Shoe store', 'Sportswear store', 'Store'] \n",
"2 ['Custom t-shirt store', 'E-commerce service',... \n",
"3 ['Shoe store', 'Sportswear store'] \n",
"4 ['Shoe store', 'Sportswear store', 'Store'] \n",
"\n",
" website phone \\\n",
"0 https://stores.campusshoes.com/maharashtra/pun... 9289690432 \n",
"1 https://stores.campusshoes.com/maharashtra/pun... 9289148580 \n",
"2 http://www.mycampusstore.in/ 9637066482 \n",
"3 https://stores.campusshoes.com/maharashtra/pun... 9289018492 \n",
"4 https://stores.campusshoes.com/maharashtra/pun... 9289925505 \n",
"\n",
" link latitude longitude \n",
"0 https://www.google.com/maps/place/Campus+Exclu... 18.562243 73.916699 \n",
"1 https://www.google.com/maps/place/Campus+Exclu... 18.621968 73.801811 \n",
"2 https://www.google.com/maps/place/My+Campus+St... 18.557775 73.799953 \n",
"3 https://www.google.com/maps/place/Campus+Exclu... 18.451480 73.848531 \n",
"4 https://www.google.com/maps/place/Campus+Exclu... 18.578019 73.907157 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7aba79b-ba84-49c3-a1bd-2b4ac257fc74",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "dee123a6-7a41-4224-bdfe-abb707987ce5",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3aed9b86-3149-43f0-b80f-332644d2aef4",
"metadata": {},
"outputs": [],
"source": [
"url = \"http://localhost:8000/scrape-get?query=campus%20stores&max_places=5&lang=en&headless=true&lat=19.99113822646553&lng=73.76191319096492&max_distance_km=30\"\n",
"response = requests.get(url)\n",
"data = response.json()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "1700a291-3d4d-465d-8351-d7ebf68d6992",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(data)\n",
"\n",
"df[\"latitude\"] = df[\"coordinates\"].apply(lambda x: x[\"latitude\"])\n",
"df[\"longitude\"] = df[\"coordinates\"].apply(lambda x: x[\"longitude\"])\n",
"df.drop(columns=\"coordinates\", inplace=True)\n",
"\n",
"df.to_csv(\"results.csv\", index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "2a79a396-b868-4087-aac3-eeea5efe2363",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('results.csv')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ce5b944c-8e50-4544-8524-ef2dddcc1f24",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>place_id</th>\n",
" <th>address</th>\n",
" <th>rating</th>\n",
" <th>reviews_count</th>\n",
" <th>categories</th>\n",
" <th>website</th>\n",
" <th>phone</th>\n",
" <th>link</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x3bc2c1c24c39b3d7:0xcccbaf8c3733cc91</td>\n",
" <td>Survey No.207, Pheonix Mall pune, Store No, GP...</td>\n",
" <td>4.5</td>\n",
" <td>76</td>\n",
" <td>['Shoe store', 'Sportswear store', 'Store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/pun...</td>\n",
" <td>9289690432</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>18.562243</td>\n",
" <td>73.916699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x3be795198099b6e3:0x81def44cd8764dc6</td>\n",
" <td>First Floor, F12, Metro Junction Mall, Shilpha...</td>\n",
" <td>4.8</td>\n",
" <td>112</td>\n",
" <td>['Shoe store', 'Sportswear store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/tha...</td>\n",
" <td>9289677522</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>19.228908</td>\n",
" <td>73.123019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x3be7b9c605eea545:0xb793f8af46ce5fac</td>\n",
" <td>Shop No. 1&amp;2, Munshi Estate, Plot No 504, MG R...</td>\n",
" <td>4.7</td>\n",
" <td>100</td>\n",
" <td>['Shoe store', 'Sportswear store', 'Store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/mum...</td>\n",
" <td>9289148572</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>19.173210</td>\n",
" <td>72.955426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x3be7c9013c435c23:0x43967767d741332b</td>\n",
" <td>Selection Ahmed Palace, Plot No. 254 SV Road, ...</td>\n",
" <td>4.4</td>\n",
" <td>125</td>\n",
" <td>['Shoe store', 'Sportswear store', 'Store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/mum...</td>\n",
" <td>9289148575</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>19.060032</td>\n",
" <td>72.836883</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Campus Exclusive Store</td>\n",
" <td>0x3bc2bb5812ba0af5:0xaec0dd35c89bc775</td>\n",
" <td>UNIT No-1, FLOOR, 02, GRANT STREET, Phase 1, H...</td>\n",
" <td>4.8</td>\n",
" <td>254</td>\n",
" <td>['Shoe store', 'Sportswear store', 'Store']</td>\n",
" <td>https://stores.campusshoes.com/maharashtra/pun...</td>\n",
" <td>9289690420</td>\n",
" <td>https://www.google.com/maps/place/Campus+Exclu...</td>\n",
" <td>18.594074</td>\n",
" <td>73.725319</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name place_id \\\n",
"0 Campus Exclusive Store 0x3bc2c1c24c39b3d7:0xcccbaf8c3733cc91 \n",
"1 Campus Exclusive Store 0x3be795198099b6e3:0x81def44cd8764dc6 \n",
"2 Campus Exclusive Store 0x3be7b9c605eea545:0xb793f8af46ce5fac \n",
"3 Campus Exclusive Store 0x3be7c9013c435c23:0x43967767d741332b \n",
"4 Campus Exclusive Store 0x3bc2bb5812ba0af5:0xaec0dd35c89bc775 \n",
"\n",
" address rating reviews_count \\\n",
"0 Survey No.207, Pheonix Mall pune, Store No, GP... 4.5 76 \n",
"1 First Floor, F12, Metro Junction Mall, Shilpha... 4.8 112 \n",
"2 Shop No. 1&2, Munshi Estate, Plot No 504, MG R... 4.7 100 \n",
"3 Selection Ahmed Palace, Plot No. 254 SV Road, ... 4.4 125 \n",
"4 UNIT No-1, FLOOR, 02, GRANT STREET, Phase 1, H... 4.8 254 \n",
"\n",
" categories \\\n",
"0 ['Shoe store', 'Sportswear store', 'Store'] \n",
"1 ['Shoe store', 'Sportswear store'] \n",
"2 ['Shoe store', 'Sportswear store', 'Store'] \n",
"3 ['Shoe store', 'Sportswear store', 'Store'] \n",
"4 ['Shoe store', 'Sportswear store', 'Store'] \n",
"\n",
" website phone \\\n",
"0 https://stores.campusshoes.com/maharashtra/pun... 9289690432 \n",
"1 https://stores.campusshoes.com/maharashtra/tha... 9289677522 \n",
"2 https://stores.campusshoes.com/maharashtra/mum... 9289148572 \n",
"3 https://stores.campusshoes.com/maharashtra/mum... 9289148575 \n",
"4 https://stores.campusshoes.com/maharashtra/pun... 9289690420 \n",
"\n",
" link latitude longitude \n",
"0 https://www.google.com/maps/place/Campus+Exclu... 18.562243 73.916699 \n",
"1 https://www.google.com/maps/place/Campus+Exclu... 19.228908 73.123019 \n",
"2 https://www.google.com/maps/place/Campus+Exclu... 19.173210 72.955426 \n",
"3 https://www.google.com/maps/place/Campus+Exclu... 19.060032 72.836883 \n",
"4 https://www.google.com/maps/place/Campus+Exclu... 18.594074 73.725319 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7aba79b-ba84-49c3-a1bd-2b4ac257fc74",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
playwright
fastapi
uvicorn[standard]
from setuptools import setup, find_packages

setup(
    name="gmaps_scraper_server",
    version="0.1",
    packages=find_packages(),
    install_requires=[
        "playwright",
        "fastapi",
        "uvicorn[standard]",
    ],
)