diff --git a/pyproject.toml b/pyproject.toml index a56e149..195f36c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,14 +9,7 @@ authors = [{ name = "pdf", email = "git@nixon.mozmail.com" }] description = "Snowflake scraper" requires-python = ">=3.9" license = { text = "MIT" } -dependencies = [ - "pydantic==1.10.10", - "scrapy>=2.10.0", - "feedparser>=6.0.10", - "uvloop>=0.17.0", - "fastapi>=0.101.0", - "uvicorn>=0.22.0", -] +dependencies = ["pydantic==1.10.10", "scrapy>=2.10.0", "feedparser>=6.0.10"] [project.scripts] snowscraper = "snowscraper.cli:run" diff --git a/snowscraper/api.py b/snowscraper/api.py deleted file mode 100644 index df2e285..0000000 --- a/snowscraper/api.py +++ /dev/null @@ -1,3 +0,0 @@ -from fastapi import APIRouter - -app = APIRouter() diff --git a/snowscraper/cli.py b/snowscraper/cli.py index 287dfbf..1ec5e11 100644 --- a/snowscraper/cli.py +++ b/snowscraper/cli.py @@ -1,48 +1,16 @@ import argparse -import asyncio import sys from datetime import datetime from datetime import timezone -from types import FrameType - -import uvicorn -import uvloop from .controller import import_scrapers from .controller import run_all -app_fastapi = None - - -class SnowScraper(uvicorn.Server): - def handle_exit(self, sig: int, frame: FrameType) -> None: - print("Shutting down SnowScraper") - return super().handle_exit(sig, frame) - - -async def start_server() -> None: - print("Starting SnowScraper") - server = SnowScraper(config=uvicorn.Config(app_fastapi, workers=1, loop="uvloop")) - fastapi = asyncio.create_task(server.serve()) - - await asyncio.wait([fastapi], return_when=asyncio.FIRST_COMPLETED) - def main(args: argparse.Namespace) -> None: - global app_fastapi - - from .webserver import app as app_fastapi - import_scrapers() run_all(args) - if sys.version_info >= (3, 11): - with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner: - runner.run(start_server()) - else: - uvloop.install() - asyncio.run(start_server()) - def run(): parser = argparse.ArgumentParser(description="Snowflake scraper") diff --git a/snowscraper/helpers.py b/snowscraper/helpers.py index a1311e4..dc7849d 100644 --- a/snowscraper/helpers.py +++ b/snowscraper/helpers.py @@ -2,24 +2,8 @@ from datetime import datetime def string_to_datetime(date_string): - """ - Convert a date string to a datetime object. - - This function supports the following formats: - - ISO 8601 with and without timezone offset: e.g., '2023-08-02T09:27:54-05:00' or '2023-08-07T18:32:05.537Z' - - RFC 1123: e.g., 'Mon, 07 Aug 2023 18:32:05 GMT' - - Args: - - date_string (str): The date string to convert - - Returns: - - datetime.datetime: The datetime representation of the provided date string. - - Raises: - - ValueError: If the provided date string doesn't match any of the supported formats. - """ try: - # First, try ISO 8601 format + # try ISO 8601 if "Z" in date_string: return datetime.fromisoformat(date_string.replace("Z", "+00:00")) return datetime.fromisoformat(date_string) @@ -27,10 +11,9 @@ def string_to_datetime(date_string): pass try: - # Then, try RFC 1123 format + # try RFC 1123 return datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %Z") except ValueError: pass - # If neither format matches, raise an exception raise ValueError(f"Unsupported date format: {date_string}") diff --git a/snowscraper/scrapers/youtube.py b/snowscraper/scrapers/youtube.py deleted file mode 100644 index f6c18dc..0000000 --- a/snowscraper/scrapers/youtube.py +++ /dev/null @@ -1,20 +0,0 @@ -from datetime import datetime -from datetime import timezone - -from ..controller import register_scraper -from ..scraper import BaseScraper - - -@register_scraper -class YoutubeScraper(BaseScraper): - def __init__(self, after=datetime(1970, 1, 1, tzinfo=timezone.utc), *args, **kwargs): - super(YoutubeScraper, self).__init__(*args, **kwargs) - self.data = {} - self.after = after - - def scrape(self): - print("Scraping YouTube... Unimplemented") - self.data = {} - - def transform(self): - return self.data diff --git a/snowscraper/webserver.py b/snowscraper/webserver.py deleted file mode 100644 index 31a4426..0000000 --- a/snowscraper/webserver.py +++ /dev/null @@ -1,11 +0,0 @@ -from fastapi import FastAPI -from fastapi.middleware.cors import CORSMiddleware - -from .api import app as api_v1 - -app = FastAPI() -app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["GET"], allow_headers=["*"]) -app.include_router(api_v1, prefix="/api/v1") - -if __name__ == "__main__": - app.run()