diff --git a/.github/workflows/scrape_and_commit.yml b/.github/workflows/scrape_and_commit.yml deleted file mode 100644 index 53b180a..0000000 --- a/.github/workflows/scrape_and_commit.yml +++ /dev/null @@ -1,35 +0,0 @@ - -name: Run Snowscraper and Commit Changes - -on: - schedule: - - cron: '0 0 * * *' # Run daily at midnight - workflow_dispatch: # Allow manual trigger - -jobs: - scrape-and-commit: - runs-on: ubuntu-latest - steps: - - name: Checkout Repository - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - - name: Install Dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Run Scraper - run: python -m snowscraper.cli - - - name: Commit and Push Changes - run: | - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - git add -A - git commit -m "Update scraped data" || echo "No changes to commit" - git push diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 47b30eb..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -feedparser -scrapy \ No newline at end of file diff --git a/results.json b/results.json deleted file mode 100644 index 0993a07..0000000 --- a/results.json +++ /dev/null @@ -1,867 +0,0 @@ -{ - "https://medium.com/snowflake/simplifying-security-data-ingestion-recent-snowflake-features-minimize-cost-and-complexity-b72d7eba77ab?source=rss----34b6daafc07---4": { - "title": "Simplifying Security Data Ingestion: Recent Snowflake features minimize cost and complexity", - "published": "2023-09-18T20:07:44", - "updated": "2023-09-18T23:50:04.696000+00:00" - }, - "https://medium.com/snowflake/ip-protection-in-snowflake-native-apps-dc45173af152?source=rss----34b6daafc07---4": { - "title": "IP Protection in Snowflake Native Apps", - "published": "2023-09-18T19:01:49", - "updated": "2023-09-18T19:01:48.495000+00:00" - }, - "https://medium.com/snowflake/sis-application-development-ci-cd-setup-with-github-actions-3a4279f57287?source=rss----34b6daafc07---4": { - "title": "SiS Application Development: CI/CD Setup with GitHub Actions", - "published": "2023-09-15T19:48:20", - "updated": "2023-09-15T19:48:20.672000+00:00" - }, - "https://medium.com/snowflake/finops-for-snowflake-f37d531eb8e2?source=rss----34b6daafc07---4": { - "title": "FinOps for Snowflake", - "published": "2023-09-15T17:40:31", - "updated": "2023-09-15T17:40:31.497000+00:00" - }, - "https://medium.com/snowflake/improving-llms-management-in-snowflake-e7e5c045f2db?source=rss----34b6daafc07---4": { - "title": "Improving LLMs management in Snowflake", - "published": "2023-09-15T17:01:58", - "updated": "2023-09-15T17:01:58.209000+00:00" - }, - "https://medium.com/snowflake/7-guardrails-against-common-mistakes-that-inflate-snowflake-credit-usage-cc2bf5421681?source=rss----34b6daafc07---4": { - "title": "7 guardrails against common mistakes that inflate Snowflake credit usage", - "published": "2023-09-15T16:13:01", - "updated": "2023-09-15T16:13:01.391000+00:00" - }, - "https://medium.com/snowflake/simplifying-data-ingestion-creating-a-data-pipeline-in-snowflake-with-sftp-e99033f230c2?source=rss----34b6daafc07---4": { - "title": "Simplifying Data Ingestion: Creating a Data pipeline in Snowflake with SFTP", - "published": "2023-09-14T19:01:47", - "updated": "2023-09-15T14:47:09.558000+00:00" - }, - "https://medium.com/snowflake/data-sharing-patterns-in-snowflake-3b526729efd7?source=rss----34b6daafc07---4": { - "title": "Data Sharing Patterns in Snowflake", - "published": "2023-09-14T15:51:56", - "updated": "2023-09-15T06:15:22.329000+00:00" - }, - "https://medium.com/snowflake/deep-dive-into-security-and-performance-isolation-of-snowflake-virtual-warehouses-73bdecc69f4?source=rss----34b6daafc07---4": { - "title": "Deep dive into security and performance isolation of Snowflake virtual warehouses", - "published": "2023-09-12T18:11:43", - "updated": "2023-09-15T02:10:02.212000+00:00" - }, - "https://medium.com/snowflake/snowflake-gen-ai-assistant-a838e1942d21?source=rss----34b6daafc07---4": { - "title": "Snowflake Gen-AI Assistant", - "published": "2023-09-12T11:01:39", - "updated": "2023-09-12T11:01:39.062000+00:00" - }, - "https://quickstarts.snowflake.com/guide/accelerate_your_graphql_development_on_snowflake_with_hasura/index.html?index=..%2F..index": { - "title": "Accelerate Your GraphQL Development on Snowflake with Hasura", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "api,gettingstarted,graphql,hasura,quickstart,rest,web" - }, - "https://quickstarts.snowflake.com/guide/analyzing_real_estate_properties_with_streamlit/index.html?index=..%2F..index": { - "title": "Analyzing real estate properties using Streamlit", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "datascience&ml,solutionexamples,web" - }, - "https://quickstarts.snowflake.com/guide/build_customer_facing_applications_using_sigma_and_snowflake/index.html?index=..%2F..index": { - "title": "Build Customer Facing Applications Using Sigma and Snowflake", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "dataapplications,dataengineering,gettingstarted,sigma,web" - }, - "https://quickstarts.snowflake.com/guide/data_app/index.html?index=..%2F..index": { - "title": "Building a Data Application", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "api,dataapplications,dataengineering,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_thoughtspot/index.html?index=..%2F..index": { - "title": "Build a ReactJS app with ThoughtSpot and Snowflake", - "updated": "2023-08-15T02:36:48-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/vhol_data_marketplace_app/index.html?index=..%2F..index": { - "title": "Building an application on Snowflake with data from Snowflake Marketplace", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/dcdf_incremental_processing/index.html?index=..%2F..index": { - "title": "Getting Started with DCDF Data Architecture Incremental Processing & Logical Partitions", - "updated": "2023-08-15T02:36:46-07:00", - "tags": "dataarchitecture,dataclouddeploymentframework,dataengineering,dcdf,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/servicenow_to_snowflake_connector/index.html?index=..%2F..index": { - "title": "Snowflake Connector for ServiceNow Installation", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "connectors,dataengineering,servicenow,web" - }, - "https://quickstarts.snowflake.com/guide/altr_get_started/index.html?index=..%2F..index": { - "title": "ALTR Quickstart - Data Access Control", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "dataowner,datasecurity,datasteward,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/cloudtrail_log_ingestion/index.html?index=..%2F..index": { - "title": "AWS Cloudtrail Ingestion", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "aws,cybersecurity,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_pii/index.html?index=..%2F..index": { - "title": "Process PII data using Snowflake RBAC, DAC, Row Access Policies, and Column Level Security", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "compliance,datagovernance,gettingstarted,masking,pii,rowlevelsecurity,security,sensitivedata,web" - }, - "https://quickstarts.snowflake.com/guide/integrating_fluentd_with_snowflake/index.html?index=..%2F..index": { - "title": "Using Fluentd to Send Log Files to Snowflake for Security Analytics", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "datagovernance,siem,web" - }, - "https://quickstarts.snowflake.com/guide/python_camouflage/index.html?index=..%2F..index": { - "title": "Tokenization in Snowflake Using Python UDFs (Python Camouflage)", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "dataengineering,datascience,encryption,gettingstarted,python,security,tokenization,web" - }, - "https://quickstarts.snowflake.com/guide/s3_access_log_ingestion/index.html?index=..%2F..index": { - "title": "AWS S3 Access Logs Ingestion", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "cybersecurity,datalossprotection,web" - }, - "https://quickstarts.snowflake.com/guide/security_dashboards_for_snowflake/index.html?index=..%2F..index": { - "title": "Snowflake Security Dashboards", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "dashboards,security,snowsight,web" - }, - "https://quickstarts.snowflake.com/guide/vpc_flow_log_ingestion/index.html?index=..%2F..index": { - "title": "AWS VPC Flow Logs Ingestion", - "updated": "2023-08-15T02:36:56-07:00", - "tags": "cspm,cybersecurity,siem,vpcflowlogs,web" - }, - "https://quickstarts.snowflake.com/guide/auto_ingest_twitter_data/index.html?index=..%2F..index": { - "title": "Auto-Ingest Twitter Data into Snowflake", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "autoingest,cloudstorage,snowpipe,twitter" - }, - "https://quickstarts.snowflake.com/guide/build_a_data_clean_room_in_snowflake_advanced/index.html?index=..%2F..index": { - "title": "Build A Data Clean Room in Snowflake - Advanced", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "datacleanrooms,dataengineering,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/build_a_multiparty_clean_room_in_snowflake/index.html?index=..%2F..index": { - "title": "Build a Multiparty Data Clean Room in Snowflake", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "datacleanrooms,dataengineering,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/cdc_data_from_oracle_to_snowflake_in_streamsets/index.html?index=..%2F..index": { - "title": "Process Change Data Capture (CDC) data from Oracle to Snowflake Using StreamSets", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "oraclecdc,web" - }, - "https://quickstarts.snowflake.com/guide/CDC_SnowpipeStreaming_DynamicTables/index.html?index=..%2F..index": { - "title": "Snowpipe Streaming and Dynamic Tables for Real-Time Ingestion (CDC Use Case)", - "updated": "2023-09-18T11:53:39-07:00", - "tags": "cdc,dataengineering,dynamictables,financialservices,snowpipe,streaming,web" - }, - "https://quickstarts.snowflake.com/guide/cloud_native_data_engineering_with_matillion_and_snowflake/index.html?index=..%2F..index": { - "title": "Cloud Native Data Engineering with Matillion and Snowflake", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "dataengineering,datatransformation,gettingstarted,partner,web" - }, - "https://quickstarts.snowflake.com/guide/cross_cloud_business_continuity/index.html?index=..%2F..index": { - "title": "Cross Cloud Business Continuity With Snowflake", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/data_engineering_streaming_integration/index.html?index=..%2F..index": { - "title": "Streaming Data Integration with Snowflake", - "updated": "2023-08-15T02:36:45-07:00", - "tags": "dataengineering,snowpipe,streaming,web" - }, - "https://quickstarts.snowflake.com/guide/data_engineering_with_apache_airflow/index.html?index=..%2F..index": { - "title": "Data Engineering with Apache Airflow, Snowflake & dbt", - "updated": "2023-08-15T02:36:45-07:00", - "tags": "airflow,dataengineering,dbt,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/data_engineering_with_snowpark_python_and_dbt/index.html?index=..%2F..index": { - "title": "Data Engineering with Snowpark Python and dbt", - "updated": "2023-08-15T02:36:45-07:00", - "tags": "dataengineering,dbt,web" - }, - "https://quickstarts.snowflake.com/guide/data_teams_with_dbt_core/index.html?index=..%2F..index": { - "title": "Accelerating Data Teams with dbt Core & Snowflake", - "updated": "2023-08-15T02:36:46-07:00", - "tags": "dataengineering,datasharing,dbt,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/database_modeling_with_sqldbm/index.html?index=..%2F..index": { - "title": "Cloud-native Database Modeling with SqlDBM", - "updated": "2023-08-15T02:36:46-07:00", - "tags": "cicd,dataengineering,datamodeling,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/extract_attributes_dicom_files_java_udf/index.html?index=..%2F..index": { - "title": "Extract Attributes from DICOM Files using Snowpark for Python and Java", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "dataengineering,datascience,unstructureddata,web" - }, - "https://quickstarts.snowflake.com/guide/ingest_data_from_pubsub_to_snowflake_with_apache_beam/index.html?index=..%2F..index": { - "title": "Ingest data from PubSub to Snowflake with Apache Beam", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "apachebeam,dataengineering,web" - }, - "https://quickstarts.snowflake.com/guide/parsing_semi_structured_data_with_coalesce/index.html?index=..%2F..index": { - "title": "Parsing Semi-Structured Data with Coalesce", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/processing_hl7_fhir_messages_with_snowflake/index.html?index=..%2F..index": { - "title": "Getting Started - Processing HL7 FHIR Messages with Snowflake", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "dataengineering,datalake,datascience,fhirpwd,gettingstarted,health&lifesciences,hl7,web" - }, - "https://quickstarts.snowflake.com/guide/processing_hl7_v2_messages_with_snowflake/index.html?index=..%2F..index": { - "title": "Getting Started - Processing HL7 V2 Messages with Snowflake", - "updated": "2023-09-18T13:24:41-07:00", - "tags": "dataengineering,datalake,datascience,fhir,gettingstarted,health&lifesciences,hl7,unstructureddata,web" - }, - "https://quickstarts.snowflake.com/guide/snowflake_transformer/index.html?index=..%2F..index": { - "title": "A Dive Into Slowly Changing Dimensions with Snowpark and StreamSets", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "dataengineering,gettingstarted,snowpark,web" - }, - "https://quickstarts.snowflake.com/guide/snowpark_python_top_three_tips_for_optimal_performance/index.html?index=..%2F..index": { - "title": "Snowpark Python: Top Three Tips for Optimal Performance", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "bestpractices,dataengineering,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/streamsets_transformer_for_snowflake_hol/index.html?index=..%2F..index": { - "title": "StreamSets' Transformer for Snowflake: Hands on Lab", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "dataengineering,datascience,gettingstarted,streamsets,web" - }, - "https://quickstarts.snowflake.com/guide/transform_your_data_with_coalesce/index.html?index=..%2F..index": { - "title": "Accelerate Transformations with Coalesce and Snowflake", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/vhol_data_vault/index.html?index=..%2F..index": { - "title": "Building a Real-Time Data Vault in Snowflake", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataenineering,datavault,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/vhol_fivetran/index.html?index=..%2F..index": { - "title": "Automating Data Pipelines to Drive Marketing Analytics with Snowflake & Fivetran", - "updated": "2023-08-15T02:36:56-07:00", - "tags": "dbt,fivetran,gettingstarted,marketinganalytics,web" - }, - "https://quickstarts.snowflake.com/guide/A Faster Path to Operational AI with Continual and Snowflake/index.html?index=..%2F..index": { - "title": "A Faster Path to Operational AI with Continual and Snowflake", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "dataengineering,datascience,gettingstarted,machinelearning,operationalai,web" - }, - "https://quickstarts.snowflake.com/guide/analyze_pdf_invoices_snowpark_python_java/index.html?index=..%2F..index": { - "title": "Analyze PDF Invoices using Snowpark for Java and Python", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,unstructureddata,web" - }, - "https://quickstarts.snowflake.com/guide/automl_with_snowflake_and_datarobot/index.html?index=..%2F..index": { - "title": "Accelerating Machine Learning with Snowflake and DataRobot", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "automl,databases,datarobot,fileformats,partnerconnect,stages,tables,web" - }, - "https://quickstarts.snowflake.com/guide/automl_with_snowflake_and_h2o/index.html?index=..%2F..index": { - "title": "AutoML with Snowflake and H2O Driverless AI", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "automl,databases,fileformats,h2o,partnerconnect,stages,tables,web" - }, - "https://quickstarts.snowflake.com/guide/data_science_with_dataiku/index.html?index=..%2F..index": { - "title": "Accelerating Data Science with Snowflake and Dataiku", - "updated": "2023-08-15T02:36:45-07:00", - "tags": "dataiku,datascience,web" - }, - "https://quickstarts.snowflake.com/guide/end_to_end_machine_learning_with_dataiku/index.html?index=..%2F..index": { - "title": "End to End Machine learning with Snowflake and Dataiku", - "updated": "2023-08-15T02:36:46-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/exploratory_data_analysis_with_snowflake_and_deepnote/index.html?index=..%2F..index": { - "title": "Exploratory Data Analysis with Snowflake and Deepnote", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "web" - }, - "https://quickstarts.snowflake.com/guide/frosty_llm_chatbot_on_streamlit_snowflake/index.html?index=..%2F..index": { - "title": "Frosty: Build an LLM Chatbot in Streamlit on your Snowflake Data", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "llms,openai,snowparkpython,streamlit,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_snowpark_machine_learning/index.html?index=..%2F..index": { - "title": "Machine Learning with Snowpark Python: - Credit Card Approval Prediction", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,machinelearning,snowpark,web" - }, - "https://quickstarts.snowflake.com/guide/harness_the_power_of_snowflake_with_informatica_idmc/index.html?index=..%2F..index": { - "title": "Harness the Power of Snowflake with Informatica Intelligent Data Management Cloud", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "dataengineering,dataintegration,elt,etl,gettingstarted,informatica,pdo,web" - }, - "https://quickstarts.snowflake.com/guide/hex-churn-model/index.html?index=..%2F..index": { - "title": "Churn modeling using Snowflake and Hex", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "hex,notebooks,partnerconnect,web" - }, - "https://quickstarts.snowflake.com/guide/hex/index.html?index=..%2F..index": { - "title": "Building and deploying a time series forecast with Hex + Snowflake", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "hex,notebooks,partnerconnect,web" - }, - "https://quickstarts.snowflake.com/guide/image_recognition_snowpark_pytorch_streamlit_openai/index.html?index=..%2F..index": { - "title": "A Image Recognition App in Snowflake using Snowpark Python, PyTorch, Streamlit and OpenAI", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "web" - }, - "https://quickstarts.snowflake.com/guide/machine_learning_with_aws_autopilot/index.html?index=..%2F..index": { - "title": "Snowflake and Amazon SageMaker Autopilot Integration: Machine Learning with SQL", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "datascience,gettingstarted,machinelearning,web" - }, - "https://quickstarts.snowflake.com/guide/machine_learning_with_saturncloud/index.html?index=..%2F..index": { - "title": "Machine Learning on Unstructured Data with Saturn Cloud and Snowflake", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "computervision,machinelearning,web" - }, - "https://quickstarts.snowflake.com/guide/predict_ad_impressions_with_ml_powered_analysis/index.html?index=..%2F..index": { - "title": "Predict Ad Impressions with ML-Powered Analysis", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "adtech,gettingstarted,machinelearning,web" - }, - "https://quickstarts.snowflake.com/guide/reach_and_frequency_queries/index.html?index=..%2F..index": { - "title": "Reach and Frequency queries for advertising measurement", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "adtech,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/recommendation_engine_aws_sagemaker/index.html?index=..%2F..index": { - "title": "Build a Recommendation Engine with AWS SageMaker", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "aws,machinelearning,sagemaker" - }, - "https://quickstarts.snowflake.com/guide/resource_optimization_billing_metrics/index.html?index=..%2F..index": { - "title": "Resource Optimization: Billing Metrics", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "billing,billingmetrics,costoptimization,monitoring,resourceoptimization,web" - }, - "https://quickstarts.snowflake.com/guide/seamless_ML_workflows_with_snowpark_and_deepnote/index.html?index=..%2F..index": { - "title": "Seamless Machine Learning Workflows with Snowpark & Deepnote", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "web" - }, - "https://quickstarts.snowflake.com/guide/secure-crosswalks-for-advertising-measurement/index.html?index=..%2F..index": { - "title": "Secure Crosswalks for Advertising Measurement", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "adtech,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/text_embedding_as_snowpark_python_udf/index.html?index=..%2F..index": { - "title": "Text Embedding As A Snowpark Python UDF", - "updated": "2023-08-17T14:16:52-07:00", - "tags": "datascience,machinelearning,nlp,snowparkpython,web" - }, - "https://quickstarts.snowflake.com/guide/time_series_forecasting_zepl/index.html?index=..%2F..index": { - "title": "Time Series Forecasting with Zepl", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataengineering,datascience,financialservices,gettingstarted,modeling,web" - }, - "https://quickstarts.snowflake.com/guide/vhol_snowflake_data_wrangler/index.html?index=..%2F..index": { - "title": "Data-centric Approach to Machine Learning Using Snowflake and Amazon SageMaker Data Wrangler", - "updated": "2023-08-15T02:36:56-07:00", - "tags": "datamarketplace,datawrangler,featureengineering,financialservices,machinelearning,sagemaker,storageintegration,web" - }, - "https://quickstarts.snowflake.com/guide/devops_dcm_schemachange_azure_devops/index.html?index=..%2F..index": { - "title": "DevOps: Database Change Management with schemachange and Azure DevOps", - "updated": "2023-08-15T02:36:46-07:00", - "tags": "dataengineering,devops,web" - }, - "https://quickstarts.snowflake.com/guide/devops_dcm_schemachange_github/index.html?index=..%2F..index": { - "title": "DevOps: Database Change Management with schemachange and GitHub", - "updated": "2023-08-15T02:36:46-07:00", - "tags": "dataengineering,devops,web" - }, - "https://quickstarts.snowflake.com/guide/devops_dcm_schemachange_jenkins/index.html?index=..%2F..index": { - "title": "DevOps: Database Change Management with schemachange and Jenkins", - "updated": "2023-08-15T02:36:46-07:00", - "tags": "dataengineering,devops,web" - }, - "https://quickstarts.snowflake.com/guide/devops_dcm_terraform_github/index.html?index=..%2F..index": { - "title": "DevOps: Database Change Management with Terraform and GitHub", - "updated": "2023-08-15T02:36:46-07:00", - "tags": "dataengineering,devops,web" - }, - "https://quickstarts.snowflake.com/guide/a_postman_tutorial_for_snowflake_sql_api/index.html?index=..%2F..index": { - "title": "A Postman Tutorial for the Snowflake SQL API", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "api,gettingstarted,postman,rest,web" - }, - "https://quickstarts.snowflake.com/guide/alert_on_events/index.html?index=..%2F..index": { - "title": "Getting Started with Event Tables and Alerts", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "dataapplications,dataengineering,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/data_apps_summit_lab/index.html?index=..%2F..index": { - "title": "Building a data application with Snowflake Marketplace, Snowpark and Streamlit", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/data_engineering_pipelines_with_snowpark_python/index.html?index=..%2F..index": { - "title": "Data Engineering Pipelines with Snowpark Python", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,python,snowpark,web" - }, - "https://quickstarts.snowflake.com/guide/data_engineering_with_datastage/index.html?index=..%2F..index": { - "title": "A Data Integration Guide: Load Banking Data into Snowflake with IBM DataStage", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,dataintegration,datastage,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/geocoding_address_data_with_mapbox/index.html?index=..%2F..index": { - "title": "Geocoding Address Data with Mapbox", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "api,geo,geocoding,geospatial,gettingstarted,spatial" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_dataengineering_ml_using_snowpark_python_ja/index.html?index=..%2F..index": { - "title": "Snowpark for Python\u3092\u4f7f\u7528\u3057\u305f\u30c7\u30fc\u30bf\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u3068ML\u306e\u5165\u9580", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,gettingstarted,ja,machinelearning,scikit-learn,snowparkpython,streamlit,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_dataengineering_ml_using_snowpark_python/index.html?index=..%2F..index": { - "title": "Getting Started with Data Engineering and ML using Snowpark for Python", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_generative_ai_snowflake_external_functions/index.html?index=..%2F..index": { - "title": "Getting Started with Generative AI in Snowflake and Streamlit", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowflake/index.html?index=..%2F..index": { - "title": "Getting Started with Snowflake - Zero to Snowflake", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "dataengineering,datascience,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_for_python_streamlit/index.html?index=..%2F..index": { - "title": "Getting Started With Snowpark for Python and Streamlit", - "updated": "2023-09-18T12:31:04-07:00", - "tags": "web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_in_snowflake_python_worksheets_ja/index.html?index=..%2F..index": { - "title": "Snowflake Python\u30ef\u30fc\u30af\u30b7\u30fc\u30c8\u306b\u3088\u308bSnowpark\u5165\u9580", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,gettingstarted,ja,pythonworksheets,snowparkpython,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_in_snowflake_python_worksheets/index.html?index=..%2F..index": { - "title": "Getting Started with Snowpark in Snowflake Python Worksheets", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpipe/index.html?index=..%2F..index": { - "title": "Getting Started with Snowpipe", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "auto-ingest,aws,dataengineering,gettingstarted,snowpipe,sql,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_streams_and_tasks/index.html?index=..%2F..index": { - "title": "Getting Started with Streams & Tasks", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "dataengineering,financialservices,streams,tasks,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_time_travel/index.html?index=..%2F..index": { - "title": "Getting Started with Time Travel", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "dataengineering,gettingstarted,hello,sql,test,web" - }, - "https://quickstarts.snowflake.com/guide/terraforming_snowflake/index.html?index=..%2F..index": { - "title": "Terraforming Snowflake", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataapplications,dataengineering,datascience,gettingstarted,terraform,web" - }, - "https://quickstarts.snowflake.com/guide/tour_of_ingest/index.html?index=..%2F..index": { - "title": "Tour of Ingest", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataapplications,dataengineering,datascience,gettingstarted,ingest,web" - }, - "https://quickstarts.snowflake.com/guide/Accelerate_Data_Transformation_with_the_Telecom_Data_Cloud/index.html?index=..%2F..index": { - "title": "Accelerate Data Transformation with the Telecom Data Cloud and Informatica", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "dataengineering,dataintegration,elt,etl,gettingstarted,informatica,pdo,web" - }, - "https://quickstarts.snowflake.com/guide/accelerating_data_teams_with_snowflake_and_dbt_cloud_hands_on_lab/index.html?index=..%2F..index": { - "title": "Accelerating Data Teams with Snowflake and dbt Cloud Hands On Lab", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "data,dataengineering,dbt,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/build_a_custom_api_in_java_on_aws/index.html?index=..%2F..index": { - "title": "Build a Custom API in Java on AWS", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "api,dataapplications,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/build_a_custom_api_in_python_on_aws/index.html?index=..%2F..index": { - "title": "Build a Custom API in Python on AWS", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "api,dataapplications,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/build_a_custom_api_in_python/index.html?index=..%2F..index": { - "title": "Build a Custom API in Python and Flask", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "api,dataapplications,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/build_a_data_app_with_snowflake/index.html?index=..%2F..index": { - "title": "Build a Data App with Snowflake", - "updated": "2023-08-17T14:16:52-07:00", - "tags": "api,dataapplications,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/build_an_ad_tag_in_python_on_aws/index.html?index=..%2F..index": { - "title": "Build an Ad Tag in Python on AWS", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "adtech,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/coherent_spark_connector/index.html?index=..%2F..index": { - "title": "Coherent Spark Connector - use business logic from Excel spreadsheets in Snowflake", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "coherent,coherentspark,excel,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/create_eureka_moments_with_data_world/index.html?index=..%2F..index": { - "title": "Create Eureka Moments with data.world and Snowflake", - "updated": "2023-08-15T02:36:44-07:00", - "tags": "datacatalog,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/end_to_end_nlp_and_ml_using_snowpark_python_and_streamlit:_sentiments_analysis/index.html?index=..%2F..index": { - "title": "NLP and ML with Snowpark Python and Streamlit for Sentiment Analysis", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "dataengineering,datascience,gettingstarted,nlp,snowpark,streamlit,web" - }, - "https://quickstarts.snowflake.com/guide/example_matt_marzillo/index.html?index=..%2F..index": { - "title": "Getting Started with Snowpark for Machine Learning on AzureML", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/financial-services-asset-management-snowflake/index.html?index=..%2F..index": { - "title": "Financial Services Asset Management", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/geo_analysis_geometry/index.html?index=..%2F..index": { - "title": "Geospatial Analysis using Geometry Data Type", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "geospatial,gettingstarted,pythonudfs,web" - }, - "https://quickstarts.snowflake.com/guide/geo_analysis_telecom/index.html?index=..%2F..index": { - "title": "Geospatial Analytics for Telecom with Snowflake and Carto", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "geospatial,gettingstarted,pythonudfs,web" - }, - "https://quickstarts.snowflake.com/guide/geospatial_analytics_with_snowflake_and_carto_ny/index.html?index=..%2F..index": { - "title": "Geospatial Analytics for Retail with Snowflake and CARTO", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_datameer/index.html?index=..%2F..index": { - "title": "Getting Started with Datameer", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "cataloging,exploration,gettingstarted,tranformation,visualisation,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_external_functions_aws/index.html?index=..%2F..index": { - "title": "Getting Started With External Functions on AWS", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "aws,dataengineering,externalfunctions,gettingstarted,sql" - }, - "https://quickstarts.snowflake.com/guide/getting_started_external_functions_azure/index.html?index=..%2F..index": { - "title": "Getting Started With External Functions on Azure", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "azure,dataengineering,datascience,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_external_functions_gcc/index.html?index=..%2F..index": { - "title": "Getting Started With External Functions Using the Google Cloud Console", - "updated": "2023-08-15T02:36:47-07:00", - "tags": "dataengineering,externalfunctions,gcc,gettingstarted,sql" - }, - "https://quickstarts.snowflake.com/guide/getting_started_keboola/index.html?index=..%2F..index": { - "title": "Getting Started with Keboola", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_snowflake_sql_api/index.html?index=..%2F..index": { - "title": "Getting Started With Snowflake SQL API", - "updated": "2023-08-15T02:36:48-07:00", - "tags": "dataengineering,gettingstarted,sql,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_amg_and_streamlit_on_real-time_dashboarding/index.html?index=..%2F..index": { - "title": "Getting Started with Amazon Managed Service for Grafana and Streamlit On Real-time Dashboarding", - "updated": "2023-08-15T16:54:15-07:00", - "tags": "amazonamg,applicationloadbalancer,container,ecs,gettingstarted,grafana,streaming,streamlit,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_feast_snowflake/index.html?index=..%2F..index": { - "title": "Getting Started with Snowpark for Python and Feast", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_geospatial_geography/index.html?index=..%2F..index": { - "title": "Getting Started with Geospatial - Geography", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "geospatial,gettingstarted,sql,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_native_apps/index.html?index=..%2F..index": { - "title": "Getting Started with Snowflake Native Apps", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,nativeapps,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_python/index.html?index=..%2F..index": { - "title": "Getting Started with Python", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "dataengineering,gettingstarted,python,snowsql,sql" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_search_optimization/index.html?index=..%2F..index": { - "title": "Getting started with Search Optimization", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "dataengineering,gettingstarted,performance,queryacceleration,search,searchoptimization,speed,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_sigma/index.html?index=..%2F..index": { - "title": "Getting Started with Sigma", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "analysis,analytics,bi,businessintelligence,dashboarding,dataengineering,excel,gettingstarted,sigma,sigmacomputing,snowflake,spreadsheet,visualization,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_dataframe_api/index.html?index=..%2F..index": { - "title": "Getting Started with Snowpark and the Dataframe API", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "dataengineering,datascience,gettingstarted,snowpark,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_for_machine_learning_on_sagemaker/index.html?index=..%2F..index": { - "title": "Getting Started with Snowpark for Machine Learning on SageMaker", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "dataengineering,datascience,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_python_scikit/index.html?index=..%2F..index": { - "title": "Getting Started with Snowpark for Python with Scikit-learn", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_scala/index.html?index=..%2F..index": { - "title": "Getting Started With Snowpark Scala", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowpipe_streaming_aws_msk/index.html?index=..%2F..index": { - "title": "Getting Started with Snowpipe Streaming and Amazon MSK", - "updated": "2023-08-15T16:54:15-07:00", - "tags": "amazonmsk,gettingstarted,kafka,snowpipestreaming,snowsql,streaming,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_snowsql/index.html?index=..%2F..index": { - "title": "Getting Started with SnowSQL", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "dataengineering,gettingstarted,snowsql,sql,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_unstructured_data/index.html?index=..%2F..index": { - "title": "Getting Started with Unstructured Data", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "dataengineering,datalake,datascience,gettingstarted,unstructureddata,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_user_defined_sql_functions/index.html?index=..%2F..index": { - "title": "Getting Started With User-Defined SQL Functions", - "updated": "2023-08-15T02:36:50-07:00", - "tags": "gettingstarted,sql,udf,udtf,web" - }, - "https://quickstarts.snowflake.com/guide/getting-started-django-snowflake/index.html?index=..%2F..index": { - "title": "Getting Started with Snowflake as a backend for Django", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,django,django-snowflake,gettingstarted,snowflake,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/how_to_resolve_data_with_fullcontact_and_snowflake/index.html?index=..%2F..index": { - "title": "How To Resolve Data with FullContact and Snowflake", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "web" - }, - "https://quickstarts.snowflake.com/guide/intro_to_machine_learning_with_snowpark_ml_for_python/index.html?index=..%2F..index": { - "title": "Intro to Machine Learning with Snowpark ML for Python", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,machinelearning,snowpark,web" - }, - "https://quickstarts.snowflake.com/guide/java_trace_events/index.html?index=..%2F..index": { - "title": "Using trace events in Java", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "gettingstarted,java,telemetry,traceevents,web" - }, - "https://quickstarts.snowflake.com/guide/leverage_dbt_cloud_to_generate_ml_ready_pipelines_using_snowpark_python/index.html?index=..%2F..index": { - "title": "Leverage dbt Cloud to Generate ML ready pipelines using Snowpark python", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/mlpf_forecasting_ad/index.html?index=..%2F..index": { - "title": "Getting Started with Anomaly Detection & Forecasting with Machine Learning Powered Functions (MLPFs)", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/modern_data_stack_with_fivetran_snowflake_salesforce/index.html?index=..%2F..index": { - "title": "Fivetran - Automate Salesforce Insights: Source, Target, Transformations, Dashboard...NO CODE", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "analytics,dataengineering,dbt,fivetran,gettingstarted,salesforce,web" - }, - "https://quickstarts.snowflake.com/guide/native-app-chairlift/index.html?index=..%2F..index": { - "title": "Build a Snowflake Native App to Analyze Chairlift Sensor Data", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "apps,dataengineering,datascience,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/power_apps_snowflake/index.html?index=..%2F..index": { - "title": "Getting Started with Power Apps and Snowflake", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,gettingstarted,microsoft,powerapps,powerautomate,powerplatform,web" - }, - "https://quickstarts.snowflake.com/guide/segment-retl-salesforce/index.html?index=..%2F..index": { - "title": "Use Segment Reverse ETL to sync your Snowflake customer table to Salesforce", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "gettingstarted,reverseetl,salesforce,segment,web" - }, - "https://quickstarts.snowflake.com/guide/soda/index.html?index=..%2F..index": { - "title": "Data Quality Testing with Soda", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "dataengineering,dataquality,datascience,datatesting,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/sundeck_opscenter/index.html?index=..%2F..index": { - "title": "Getting Started with Sundeck OpsCenter", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "costmanagement,gettingstarted,querymonitoring,web,workloadanalytics" - }, - "https://quickstarts.snowflake.com/guide/validate_your_customer_identity_model_with_identityqa/index.html?index=..%2F..index": { - "title": "Validate Your Customer Identity Model with IdentityQA", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataengineering,gettingstarted,identity,identitymodel,identitymodeling,identityresolution,nativeapps,web" - }, - "https://quickstarts.snowflake.com/guide/vhol_data_lake/index.html?index=..%2F..index": { - "title": "Snowflake for Data Lake", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "datalake,gettingstarted,unstructureddata,web" - }, - "https://quickstarts.snowflake.com/guide/attaining_consumer_insights_with_snowflake_and_microsoft_power_bi/index.html?index=..%2F..index": { - "title": "Attaining Consumer Insights with Snowflake and Microsoft Power BI", - "updated": "2023-08-15T02:36:43-07:00", - "tags": "dataengineering,datascience,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_alation/index.html?index=..%2F..index": { - "title": "Learn How Alation Powers Data Intelligence on Snowflake", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "alation,datacatalog,dataengineering,datagovernance,dataintelligence,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/visual_analytics_powered_by_snowflake_and_tableau/index.html?index=..%2F..index": { - "title": "Visual Analytics powered by Snowflake and Tableau", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datasharing,datavisualization,embeddedanalytics,web" - }, - "https://quickstarts.snowflake.com/guide/resource_optimization_performance_optimization/index.html?index=..%2F..index": { - "title": "Resource Optimization: Performance", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "costoptimization,monitoring,optimization,performance,performanceoptimization,resourceoptimization,web" - }, - "https://quickstarts.snowflake.com/guide/resource_optimization_setup/index.html?index=..%2F..index": { - "title": "Resource Optimization: Setup & Configuration", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "administration,configuration,consumption,costoptimization,monitoring,resourceoptimization,setup,web" - }, - "https://quickstarts.snowflake.com/guide/resource_optimization_usage_monitoring/index.html?index=..%2F..index": { - "title": "Resource Optimization: Usage Monitoring", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "consumption,costoptimization,monitoring,resourceoptimization,usage,usagemonitoring,web" - }, - "https://quickstarts.snowflake.com/guide/snowflake_build_secure_multitenant_data_applications_snowflake_sigma/index.html?index=..%2F..index": { - "title": "Build and Secure Multi-Tenant Data Applications with Snowflake and Sigma", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "sigma,snowflake,web" - }, - "https://quickstarts.snowflake.com/guide/sap_accounts_receivable_to_snowflake_using_adf/index.html?index=..%2F..index": { - "title": "SAP Accounts Receivable to Snowflake using ADF", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "adf,ar,azure,azuredatafactory,dataengineering,dbt,finance,gettingstarted,sap,tableau,web" - }, - "https://quickstarts.snowflake.com/guide/secure_audience_overlaps/index.html?index=..%2F..index": { - "title": "Simple and Secure Audience Overlaps", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "adtech,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/suppress_existing_customers_from_youtube_campaign_with_hightouch_and_snowflake/index.html?index=..%2F..index": { - "title": "Suppress existing customers from a Youtube campaign with Hightouch and Snowflake", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "adtech,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/vhol_snowflake_salesforce_tcrm/index.html?index=..%2F..index": { - "title": "Enrich Salesforce data with Snowflake to deliver your Customer 360", - "updated": "2023-08-15T02:36:56-07:00", - "tags": "customer360,databases,datamarketplace,fileformats,salesforce,stages,tableaucrm,tables,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_cybersyn_financial_and_economic_essentials_app/index.html?index=..%2F..index": { - "title": "Quickstart Guide: Cybersyn Financial & Economic Essentials App", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/getting_started_with_cybersyn_shopify_streamlit_native_app/index.html?index=..%2F..index": { - "title": "Quickstart Guide: Cybersyn Shopify Benchmarks App", - "updated": "2023-08-15T02:36:49-07:00", - "tags": "dataengineering,datascience,gettingstarted,twitter,web" - }, - "https://quickstarts.snowflake.com/guide/developing_tasty_bytes_react_native_application_with_snowflake_sql_api/index.html?index=..%2F..index": { - "title": "Tasty Bytes - Developing React Native Data Application with SQL API", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "api,dataapplications,dataengineering,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/integrating_tasty_bytes_location_recommendation_ml_model_into_the_react_native_data_app/index.html?index=..%2F..index": { - "title": "Integrating Tasty Bytes Location Recommendations ML model into the React Native Data Application", - "updated": "2023-08-15T02:36:52-07:00", - "tags": "api,dataapplications,dataengineering,datascience,gettingstarted,machinelearning,snowpark,web" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_introduction_ja/index.html?index=..%2F..index": { - "title": "Tasty Bytes\u306e\u7d39\u4ecb", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "gettingstarted,ja,tastybytes,web,zerotosnowflake" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_introduction/index.html?index=..%2F..index": { - "title": "An Introduction to Tasty Bytes", - "updated": "2023-08-15T02:36:53-07:00", - "tags": "gettingstarted,tastybytes,web,zerotosnowflake" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_snowpark_101_for_data_science_ja/index.html?index=..%2F..index": { - "title": "Tasty Bytes - \u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u306e\u305f\u3081\u306eSnowpark\u5165\u9580", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataengineering,datascience,gettingstarted,ja,machinelearning,snowpark,streamlit,web" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_snowpark_101_for_data_science/index.html?index=..%2F..index": { - "title": "Tasty Bytes - Snowpark 101 for Data Science", - "updated": "2023-09-18T11:52:17-07:00", - "tags": "dataengineering,datascience,gettingstarted,machinelearning,snowpark,streamlit,web" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_collaboration_ja/index.html?index=..%2F..index": { - "title": "Tasty Bytes - \u30bc\u30ed\u304b\u3089\u306eSnowflake - \u30b3\u30e9\u30dc\u30ec\u30fc\u30b7\u30e7\u30f3", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "gettingstarted,ja,tastybytes,web,zerotosnowflake" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_collaboration/index.html?index=..%2F..index": { - "title": "Tasty Bytes - Zero to Snowflake - Collaboration", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "gettingstarted,tastybytes,web,zerotosnowflake" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_data_governance/index.html?index=..%2F..index": { - "title": "Tasty Bytes - Zero to Snowflake - Data Governance", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "datagovernance,gettingstarted,tastybytes,web,zerotosnowflake" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_financial_governance/index.html?index=..%2F..index": { - "title": "Tasty Bytes - Zero to Snowflake - Financial Governance", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "financialgovernance,gettingstarted,tastybytes,web,zerotosnowflake" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_geospatial/index.html?index=..%2F..index": { - "title": "Tasty Bytes - Zero to Snowflake - Geospatial", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "geospatial,gettingstarted,tastybytes,web,zerotosnowflake" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_semi_structured_data_ja/index.html?index=..%2F..index": { - "title": "Tasty Bytes - \u30bc\u30ed\u304b\u3089\u306eSnowflake - \u534a\u69cb\u9020\u5316\u30c7\u30fc\u30bf", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataengineering,datawarehouse,gettingstarted,ja,web" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_semi_structured_data/index.html?index=..%2F..index": { - "title": "Tasty Bytes - Zero to Snowflake - Semi-Structured Data", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataengineering,datawarehouse,gettingstarted,web" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_transformation_ja/index.html?index=..%2F..index": { - "title": "Tasty Bytes - \u30bc\u30ed\u304b\u3089\u306eSnowflake - \u5909\u63db", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataengineering,datawarehouse,gettingstarted,ja,web" - }, - "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_transformation/index.html?index=..%2F..index": { - "title": "Tasty Bytes - Zero to Snowflake - Transformation", - "updated": "2023-08-15T02:36:55-07:00", - "tags": "dataengineering,datawarehouse,gettingstarted,web" - } -} \ No newline at end of file diff --git a/snowscraper/controller.py b/snowscraper/controller.py index c704296..e6d77f2 100644 --- a/snowscraper/controller.py +++ b/snowscraper/controller.py @@ -1,5 +1,3 @@ -import datetime -import json import argparse import importlib import pkgutil @@ -7,25 +5,19 @@ from pathlib import Path SCRAPERS = {} + def register_scraper(cls): SCRAPERS[cls.__name__] = cls return cls + def run_all(args: argparse.Namespace): results = {} for scraper_cls in SCRAPERS.values(): - scraper = scraper_cls() - results.update(scraper.scrape()) + scraper = scraper_cls(after=args.after) + results |= scraper.scrape() print(results) - # Save the results to a JSON file - with open('results.json', 'w') as json_file: - json.dump(results, json_file, indent=4, default=datetime_handler) - -def datetime_handler(obj): - if isinstance(obj, datetime.datetime): - return obj.isoformat() - raise TypeError("Unknown type") def import_scrapers(): directory = Path(__file__).resolve().parent / "scrapers" diff --git a/snowscraper/helpers.py b/snowscraper/helpers.py index 5afbcdb..dc7849d 100644 --- a/snowscraper/helpers.py +++ b/snowscraper/helpers.py @@ -1,15 +1,5 @@ from datetime import datetime -def unix_to_datetime_utc(timestamp_millis): - # Convert to seconds from milliseconds - timestamp_seconds = timestamp_millis / 1000.0 - - # Create a datetime object in UTC - dt_object = datetime.utcfromtimestamp(timestamp_seconds) - - # Format the datetime object as an ISO 8601 string - return dt_object.isoformat() + 'Z' # 'Z' indicates UTC time - def string_to_datetime(date_string): try: diff --git a/snowscraper/scrapers/medium.py b/snowscraper/scrapers/medium.py index fe91c88..3825801 100644 --- a/snowscraper/scrapers/medium.py +++ b/snowscraper/scrapers/medium.py @@ -1,1922 +1,32 @@ from datetime import datetime from datetime import timezone -import requests import feedparser from ..controller import register_scraper -from ..helpers import unix_to_datetime_utc +from ..helpers import string_to_datetime from ..scraper import BaseScraper -LONG_QUERY = {"query": """ -query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) { - collection(id: $collectionId) { - __typename - id - ...PublicationHomepage_collection - } -} - -fragment PublicationHomepage_collection on Collection { - id - ...PublisherHeader_publisher - ...PublisherHomepagePosts_publisher - ...usePublicationAnalytics_collection - ...CollectionMetadata_collection - __typename -} - -fragment PublisherHeader_publisher on Publisher { - id - ...PublisherHeaderBackground_publisher - ...PublisherHeaderNameplate_publisher - ...PublisherHeaderActions_publisher - ...PublisherHeaderNav_publisher - __typename -} - -fragment PublisherHeaderBackground_publisher on Publisher { - __typename - id - customStyleSheet { - ...PublisherHeaderBackground_customStyleSheet - __typename - id - } - ... on Collection { - colorPalette { - tintBackgroundSpectrum { - backgroundColor - __typename - } - __typename - } - isAuroraVisible - legacyHeaderBackgroundImage { - id - originalWidth - focusPercentX - focusPercentY - __typename - } - ...collectionTintBackgroundTheme_collection - __typename - id - } - ...publisherUrl_publisher -} - -fragment PublisherHeaderBackground_customStyleSheet on CustomStyleSheet { - id - global { - colorPalette { - background { - rgb - __typename - } - __typename - } - __typename - } - header { - headerScale - backgroundImageDisplayMode - backgroundImageVerticalAlignment - backgroundColorDisplayMode - backgroundColor { - alpha - rgb - ...getHexFromColorValue_colorValue - ...getOpaqueHexFromColorValue_colorValue - __typename - } - secondaryBackgroundColor { - ...getHexFromColorValue_colorValue - __typename - } - postBackgroundColor { - ...getHexFromColorValue_colorValue - __typename - } - backgroundImage { - id - originalWidth - __typename - } - __typename - } - __typename -} - -fragment getHexFromColorValue_colorValue on ColorValue { - rgb - alpha - __typename -} - -fragment getOpaqueHexFromColorValue_colorValue on ColorValue { - rgb - __typename -} - -fragment collectionTintBackgroundTheme_collection on Collection { - colorPalette { - ...collectionTintBackgroundTheme_colorPalette - __typename - } - customStyleSheet { - id - ...collectionTintBackgroundTheme_customStyleSheet - __typename - } - __typename - id -} - -fragment collectionTintBackgroundTheme_colorPalette on ColorPalette { - ...customTintBackgroundTheme_colorPalette - __typename -} - -fragment customTintBackgroundTheme_colorPalette on ColorPalette { - tintBackgroundSpectrum { - ...ThemeUtil_colorSpectrum - __typename - } - __typename -} - -fragment ThemeUtil_colorSpectrum on ColorSpectrum { - backgroundColor - ...ThemeUtilInterpolateHelpers_colorSpectrum - __typename -} - -fragment ThemeUtilInterpolateHelpers_colorSpectrum on ColorSpectrum { - colorPoints { - ...ThemeUtil_colorPoint - __typename - } - __typename -} - -fragment ThemeUtil_colorPoint on ColorPoint { - color - point - __typename -} - -fragment collectionTintBackgroundTheme_customStyleSheet on CustomStyleSheet { - id - ...customTintBackgroundTheme_customStyleSheet - __typename -} - -fragment customTintBackgroundTheme_customStyleSheet on CustomStyleSheet { - id - global { - colorPalette { - primary { - colorPalette { - ...customTintBackgroundTheme_colorPalette - __typename - } - __typename - } - __typename - } - __typename - } - __typename -} - -fragment publisherUrl_publisher on Publisher { - id - __typename - ... on Collection { - ...collectionUrl_collection - __typename - id - } - ... on User { - ...userUrl_user - __typename - id - } -} - -fragment collectionUrl_collection on Collection { - id - domain - slug - __typename -} - -fragment userUrl_user on User { - __typename - id - customDomainState { - live { - domain - __typename - } - __typename - } - hasSubdomain - username -} - -fragment PublisherHeaderNameplate_publisher on Publisher { - ...PublisherAvatar_publisher - ...PublisherHeaderLogo_publisher - ...PublisherFollowersCount_publisher - __typename -} - -fragment PublisherAvatar_publisher on Publisher { - __typename - ... on Collection { - id - ...CollectionAvatar_collection - __typename - } - ... on User { - id - ...UserAvatar_user - __typename - } -} - -fragment CollectionAvatar_collection on Collection { - name - avatar { - id - __typename - } - ...collectionUrl_collection - __typename - id -} - -fragment UserAvatar_user on User { - __typename - id - imageId - mediumMemberAt - name - username - ...userUrl_user -} - -fragment PublisherHeaderLogo_publisher on Publisher { - __typename - id - customStyleSheet { - id - header { - logoImage { - ...PublisherHeaderLogo_image - __typename - } - appNameColor { - ...getHexFromColorValue_colorValue - __typename - } - appNameTreatment - __typename - } - __typename - } - name - ... on Collection { - isAuroraVisible - logo { - id - originalHeight - originalWidth - __typename - } - __typename - id - } - ... on User { - ...useIsVerifiedBookAuthor_user - __typename - id - } - ...CustomHeaderTooltip_publisher - ...publisherUrl_publisher -} - -fragment PublisherHeaderLogo_image on ImageMetadata { - id - originalHeight - originalWidth - __typename -} - -fragment useIsVerifiedBookAuthor_user on User { - verifications { - isBookAuthor - __typename - } - __typename - id -} - -fragment CustomHeaderTooltip_publisher on Publisher { - __typename - id - customStyleSheet { - id - header { - appNameTreatment - nameTreatment - __typename - } - __typename - } - ... on Collection { - isAuroraVisible - slug - __typename - id - } -} - -fragment PublisherFollowersCount_publisher on Publisher { - id - __typename - id - ... on Collection { - slug - subscriberCount - ...collectionUrl_collection - __typename - id - } - ... on User { - socialStats { - followerCount - __typename - } - username - ...userUrl_user - __typename - id - } -} - -fragment PublisherHeaderActions_publisher on Publisher { - __typename - ...MetaHeaderPubMenu_publisher - ... on Collection { - ...CollectionFollowButton_collection - __typename - id - } - ... on User { - ...FollowAndSubscribeButtons_user - __typename - id - } -} - -fragment MetaHeaderPubMenu_publisher on Publisher { - __typename - ... on Collection { - ...MetaHeaderPubMenu_publisher_collection - __typename - id - } - ... on User { - ...MetaHeaderPubMenu_publisher_user - __typename - id - } -} - -fragment MetaHeaderPubMenu_publisher_collection on Collection { - id - slug - name - domain - newsletterV3 { - slug - __typename - id - } - ...MutePopoverOptions_collection - __typename -} - -fragment MutePopoverOptions_collection on Collection { - id - __typename -} - -fragment MetaHeaderPubMenu_publisher_user on User { - id - username - ...MutePopoverOptions_creator - __typename -} - -fragment MutePopoverOptions_creator on User { - id - __typename -} - -fragment CollectionFollowButton_collection on Collection { - __typename - id - name - slug - ...collectionUrl_collection - ...SusiClickable_collection -} - -fragment SusiClickable_collection on Collection { - ...SusiContainer_collection - __typename - id -} - -fragment SusiContainer_collection on Collection { - name - ...SignInOptions_collection - ...SignUpOptions_collection - __typename - id -} - -fragment SignInOptions_collection on Collection { - id - name - __typename -} - -fragment SignUpOptions_collection on Collection { - id - name - __typename -} - -fragment FollowAndSubscribeButtons_user on User { - ...UserFollowButton_user - ...UserSubscribeButton_user - __typename - id -} - -fragment UserFollowButton_user on User { - ...UserFollowButtonSignedIn_user - ...UserFollowButtonSignedOut_user - __typename - id -} - -fragment UserFollowButtonSignedIn_user on User { - id - name - __typename -} - -fragment UserFollowButtonSignedOut_user on User { - id - ...SusiClickable_user - __typename -} - -fragment SusiClickable_user on User { - ...SusiContainer_user - __typename - id -} - -fragment SusiContainer_user on User { - ...SignInOptions_user - ...SignUpOptions_user - __typename - id -} - -fragment SignInOptions_user on User { - id - name - __typename -} - -fragment SignUpOptions_user on User { - id - name - __typename -} - -fragment UserSubscribeButton_user on User { - id - isPartnerProgramEnrolled - name - viewerEdge { - id - isFollowing - isUser - __typename - } - viewerIsUser - newsletterV3 { - id - ...useNewsletterV3Subscription_newsletterV3 - __typename - } - ...useNewsletterV3Subscription_user - ...MembershipUpsellModal_user - __typename -} - -fragment useNewsletterV3Subscription_newsletterV3 on NewsletterV3 { - id - type - slug - name - collection { - slug - __typename - id - } - user { - id - name - username - newsletterV3 { - id - __typename - } - __typename - } - __typename -} - -fragment useNewsletterV3Subscription_user on User { - id - username - newsletterV3 { - ...useNewsletterV3Subscription_newsletterV3 - __typename - id - } - __typename -} - -fragment MembershipUpsellModal_user on User { - id - name - imageId - postSubscribeMembershipUpsellShownAt - newsletterV3 { - id - __typename - } - __typename -} - -fragment PublisherHeaderNav_publisher on Publisher { - __typename - id - customStyleSheet { - navigation { - navItems { - name - ...PublisherHeaderNavLink_headerNavigationItem - __typename - } - __typename - } - __typename - id - } - ...PublisherHeaderNavLink_publisher - ... on Collection { - domain - isAuroraVisible - slug - navItems { - tagSlug - title - url - __typename - } - __typename - id - } - ... on User { - customDomainState { - live { - domain - __typename - } - __typename - } - hasSubdomain - username - homePostsPublished: homepagePostsConnection(paging: {limit: 1}) { - posts { - id - __typename - } - __typename - } - ...useIsVerifiedBookAuthor_user - __typename - id - } -} - -fragment PublisherHeaderNavLink_headerNavigationItem on HeaderNavigationItem { - href - name - tags { - id - normalizedTagSlug - __typename - } - type - __typename -} - -fragment PublisherHeaderNavLink_publisher on Publisher { - __typename - id - ... on Collection { - slug - __typename - id - } -} - -fragment PublisherHomepagePosts_publisher on Publisher { - __typename - id - homepagePostsConnection( - paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom} - includeDistributedResponses: $includeDistributedResponses - ) { - posts { - inResponseToPostResult { - __typename - } - ...WithResponsesSidebar_post - ...PostPreview_post - __typename - } - pagingInfo { - next { - from - limit - __typename - } - __typename - } - __typename - } - ...CardByline_publisher - ...NewsletterV3Promo_publisher - ...PublisherHomepagePosts_user -} - -fragment WithResponsesSidebar_post on Post { - id - ...ThreadedResponsesSidebar_post - __typename -} - -fragment ThreadedResponsesSidebar_post on Post { - id - ...ThreadedResponsesSidebarContent_post - __typename -} - -fragment ThreadedResponsesSidebarContent_post on Post { - id - postResponses { - count - __typename - } - collection { - id - viewerEdge { - id - isEditor - __typename - } - __typename - } - creator { - id - __typename - } - ...ThreadedReplies_post - __typename -} - -fragment ThreadedReplies_post on Post { - __typename - id - ...ThreadedReply_post -} - -fragment ThreadedReply_post on Post { - __typename - id - ...ReadOrEditSimpleResponse_post - ...StoryResponse_post -} - -fragment ReadOrEditSimpleResponse_post on Post { - __typename - id - ...SimpleResponse_post -} - -fragment SimpleResponse_post on Post { - id - ...ResponseHeader_post - __typename -} - -fragment ResponseHeader_post on Post { - __typename - id - createdAt - firstPublishedAt - latestPublishedAt - creator { - id - name - ...UserAvatar_user - ...useIsVerifiedBookAuthor_user - ...UserMentionTooltip_user - __typename - } - ...ResponsePopoverMenu_post -} - -fragment UserMentionTooltip_user on User { - id - name - username - bio - imageId - mediumMemberAt - ...UserAvatar_user - ...UserFollowButton_user - ...useIsVerifiedBookAuthor_user - __typename -} - -fragment ResponsePopoverMenu_post on Post { - id - ...ReportUserMenuItem_post - ...HideResponseMenuItem_post - ...BlockUserMenuItem_post - ...UndoClapsMenuItem_post - __typename -} - -fragment ReportUserMenuItem_post on Post { - __typename - id - creator { - id - __typename - } - ...SusiClickable_post -} - -fragment SusiClickable_post on Post { - id - mediumUrl - ...SusiContainer_post - __typename -} - -fragment SusiContainer_post on Post { - id - __typename -} - -fragment HideResponseMenuItem_post on Post { - __typename - id - collection { - id - viewerEdge { - id - isEditor - __typename - } - __typename - } - creator { - id - __typename - } -} - -fragment BlockUserMenuItem_post on Post { - __typename - id - creator { - id - __typename - } -} - -fragment UndoClapsMenuItem_post on Post { - id - clapCount - __typename -} - -fragment StoryResponse_post on Post { - id - ...ResponseHeader_post - __typename -} - -fragment PostPreview_post on Post { - id - creator { - ...PostPreview_user - __typename - id - } - collection { - ...CardByline_collection - ...ExpandablePostByline_collection - __typename - id - } - ...InteractivePostBody_postPreview - firstPublishedAt - isLocked - isSeries - latestPublishedAt - inResponseToCatalogResult { - __typename - } - pinnedAt - pinnedByCreatorAt - previewImage { - id - focusPercentX - focusPercentY - __typename - } - readingTime - sequence { - slug - __typename - } - title - uniqueSlug - ...CardByline_post - ...PostFooterActionsBar_post - ...InResponseToEntityPreview_post - ...PostScrollTracker_post - ...HighDensityPreview_post - __typename -} - -fragment PostPreview_user on User { - __typename - name - username - ...CardByline_user - ...ExpandablePostByline_user - id -} - -fragment CardByline_user on User { - __typename - id - name - username - mediumMemberAt - socialStats { - followerCount - __typename - } - ...useIsVerifiedBookAuthor_user - ...userUrl_user - ...UserMentionTooltip_user -} - -fragment ExpandablePostByline_user on User { - __typename - id - name - imageId - ...userUrl_user - ...useIsVerifiedBookAuthor_user -} - -fragment CardByline_collection on Collection { - name - ...collectionUrl_collection - __typename - id -} - -fragment ExpandablePostByline_collection on Collection { - __typename - id - name - domain - slug -} - -fragment InteractivePostBody_postPreview on Post { - extendedPreviewContent( - truncationConfig: {previewParagraphsWordCountThreshold: 400, minimumWordLengthForTruncation: 150, truncateAtEndOfSentence: true, showFullImageCaptions: true, shortformPreviewParagraphsWordCountThreshold: 30, shortformMinimumWordLengthForTruncation: 30} - ) { - bodyModel { - ...PostBody_bodyModel - __typename - } - isFullContent - __typename - } - __typename - id -} - -fragment PostBody_bodyModel on RichText { - sections { - name - startIndex - textLayout - imageLayout - backgroundImage { - id - originalHeight - originalWidth - __typename - } - videoLayout - backgroundVideo { - videoId - originalHeight - originalWidth - previewImageId - __typename - } - __typename - } - paragraphs { - id - ...PostBodySection_paragraph - __typename - } - ...normalizedBodyModel_richText - __typename -} - -fragment PostBodySection_paragraph on Paragraph { - name - ...PostBodyParagraph_paragraph - __typename - id -} - -fragment PostBodyParagraph_paragraph on Paragraph { - name - type - ...ImageParagraph_paragraph - ...TextParagraph_paragraph - ...IframeParagraph_paragraph - ...MixtapeParagraph_paragraph - ...CodeBlockParagraph_paragraph - __typename - id -} - -fragment ImageParagraph_paragraph on Paragraph { - href - layout - metadata { - id - originalHeight - originalWidth - focusPercentX - focusPercentY - alt - __typename - } - ...Markups_paragraph - ...ParagraphRefsMapContext_paragraph - ...PostAnnotationsMarker_paragraph - __typename - id -} - -fragment Markups_paragraph on Paragraph { - name - text - hasDropCap - dropCapImage { - ...MarkupNode_data_dropCapImage - __typename - id - } - markups { - ...Markups_markup - __typename - } - __typename - id -} - -fragment MarkupNode_data_dropCapImage on ImageMetadata { - ...DropCap_image - __typename - id -} - -fragment DropCap_image on ImageMetadata { - id - originalHeight - originalWidth - __typename -} - -fragment Markups_markup on Markup { - type - start - end - href - anchorType - userId - linkMetadata { - httpStatus - __typename - } - __typename -} - -fragment ParagraphRefsMapContext_paragraph on Paragraph { - id - name - text - __typename -} - -fragment PostAnnotationsMarker_paragraph on Paragraph { - ...PostViewNoteCard_paragraph - __typename - id -} - -fragment PostViewNoteCard_paragraph on Paragraph { - name - __typename - id -} - -fragment TextParagraph_paragraph on Paragraph { - type - hasDropCap - codeBlockMetadata { - mode - lang - __typename - } - ...Markups_paragraph - ...ParagraphRefsMapContext_paragraph - __typename - id -} - -fragment IframeParagraph_paragraph on Paragraph { - type - iframe { - mediaResource { - id - iframeSrc - iframeHeight - iframeWidth - title - __typename - } - __typename - } - layout - ...Markups_paragraph - __typename - id -} - -fragment MixtapeParagraph_paragraph on Paragraph { - type - mixtapeMetadata { - href - mediaResource { - mediumCatalog { - id - __typename - } - __typename - } - __typename - } - ...GenericMixtapeParagraph_paragraph - __typename - id -} - -fragment GenericMixtapeParagraph_paragraph on Paragraph { - text - mixtapeMetadata { - href - thumbnailImageId - __typename - } - markups { - start - end - type - href - __typename - } - __typename - id -} - -fragment CodeBlockParagraph_paragraph on Paragraph { - codeBlockMetadata { - lang - mode - __typename - } - __typename - id -} - -fragment normalizedBodyModel_richText on RichText { - paragraphs { - ...normalizedBodyModel_richText_paragraphs - __typename - } - sections { - startIndex - ...getSectionEndIndex_section - __typename - } - ...getParagraphStyles_richText - ...getParagraphSpaces_richText - __typename -} - -fragment normalizedBodyModel_richText_paragraphs on Paragraph { - markups { - ...normalizedBodyModel_richText_paragraphs_markups - __typename - } - codeBlockMetadata { - lang - mode - __typename - } - ...getParagraphHighlights_paragraph - ...getParagraphPrivateNotes_paragraph - __typename - id -} - -fragment normalizedBodyModel_richText_paragraphs_markups on Markup { - type - __typename -} - -fragment getParagraphHighlights_paragraph on Paragraph { - name - __typename - id -} - -fragment getParagraphPrivateNotes_paragraph on Paragraph { - name - __typename - id -} - -fragment getSectionEndIndex_section on Section { - startIndex - __typename -} - -fragment getParagraphStyles_richText on RichText { - paragraphs { - text - type - __typename - } - sections { - ...getSectionEndIndex_section - __typename - } - __typename -} - -fragment getParagraphSpaces_richText on RichText { - paragraphs { - layout - metadata { - originalHeight - originalWidth - id - __typename - } - type - ...paragraphExtendsImageGrid_paragraph - __typename - } - ...getSeriesParagraphTopSpacings_richText - ...getPostParagraphTopSpacings_richText - __typename -} - -fragment paragraphExtendsImageGrid_paragraph on Paragraph { - layout - type - __typename - id -} - -fragment getSeriesParagraphTopSpacings_richText on RichText { - paragraphs { - id - __typename - } - sections { - ...getSectionEndIndex_section - __typename - } - __typename -} - -fragment getPostParagraphTopSpacings_richText on RichText { - paragraphs { - type - layout - text - codeBlockMetadata { - lang - mode - __typename - } - __typename - } - sections { - ...getSectionEndIndex_section - __typename - } - __typename -} - -fragment CardByline_post on Post { - ...DraftStatus_post - ...Star_post - ...shouldShowPublishedInStatus_post - __typename - id -} - -fragment DraftStatus_post on Post { - id - pendingCollection { - id - creator { - id - __typename - } - ...BoldCollectionName_collection - __typename - } - statusForCollection - creator { - id - __typename - } - isPublished - __typename -} - -fragment BoldCollectionName_collection on Collection { - id - name - __typename -} - -fragment Star_post on Post { - id - creator { - id - __typename - } - __typename -} - -fragment shouldShowPublishedInStatus_post on Post { - statusForCollection - isPublished - __typename - id -} - -fragment PostFooterActionsBar_post on Post { - id - visibility - allowResponses - postResponses { - count - __typename - } - isLimitedState - creator { - id - __typename - } - collection { - id - __typename - } - ...MultiVote_post - ...PostSharePopover_post - ...OverflowMenuButtonWithNegativeSignal_post - ...PostPageBookmarkButton_post - __typename -} - -fragment MultiVote_post on Post { - id - creator { - id - ...SusiClickable_user - __typename - } - isPublished - ...SusiClickable_post - collection { - id - slug - __typename - } - isLimitedState - ...MultiVoteCount_post - __typename -} - -fragment MultiVoteCount_post on Post { - id - __typename -} - -fragment PostSharePopover_post on Post { - id - mediumUrl - title - isPublished - isLocked - ...usePostUrl_post - ...FriendLink_post - __typename -} - -fragment usePostUrl_post on Post { - id - creator { - ...userUrl_user - __typename - id - } - collection { - id - domain - slug - __typename - } - isSeries - mediumUrl - sequence { - slug - __typename - } - uniqueSlug - __typename -} - -fragment FriendLink_post on Post { - id - ...SusiClickable_post - ...useCopyFriendLink_post - __typename -} - -fragment useCopyFriendLink_post on Post { - ...usePostUrl_post - __typename - id -} - -fragment OverflowMenuButtonWithNegativeSignal_post on Post { - id - visibility - ...OverflowMenuWithNegativeSignal_post - __typename -} - -fragment OverflowMenuWithNegativeSignal_post on Post { - id - creator { - id - __typename - } - collection { - id - __typename - } - ...OverflowMenuItemUndoClaps_post - ...AddToCatalogBase_post - __typename -} - -fragment OverflowMenuItemUndoClaps_post on Post { - id - clapCount - ...ClapMutation_post - __typename -} - -fragment ClapMutation_post on Post { - __typename - id - clapCount - ...MultiVoteCount_post -} - -fragment AddToCatalogBase_post on Post { - id - isPublished - __typename -} - -fragment PostPageBookmarkButton_post on Post { - ...AddToCatalogBookmarkButton_post - __typename - id -} - -fragment AddToCatalogBookmarkButton_post on Post { - ...AddToCatalogBase_post - __typename - id -} - -fragment InResponseToEntityPreview_post on Post { - id - inResponseToEntityType - __typename -} - -fragment PostScrollTracker_post on Post { - id - collection { - id - __typename - } - sequence { - sequenceId - __typename - } - __typename -} - -fragment HighDensityPreview_post on Post { - id - title - previewImage { - id - focusPercentX - focusPercentY - __typename - } - extendedPreviewContent( - truncationConfig: {previewParagraphsWordCountThreshold: 400, minimumWordLengthForTruncation: 150, truncateAtEndOfSentence: true, showFullImageCaptions: true, shortformPreviewParagraphsWordCountThreshold: 30, shortformMinimumWordLengthForTruncation: 30} - ) { - subtitle - __typename - } - ...HighDensityFooter_post - __typename -} - -fragment HighDensityFooter_post on Post { - id - readingTime - tags { - ...TopicPill_tag - __typename - } - ...BookmarkButton_post - ...ExpandablePostCardOverflowButton_post - ...OverflowMenuButtonWithNegativeSignal_post - __typename -} - -fragment TopicPill_tag on Tag { - __typename - id - displayTitle - normalizedTagSlug -} - -fragment BookmarkButton_post on Post { - visibility - ...SusiClickable_post - ...AddToCatalogBookmarkButton_post - __typename - id -} - -fragment ExpandablePostCardOverflowButton_post on Post { - creator { - id - __typename - } - ...ExpandablePostCardReaderButton_post - __typename - id -} - -fragment ExpandablePostCardReaderButton_post on Post { - id - collection { - id - __typename - } - creator { - id - __typename - } - clapCount - ...ClapMutation_post - __typename -} - -fragment CardByline_publisher on Publisher { - __typename - ... on User { - id - ...CardByline_user - __typename - } - ... on Collection { - id - ...CardByline_collection - __typename - } -} - -fragment NewsletterV3Promo_publisher on Publisher { - __typename - ... on User { - ...NewsletterV3Promo_user - __typename - id - } - ... on Collection { - ...NewsletterV3Promo_collection - __typename - id - } -} - -fragment NewsletterV3Promo_user on User { - id - username - name - viewerEdge { - isUser - __typename - id - } - newsletterV3 { - id - ...NewsletterV3Promo_newsletterV3 - __typename - } - __typename -} - -fragment NewsletterV3Promo_newsletterV3 on NewsletterV3 { - slug - name - description - promoHeadline - promoBody - ...NewsletterSubscribeComponent_newsletterV3 - __typename - id -} - -fragment NewsletterSubscribeComponent_newsletterV3 on NewsletterV3 { - ...NewsletterV3SubscribeButton_newsletterV3 - ...NewsletterV3SubscribeByEmail_newsletterV3 - __typename - id -} - -fragment NewsletterV3SubscribeButton_newsletterV3 on NewsletterV3 { - id - name - slug - type - user { - id - name - username - __typename - } - collection { - slug - ...SusiClickable_collection - ...collectionDefaultBackgroundTheme_collection - __typename - id - } - ...SusiClickable_newsletterV3 - ...useNewsletterV3Subscription_newsletterV3 - __typename -} - -fragment collectionDefaultBackgroundTheme_collection on Collection { - colorPalette { - ...collectionDefaultBackgroundTheme_colorPalette - __typename - } - customStyleSheet { - id - ...collectionDefaultBackgroundTheme_customStyleSheet - __typename - } - __typename - id -} - -fragment collectionDefaultBackgroundTheme_colorPalette on ColorPalette { - ...customDefaultBackgroundTheme_colorPalette - __typename -} - -fragment customDefaultBackgroundTheme_colorPalette on ColorPalette { - highlightSpectrum { - ...ThemeUtil_colorSpectrum - __typename - } - defaultBackgroundSpectrum { - ...ThemeUtil_colorSpectrum - __typename - } - tintBackgroundSpectrum { - ...ThemeUtil_colorSpectrum - __typename - } - __typename -} - -fragment collectionDefaultBackgroundTheme_customStyleSheet on CustomStyleSheet { - id - ...customDefaultBackgroundTheme_customStyleSheet - __typename -} - -fragment customDefaultBackgroundTheme_customStyleSheet on CustomStyleSheet { - id - global { - colorPalette { - primary { - colorPalette { - ...customDefaultBackgroundTheme_colorPalette - __typename - } - __typename - } - background { - colorPalette { - ...customDefaultBackgroundTheme_colorPalette - __typename - } - __typename - } - __typename - } - __typename - } - __typename -} - -fragment SusiClickable_newsletterV3 on NewsletterV3 { - ...SusiContainer_newsletterV3 - __typename - id -} - -fragment SusiContainer_newsletterV3 on NewsletterV3 { - ...SignInOptions_newsletterV3 - ...SignUpOptions_newsletterV3 - __typename - id -} - -fragment SignInOptions_newsletterV3 on NewsletterV3 { - id - name - __typename -} - -fragment SignUpOptions_newsletterV3 on NewsletterV3 { - id - name - __typename -} - -fragment NewsletterV3SubscribeByEmail_newsletterV3 on NewsletterV3 { - id - slug - type - user { - id - name - username - __typename - } - collection { - ...collectionDefaultBackgroundTheme_collection - ...collectionUrl_collection - __typename - id - } - __typename -} - -fragment NewsletterV3Promo_collection on Collection { - id - slug - domain - name - newsletterV3 { - id - ...NewsletterV3Promo_newsletterV3 - __typename - } - __typename -} - -fragment PublisherHomepagePosts_user on User { - id - ...useShowAuthorNewsletterV3Promo_user - __typename -} - -fragment useShowAuthorNewsletterV3Promo_user on User { - id - username - newsletterV3 { - id - showPromo - slug - __typename - } - __typename -} - -fragment usePublicationAnalytics_collection on Collection { - id - googleAnalyticsId - __typename -} - -fragment CollectionMetadata_collection on Collection { - avatar { - id - focusPercentX - focusPercentY - originalHeight - originalWidth - __typename - } - creator { - id - twitterScreenName - ...userUrl_user - __typename - } - description - domain - facebookPageId - name - tags - twitterUsername - createdAt - ptsQualifiedAt - customDomainState { - live { - status - isSubdomain - __typename - } - __typename - } - ...collectionUrl_collection - ...CollectionJsonLd_collection - __typename - id -} - -fragment CollectionJsonLd_collection on Collection { - id - logo { - ...PrepareLogoForJsonLd_imageMetadata - __typename - id - } - avatar { - id - focusPercentX - focusPercentY - originalHeight - originalWidth - __typename - } - domain - name - ...collectionUrl_collection - __typename -} - -fragment PrepareLogoForJsonLd_imageMetadata on ImageMetadata { - id - originalWidth - originalHeight - __typename -} -""" } - @register_scraper class MediumScraper(BaseScraper): - url = "https://medium.com/_/graphql" + url = "https://medium.com/feed/snowflake/tagged/snowflake" - def __init__(self, *args, **kwargs): + def __init__(self, after, *args, **kwargs): super(MediumScraper, self).__init__(*args, **kwargs) self.data = {} - self.after = datetime(1970, 1, 1, tzinfo=timezone.utc) - - def make_request(self, query_vars): - response = requests.post(self.url, json=query_vars) - post_data = response.json()["data"]["collection"]["homepagePostsConnection"] - paging_info = post_data['pagingInfo'] - return post_data["posts"], paging_info + self.after = after or datetime(1970, 1, 1, tzinfo=timezone.utc) def scrape(self): print("Scraping Medium") - query_vars = LONG_QUERY | { - "variables": { - "homepagePostsLimit": 25, - "includeDistributedResponses": False, - "collectionId": "34b6daafc07", - "homepagePostsFrom": "0" - } - } - - while True: - posts, paging_info = self.make_request(query_vars) - - for post in posts: - if post["visibility"] == "PUBLIC": - self.data[post["mediumUrl"]] = { - "title": post["title"], - "published": unix_to_datetime_utc(post["firstPublishedAt"]), - "updated": unix_to_datetime_utc(post["latestPublishedAt"]) - } - - if paging_info is None: - break - - query_vars['variables']['homepagePostsFrom'] = paging_info['next']['from'] - query_vars['variables']['homepagePostsLimit'] = paging_info['next']['limit'] - + for entry in feedparser.parse(MediumScraper.url)["entries"]: + updated = string_to_datetime(entry["updated"]) + if updated > self.after: + self.data[entry["link"]] = { + "title": entry["title"], + "published": string_to_datetime(entry["published"]), + "updated": updated, + } return self.data def transform(self): diff --git a/snowscraper/scrapers/quickstarts.py b/snowscraper/scrapers/quickstarts.py new file mode 100644 index 0000000..4b3ff07 --- /dev/null +++ b/snowscraper/scrapers/quickstarts.py @@ -0,0 +1,53 @@ +from datetime import datetime +from datetime import timezone + +import scrapy +from scrapy import signals +from scrapy.crawler import CrawlerProcess +from scrapy.signalmanager import dispatcher + +from ..controller import register_scraper +from ..scraper import BaseScraper +from snowscraper.helpers import string_to_datetime + +QuickStartsURL = "https://quickstarts.snowflake.com/" + + +@register_scraper +class QuickstartScraper(BaseScraper, scrapy.Spider): + name = "snowflakespider" + + def __init__(self, after, *args, **kwargs): + super(QuickstartScraper, self).__init__(*args, **kwargs) + self.data = {} + self.after = after or datetime(1970, 1, 1, tzinfo=timezone.utc) + + def start_requests(self): + yield scrapy.Request(url=QuickStartsURL, callback=self.parse) + + def signal_handler(self, signal, sender, item, response, spider): + self.data[item["key"]] = item + self.data[item["key"]].pop("key") + + def scrape(self): + print("Scraping Quickstarts") + dispatcher.connect(self.signal_handler, signal=signals.item_scraped) + process = CrawlerProcess({"LOG_LEVEL": "ERROR"}) + process.crawl(QuickstartScraper, after=self.after) + process.start() + return self.data + + def parse(self, response): + for card in response.css("card-sorter#cards > a.codelab-card"): + updated = string_to_datetime(card.attrib["data-updated"]) + if updated > self.after: + key = QuickStartsURL.rstrip("/") + card.attrib["href"] + yield { + "key": key, + "title": card.attrib["data-title"], + "updated": updated, + "tags": card.attrib["data-tags"], + } + + def transform(self): + return self.data