diff --git a/.github/workflows/scrape_and_commit.yml b/.github/workflows/scrape_and_commit.yml new file mode 100644 index 0000000..53b180a --- /dev/null +++ b/.github/workflows/scrape_and_commit.yml @@ -0,0 +1,38 @@ + +name: Run Snowscraper and Commit Changes + +on: + schedule: + - cron: '0 0 * * *' # Run daily at 00:00 UTC (GitHub Actions schedules are evaluated in UTC) + workflow_dispatch: # Allow manual trigger + +permissions: # The final step pushes a commit, so GITHUB_TOKEN needs write access to repository contents + contents: write + +jobs: + scrape-and-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.8' + + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run Scraper + run: python -m snowscraper.cli + + - name: Commit and Push Changes + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add -A + git commit -m "Update scraped data" || echo "No changes to commit" + git push diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..47b30eb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +feedparser +scrapy \ No newline at end of file diff --git a/results.json b/results.json new file mode 100644 index 0000000..0993a07 --- /dev/null +++ b/results.json @@ -0,0 +1,867 @@ +{ + "https://medium.com/snowflake/simplifying-security-data-ingestion-recent-snowflake-features-minimize-cost-and-complexity-b72d7eba77ab?source=rss----34b6daafc07---4": { + "title": "Simplifying Security Data Ingestion: Recent Snowflake features minimize cost and complexity", + "published": "2023-09-18T20:07:44", + "updated": "2023-09-18T23:50:04.696000+00:00" + }, + "https://medium.com/snowflake/ip-protection-in-snowflake-native-apps-dc45173af152?source=rss----34b6daafc07---4": { + "title": "IP Protection in Snowflake Native Apps", + "published": "2023-09-18T19:01:49", + "updated": "2023-09-18T19:01:48.495000+00:00" + }, + 
"https://medium.com/snowflake/sis-application-development-ci-cd-setup-with-github-actions-3a4279f57287?source=rss----34b6daafc07---4": { + "title": "SiS Application Development: CI/CD Setup with GitHub Actions", + "published": "2023-09-15T19:48:20", + "updated": "2023-09-15T19:48:20.672000+00:00" + }, + "https://medium.com/snowflake/finops-for-snowflake-f37d531eb8e2?source=rss----34b6daafc07---4": { + "title": "FinOps for Snowflake", + "published": "2023-09-15T17:40:31", + "updated": "2023-09-15T17:40:31.497000+00:00" + }, + "https://medium.com/snowflake/improving-llms-management-in-snowflake-e7e5c045f2db?source=rss----34b6daafc07---4": { + "title": "Improving LLMs management in Snowflake", + "published": "2023-09-15T17:01:58", + "updated": "2023-09-15T17:01:58.209000+00:00" + }, + "https://medium.com/snowflake/7-guardrails-against-common-mistakes-that-inflate-snowflake-credit-usage-cc2bf5421681?source=rss----34b6daafc07---4": { + "title": "7 guardrails against common mistakes that inflate Snowflake credit usage", + "published": "2023-09-15T16:13:01", + "updated": "2023-09-15T16:13:01.391000+00:00" + }, + "https://medium.com/snowflake/simplifying-data-ingestion-creating-a-data-pipeline-in-snowflake-with-sftp-e99033f230c2?source=rss----34b6daafc07---4": { + "title": "Simplifying Data Ingestion: Creating a Data pipeline in Snowflake with SFTP", + "published": "2023-09-14T19:01:47", + "updated": "2023-09-15T14:47:09.558000+00:00" + }, + "https://medium.com/snowflake/data-sharing-patterns-in-snowflake-3b526729efd7?source=rss----34b6daafc07---4": { + "title": "Data Sharing Patterns in Snowflake", + "published": "2023-09-14T15:51:56", + "updated": "2023-09-15T06:15:22.329000+00:00" + }, + "https://medium.com/snowflake/deep-dive-into-security-and-performance-isolation-of-snowflake-virtual-warehouses-73bdecc69f4?source=rss----34b6daafc07---4": { + "title": "Deep dive into security and performance isolation of Snowflake virtual warehouses", + "published": 
"2023-09-12T18:11:43", + "updated": "2023-09-15T02:10:02.212000+00:00" + }, + "https://medium.com/snowflake/snowflake-gen-ai-assistant-a838e1942d21?source=rss----34b6daafc07---4": { + "title": "Snowflake Gen-AI Assistant", + "published": "2023-09-12T11:01:39", + "updated": "2023-09-12T11:01:39.062000+00:00" + }, + "https://quickstarts.snowflake.com/guide/accelerate_your_graphql_development_on_snowflake_with_hasura/index.html?index=..%2F..index": { + "title": "Accelerate Your GraphQL Development on Snowflake with Hasura", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "api,gettingstarted,graphql,hasura,quickstart,rest,web" + }, + "https://quickstarts.snowflake.com/guide/analyzing_real_estate_properties_with_streamlit/index.html?index=..%2F..index": { + "title": "Analyzing real estate properties using Streamlit", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "datascience&ml,solutionexamples,web" + }, + "https://quickstarts.snowflake.com/guide/build_customer_facing_applications_using_sigma_and_snowflake/index.html?index=..%2F..index": { + "title": "Build Customer Facing Applications Using Sigma and Snowflake", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "dataapplications,dataengineering,gettingstarted,sigma,web" + }, + "https://quickstarts.snowflake.com/guide/data_app/index.html?index=..%2F..index": { + "title": "Building a Data Application", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "api,dataapplications,dataengineering,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_thoughtspot/index.html?index=..%2F..index": { + "title": "Build a ReactJS app with ThoughtSpot and Snowflake", + "updated": "2023-08-15T02:36:48-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/vhol_data_marketplace_app/index.html?index=..%2F..index": { + "title": "Building an application on Snowflake with data from Snowflake Marketplace", + "updated": 
"2023-08-15T02:36:55-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/dcdf_incremental_processing/index.html?index=..%2F..index": { + "title": "Getting Started with DCDF Data Architecture Incremental Processing & Logical Partitions", + "updated": "2023-08-15T02:36:46-07:00", + "tags": "dataarchitecture,dataclouddeploymentframework,dataengineering,dcdf,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/servicenow_to_snowflake_connector/index.html?index=..%2F..index": { + "title": "Snowflake Connector for ServiceNow Installation", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "connectors,dataengineering,servicenow,web" + }, + "https://quickstarts.snowflake.com/guide/altr_get_started/index.html?index=..%2F..index": { + "title": "ALTR Quickstart - Data Access Control", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "dataowner,datasecurity,datasteward,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/cloudtrail_log_ingestion/index.html?index=..%2F..index": { + "title": "AWS Cloudtrail Ingestion", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "aws,cybersecurity,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_pii/index.html?index=..%2F..index": { + "title": "Process PII data using Snowflake RBAC, DAC, Row Access Policies, and Column Level Security", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "compliance,datagovernance,gettingstarted,masking,pii,rowlevelsecurity,security,sensitivedata,web" + }, + "https://quickstarts.snowflake.com/guide/integrating_fluentd_with_snowflake/index.html?index=..%2F..index": { + "title": "Using Fluentd to Send Log Files to Snowflake for Security Analytics", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "datagovernance,siem,web" + }, + "https://quickstarts.snowflake.com/guide/python_camouflage/index.html?index=..%2F..index": { + "title": "Tokenization in Snowflake Using Python UDFs 
(Python Camouflage)", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "dataengineering,datascience,encryption,gettingstarted,python,security,tokenization,web" + }, + "https://quickstarts.snowflake.com/guide/s3_access_log_ingestion/index.html?index=..%2F..index": { + "title": "AWS S3 Access Logs Ingestion", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "cybersecurity,datalossprotection,web" + }, + "https://quickstarts.snowflake.com/guide/security_dashboards_for_snowflake/index.html?index=..%2F..index": { + "title": "Snowflake Security Dashboards", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "dashboards,security,snowsight,web" + }, + "https://quickstarts.snowflake.com/guide/vpc_flow_log_ingestion/index.html?index=..%2F..index": { + "title": "AWS VPC Flow Logs Ingestion", + "updated": "2023-08-15T02:36:56-07:00", + "tags": "cspm,cybersecurity,siem,vpcflowlogs,web" + }, + "https://quickstarts.snowflake.com/guide/auto_ingest_twitter_data/index.html?index=..%2F..index": { + "title": "Auto-Ingest Twitter Data into Snowflake", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "autoingest,cloudstorage,snowpipe,twitter" + }, + "https://quickstarts.snowflake.com/guide/build_a_data_clean_room_in_snowflake_advanced/index.html?index=..%2F..index": { + "title": "Build A Data Clean Room in Snowflake - Advanced", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "datacleanrooms,dataengineering,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/build_a_multiparty_clean_room_in_snowflake/index.html?index=..%2F..index": { + "title": "Build a Multiparty Data Clean Room in Snowflake", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "datacleanrooms,dataengineering,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/cdc_data_from_oracle_to_snowflake_in_streamsets/index.html?index=..%2F..index": { + "title": "Process Change Data Capture (CDC) data from Oracle to Snowflake Using StreamSets", + "updated": "2023-08-15T02:36:44-07:00", 
+ "tags": "oraclecdc,web" + }, + "https://quickstarts.snowflake.com/guide/CDC_SnowpipeStreaming_DynamicTables/index.html?index=..%2F..index": { + "title": "Snowpipe Streaming and Dynamic Tables for Real-Time Ingestion (CDC Use Case)", + "updated": "2023-09-18T11:53:39-07:00", + "tags": "cdc,dataengineering,dynamictables,financialservices,snowpipe,streaming,web" + }, + "https://quickstarts.snowflake.com/guide/cloud_native_data_engineering_with_matillion_and_snowflake/index.html?index=..%2F..index": { + "title": "Cloud Native Data Engineering with Matillion and Snowflake", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "dataengineering,datatransformation,gettingstarted,partner,web" + }, + "https://quickstarts.snowflake.com/guide/cross_cloud_business_continuity/index.html?index=..%2F..index": { + "title": "Cross Cloud Business Continuity With Snowflake", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/data_engineering_streaming_integration/index.html?index=..%2F..index": { + "title": "Streaming Data Integration with Snowflake", + "updated": "2023-08-15T02:36:45-07:00", + "tags": "dataengineering,snowpipe,streaming,web" + }, + "https://quickstarts.snowflake.com/guide/data_engineering_with_apache_airflow/index.html?index=..%2F..index": { + "title": "Data Engineering with Apache Airflow, Snowflake & dbt", + "updated": "2023-08-15T02:36:45-07:00", + "tags": "airflow,dataengineering,dbt,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/data_engineering_with_snowpark_python_and_dbt/index.html?index=..%2F..index": { + "title": "Data Engineering with Snowpark Python and dbt", + "updated": "2023-08-15T02:36:45-07:00", + "tags": "dataengineering,dbt,web" + }, + "https://quickstarts.snowflake.com/guide/data_teams_with_dbt_core/index.html?index=..%2F..index": { + "title": "Accelerating Data Teams with dbt Core & Snowflake", + "updated": 
"2023-08-15T02:36:46-07:00", + "tags": "dataengineering,datasharing,dbt,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/database_modeling_with_sqldbm/index.html?index=..%2F..index": { + "title": "Cloud-native Database Modeling with SqlDBM", + "updated": "2023-08-15T02:36:46-07:00", + "tags": "cicd,dataengineering,datamodeling,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/extract_attributes_dicom_files_java_udf/index.html?index=..%2F..index": { + "title": "Extract Attributes from DICOM Files using Snowpark for Python and Java", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "dataengineering,datascience,unstructureddata,web" + }, + "https://quickstarts.snowflake.com/guide/ingest_data_from_pubsub_to_snowflake_with_apache_beam/index.html?index=..%2F..index": { + "title": "Ingest data from PubSub to Snowflake with Apache Beam", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "apachebeam,dataengineering,web" + }, + "https://quickstarts.snowflake.com/guide/parsing_semi_structured_data_with_coalesce/index.html?index=..%2F..index": { + "title": "Parsing Semi-Structured Data with Coalesce", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/processing_hl7_fhir_messages_with_snowflake/index.html?index=..%2F..index": { + "title": "Getting Started - Processing HL7 FHIR Messages with Snowflake", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "dataengineering,datalake,datascience,fhirpwd,gettingstarted,health&lifesciences,hl7,web" + }, + "https://quickstarts.snowflake.com/guide/processing_hl7_v2_messages_with_snowflake/index.html?index=..%2F..index": { + "title": "Getting Started - Processing HL7 V2 Messages with Snowflake", + "updated": "2023-09-18T13:24:41-07:00", + "tags": "dataengineering,datalake,datascience,fhir,gettingstarted,health&lifesciences,hl7,unstructureddata,web" + }, + 
"https://quickstarts.snowflake.com/guide/snowflake_transformer/index.html?index=..%2F..index": { + "title": "A Dive Into Slowly Changing Dimensions with Snowpark and StreamSets", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "dataengineering,gettingstarted,snowpark,web" + }, + "https://quickstarts.snowflake.com/guide/snowpark_python_top_three_tips_for_optimal_performance/index.html?index=..%2F..index": { + "title": "Snowpark Python: Top Three Tips for Optimal Performance", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "bestpractices,dataengineering,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/streamsets_transformer_for_snowflake_hol/index.html?index=..%2F..index": { + "title": "StreamSets' Transformer for Snowflake: Hands on Lab", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "dataengineering,datascience,gettingstarted,streamsets,web" + }, + "https://quickstarts.snowflake.com/guide/transform_your_data_with_coalesce/index.html?index=..%2F..index": { + "title": "Accelerate Transformations with Coalesce and Snowflake", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/vhol_data_vault/index.html?index=..%2F..index": { + "title": "Building a Real-Time Data Vault in Snowflake", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataenineering,datavault,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/vhol_fivetran/index.html?index=..%2F..index": { + "title": "Automating Data Pipelines to Drive Marketing Analytics with Snowflake & Fivetran", + "updated": "2023-08-15T02:36:56-07:00", + "tags": "dbt,fivetran,gettingstarted,marketinganalytics,web" + }, + "https://quickstarts.snowflake.com/guide/A Faster Path to Operational AI with Continual and Snowflake/index.html?index=..%2F..index": { + "title": "A Faster Path to Operational AI with Continual and Snowflake", + "updated": "2023-08-15T02:36:43-07:00", + "tags": 
"dataengineering,datascience,gettingstarted,machinelearning,operationalai,web" + }, + "https://quickstarts.snowflake.com/guide/analyze_pdf_invoices_snowpark_python_java/index.html?index=..%2F..index": { + "title": "Analyze PDF Invoices using Snowpark for Java and Python", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,unstructureddata,web" + }, + "https://quickstarts.snowflake.com/guide/automl_with_snowflake_and_datarobot/index.html?index=..%2F..index": { + "title": "Accelerating Machine Learning with Snowflake and DataRobot", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "automl,databases,datarobot,fileformats,partnerconnect,stages,tables,web" + }, + "https://quickstarts.snowflake.com/guide/automl_with_snowflake_and_h2o/index.html?index=..%2F..index": { + "title": "AutoML with Snowflake and H2O Driverless AI", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "automl,databases,fileformats,h2o,partnerconnect,stages,tables,web" + }, + "https://quickstarts.snowflake.com/guide/data_science_with_dataiku/index.html?index=..%2F..index": { + "title": "Accelerating Data Science with Snowflake and Dataiku", + "updated": "2023-08-15T02:36:45-07:00", + "tags": "dataiku,datascience,web" + }, + "https://quickstarts.snowflake.com/guide/end_to_end_machine_learning_with_dataiku/index.html?index=..%2F..index": { + "title": "End to End Machine learning with Snowflake and Dataiku", + "updated": "2023-08-15T02:36:46-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/exploratory_data_analysis_with_snowflake_and_deepnote/index.html?index=..%2F..index": { + "title": "Exploratory Data Analysis with Snowflake and Deepnote", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "web" + }, + "https://quickstarts.snowflake.com/guide/frosty_llm_chatbot_on_streamlit_snowflake/index.html?index=..%2F..index": { + "title": "Frosty: Build an LLM Chatbot in Streamlit on your Snowflake 
Data", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "llms,openai,snowparkpython,streamlit,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_snowpark_machine_learning/index.html?index=..%2F..index": { + "title": "Machine Learning with Snowpark Python: - Credit Card Approval Prediction", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,machinelearning,snowpark,web" + }, + "https://quickstarts.snowflake.com/guide/harness_the_power_of_snowflake_with_informatica_idmc/index.html?index=..%2F..index": { + "title": "Harness the Power of Snowflake with Informatica Intelligent Data Management Cloud", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "dataengineering,dataintegration,elt,etl,gettingstarted,informatica,pdo,web" + }, + "https://quickstarts.snowflake.com/guide/hex-churn-model/index.html?index=..%2F..index": { + "title": "Churn modeling using Snowflake and Hex", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "hex,notebooks,partnerconnect,web" + }, + "https://quickstarts.snowflake.com/guide/hex/index.html?index=..%2F..index": { + "title": "Building and deploying a time series forecast with Hex + Snowflake", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "hex,notebooks,partnerconnect,web" + }, + "https://quickstarts.snowflake.com/guide/image_recognition_snowpark_pytorch_streamlit_openai/index.html?index=..%2F..index": { + "title": "A Image Recognition App in Snowflake using Snowpark Python, PyTorch, Streamlit and OpenAI", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "web" + }, + "https://quickstarts.snowflake.com/guide/machine_learning_with_aws_autopilot/index.html?index=..%2F..index": { + "title": "Snowflake and Amazon SageMaker Autopilot Integration: Machine Learning with SQL", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "datascience,gettingstarted,machinelearning,web" + }, + 
"https://quickstarts.snowflake.com/guide/machine_learning_with_saturncloud/index.html?index=..%2F..index": { + "title": "Machine Learning on Unstructured Data with Saturn Cloud and Snowflake", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "computervision,machinelearning,web" + }, + "https://quickstarts.snowflake.com/guide/predict_ad_impressions_with_ml_powered_analysis/index.html?index=..%2F..index": { + "title": "Predict Ad Impressions with ML-Powered Analysis", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "adtech,gettingstarted,machinelearning,web" + }, + "https://quickstarts.snowflake.com/guide/reach_and_frequency_queries/index.html?index=..%2F..index": { + "title": "Reach and Frequency queries for advertising measurement", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "adtech,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/recommendation_engine_aws_sagemaker/index.html?index=..%2F..index": { + "title": "Build a Recommendation Engine with AWS SageMaker", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "aws,machinelearning,sagemaker" + }, + "https://quickstarts.snowflake.com/guide/resource_optimization_billing_metrics/index.html?index=..%2F..index": { + "title": "Resource Optimization: Billing Metrics", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "billing,billingmetrics,costoptimization,monitoring,resourceoptimization,web" + }, + "https://quickstarts.snowflake.com/guide/seamless_ML_workflows_with_snowpark_and_deepnote/index.html?index=..%2F..index": { + "title": "Seamless Machine Learning Workflows with Snowpark & Deepnote", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "web" + }, + "https://quickstarts.snowflake.com/guide/secure-crosswalks-for-advertising-measurement/index.html?index=..%2F..index": { + "title": "Secure Crosswalks for Advertising Measurement", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "adtech,gettingstarted,web" + }, + 
"https://quickstarts.snowflake.com/guide/text_embedding_as_snowpark_python_udf/index.html?index=..%2F..index": { + "title": "Text Embedding As A Snowpark Python UDF", + "updated": "2023-08-17T14:16:52-07:00", + "tags": "datascience,machinelearning,nlp,snowparkpython,web" + }, + "https://quickstarts.snowflake.com/guide/time_series_forecasting_zepl/index.html?index=..%2F..index": { + "title": "Time Series Forecasting with Zepl", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataengineering,datascience,financialservices,gettingstarted,modeling,web" + }, + "https://quickstarts.snowflake.com/guide/vhol_snowflake_data_wrangler/index.html?index=..%2F..index": { + "title": "Data-centric Approach to Machine Learning Using Snowflake and Amazon SageMaker Data Wrangler", + "updated": "2023-08-15T02:36:56-07:00", + "tags": "datamarketplace,datawrangler,featureengineering,financialservices,machinelearning,sagemaker,storageintegration,web" + }, + "https://quickstarts.snowflake.com/guide/devops_dcm_schemachange_azure_devops/index.html?index=..%2F..index": { + "title": "DevOps: Database Change Management with schemachange and Azure DevOps", + "updated": "2023-08-15T02:36:46-07:00", + "tags": "dataengineering,devops,web" + }, + "https://quickstarts.snowflake.com/guide/devops_dcm_schemachange_github/index.html?index=..%2F..index": { + "title": "DevOps: Database Change Management with schemachange and GitHub", + "updated": "2023-08-15T02:36:46-07:00", + "tags": "dataengineering,devops,web" + }, + "https://quickstarts.snowflake.com/guide/devops_dcm_schemachange_jenkins/index.html?index=..%2F..index": { + "title": "DevOps: Database Change Management with schemachange and Jenkins", + "updated": "2023-08-15T02:36:46-07:00", + "tags": "dataengineering,devops,web" + }, + "https://quickstarts.snowflake.com/guide/devops_dcm_terraform_github/index.html?index=..%2F..index": { + "title": "DevOps: Database Change Management with Terraform and GitHub", + "updated": 
"2023-08-15T02:36:46-07:00", + "tags": "dataengineering,devops,web" + }, + "https://quickstarts.snowflake.com/guide/a_postman_tutorial_for_snowflake_sql_api/index.html?index=..%2F..index": { + "title": "A Postman Tutorial for the Snowflake SQL API", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "api,gettingstarted,postman,rest,web" + }, + "https://quickstarts.snowflake.com/guide/alert_on_events/index.html?index=..%2F..index": { + "title": "Getting Started with Event Tables and Alerts", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "dataapplications,dataengineering,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/data_apps_summit_lab/index.html?index=..%2F..index": { + "title": "Building a data application with Snowflake Marketplace, Snowpark and Streamlit", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/data_engineering_pipelines_with_snowpark_python/index.html?index=..%2F..index": { + "title": "Data Engineering Pipelines with Snowpark Python", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,python,snowpark,web" + }, + "https://quickstarts.snowflake.com/guide/data_engineering_with_datastage/index.html?index=..%2F..index": { + "title": "A Data Integration Guide: Load Banking Data into Snowflake with IBM DataStage", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,dataintegration,datastage,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/geocoding_address_data_with_mapbox/index.html?index=..%2F..index": { + "title": "Geocoding Address Data with Mapbox", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "api,geo,geocoding,geospatial,gettingstarted,spatial" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_dataengineering_ml_using_snowpark_python_ja/index.html?index=..%2F..index": { + "title": "Snowpark for 
Python\u3092\u4f7f\u7528\u3057\u305f\u30c7\u30fc\u30bf\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u3068ML\u306e\u5165\u9580", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,gettingstarted,ja,machinelearning,scikit-learn,snowparkpython,streamlit,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_dataengineering_ml_using_snowpark_python/index.html?index=..%2F..index": { + "title": "Getting Started with Data Engineering and ML using Snowpark for Python", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_generative_ai_snowflake_external_functions/index.html?index=..%2F..index": { + "title": "Getting Started with Generative AI in Snowflake and Streamlit", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowflake/index.html?index=..%2F..index": { + "title": "Getting Started with Snowflake - Zero to Snowflake", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "dataengineering,datascience,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_for_python_streamlit/index.html?index=..%2F..index": { + "title": "Getting Started With Snowpark for Python and Streamlit", + "updated": "2023-09-18T12:31:04-07:00", + "tags": "web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_in_snowflake_python_worksheets_ja/index.html?index=..%2F..index": { + "title": "Snowflake Python\u30ef\u30fc\u30af\u30b7\u30fc\u30c8\u306b\u3088\u308bSnowpark\u5165\u9580", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,gettingstarted,ja,pythonworksheets,snowparkpython,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_in_snowflake_python_worksheets/index.html?index=..%2F..index": { + "title": "Getting Started with Snowpark in Snowflake Python Worksheets", + "updated": 
"2023-08-15T02:36:50-07:00", + "tags": "web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowpipe/index.html?index=..%2F..index": { + "title": "Getting Started with Snowpipe", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "auto-ingest,aws,dataengineering,gettingstarted,snowpipe,sql,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_streams_and_tasks/index.html?index=..%2F..index": { + "title": "Getting Started with Streams & Tasks", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "dataengineering,financialservices,streams,tasks,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_time_travel/index.html?index=..%2F..index": { + "title": "Getting Started with Time Travel", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "dataengineering,gettingstarted,hello,sql,test,web" + }, + "https://quickstarts.snowflake.com/guide/terraforming_snowflake/index.html?index=..%2F..index": { + "title": "Terraforming Snowflake", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataapplications,dataengineering,datascience,gettingstarted,terraform,web" + }, + "https://quickstarts.snowflake.com/guide/tour_of_ingest/index.html?index=..%2F..index": { + "title": "Tour of Ingest", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataapplications,dataengineering,datascience,gettingstarted,ingest,web" + }, + "https://quickstarts.snowflake.com/guide/Accelerate_Data_Transformation_with_the_Telecom_Data_Cloud/index.html?index=..%2F..index": { + "title": "Accelerate Data Transformation with the Telecom Data Cloud and Informatica", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "dataengineering,dataintegration,elt,etl,gettingstarted,informatica,pdo,web" + }, + "https://quickstarts.snowflake.com/guide/accelerating_data_teams_with_snowflake_and_dbt_cloud_hands_on_lab/index.html?index=..%2F..index": { + "title": "Accelerating Data Teams with Snowflake and dbt Cloud Hands On Lab", + "updated": 
"2023-09-18T11:52:17-07:00", + "tags": "data,dataengineering,dbt,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/build_a_custom_api_in_java_on_aws/index.html?index=..%2F..index": { + "title": "Build a Custom API in Java on AWS", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "api,dataapplications,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/build_a_custom_api_in_python_on_aws/index.html?index=..%2F..index": { + "title": "Build a Custom API in Python on AWS", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "api,dataapplications,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/build_a_custom_api_in_python/index.html?index=..%2F..index": { + "title": "Build a Custom API in Python and Flask", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "api,dataapplications,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/build_a_data_app_with_snowflake/index.html?index=..%2F..index": { + "title": "Build a Data App with Snowflake", + "updated": "2023-08-17T14:16:52-07:00", + "tags": "api,dataapplications,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/build_an_ad_tag_in_python_on_aws/index.html?index=..%2F..index": { + "title": "Build an Ad Tag in Python on AWS", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "adtech,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/coherent_spark_connector/index.html?index=..%2F..index": { + "title": "Coherent Spark Connector - use business logic from Excel spreadsheets in Snowflake", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "coherent,coherentspark,excel,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/create_eureka_moments_with_data_world/index.html?index=..%2F..index": { + "title": "Create Eureka Moments with data.world and Snowflake", + "updated": "2023-08-15T02:36:44-07:00", + "tags": "datacatalog,gettingstarted,web" + }, + 
"https://quickstarts.snowflake.com/guide/end_to_end_nlp_and_ml_using_snowpark_python_and_streamlit:_sentiments_analysis/index.html?index=..%2F..index": { + "title": "NLP and ML with Snowpark Python and Streamlit for Sentiment Analysis", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "dataengineering,datascience,gettingstarted,nlp,snowpark,streamlit,web" + }, + "https://quickstarts.snowflake.com/guide/example_matt_marzillo/index.html?index=..%2F..index": { + "title": "Getting Started with Snowpark for Machine Learning on AzureML", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/financial-services-asset-management-snowflake/index.html?index=..%2F..index": { + "title": "Financial Services Asset Management", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/geo_analysis_geometry/index.html?index=..%2F..index": { + "title": "Geospatial Analysis using Geometry Data Type", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "geospatial,gettingstarted,pythonudfs,web" + }, + "https://quickstarts.snowflake.com/guide/geo_analysis_telecom/index.html?index=..%2F..index": { + "title": "Geospatial Analytics for Telecom with Snowflake and Carto", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "geospatial,gettingstarted,pythonudfs,web" + }, + "https://quickstarts.snowflake.com/guide/geospatial_analytics_with_snowflake_and_carto_ny/index.html?index=..%2F..index": { + "title": "Geospatial Analytics for Retail with Snowflake and CARTO", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_datameer/index.html?index=..%2F..index": { + "title": "Getting Started with Datameer", + "updated": "2023-08-15T02:36:47-07:00", + "tags": 
"cataloging,exploration,gettingstarted,tranformation,visualisation,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_external_functions_aws/index.html?index=..%2F..index": { + "title": "Getting Started With External Functions on AWS", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "aws,dataengineering,externalfunctions,gettingstarted,sql" + }, + "https://quickstarts.snowflake.com/guide/getting_started_external_functions_azure/index.html?index=..%2F..index": { + "title": "Getting Started With External Functions on Azure", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "azure,dataengineering,datascience,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_external_functions_gcc/index.html?index=..%2F..index": { + "title": "Getting Started With External Functions Using the Google Cloud Console", + "updated": "2023-08-15T02:36:47-07:00", + "tags": "dataengineering,externalfunctions,gcc,gettingstarted,sql" + }, + "https://quickstarts.snowflake.com/guide/getting_started_keboola/index.html?index=..%2F..index": { + "title": "Getting Started with Keboola", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_snowflake_sql_api/index.html?index=..%2F..index": { + "title": "Getting Started With Snowflake SQL API", + "updated": "2023-08-15T02:36:48-07:00", + "tags": "dataengineering,gettingstarted,sql,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_amg_and_streamlit_on_real-time_dashboarding/index.html?index=..%2F..index": { + "title": "Getting Started with Amazon Managed Service for Grafana and Streamlit On Real-time Dashboarding", + "updated": "2023-08-15T16:54:15-07:00", + "tags": "amazonamg,applicationloadbalancer,container,ecs,gettingstarted,grafana,streaming,streamlit,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_feast_snowflake/index.html?index=..%2F..index": { 
+ "title": "Getting Started with Snowpark for Python and Feast", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_geospatial_geography/index.html?index=..%2F..index": { + "title": "Getting Started with Geospatial - Geography", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "geospatial,gettingstarted,sql,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_native_apps/index.html?index=..%2F..index": { + "title": "Getting Started with Snowflake Native Apps", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,nativeapps,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_python/index.html?index=..%2F..index": { + "title": "Getting Started with Python", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "dataengineering,gettingstarted,python,snowsql,sql" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_search_optimization/index.html?index=..%2F..index": { + "title": "Getting started with Search Optimization", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "dataengineering,gettingstarted,performance,queryacceleration,search,searchoptimization,speed,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_sigma/index.html?index=..%2F..index": { + "title": "Getting Started with Sigma", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "analysis,analytics,bi,businessintelligence,dashboarding,dataengineering,excel,gettingstarted,sigma,sigmacomputing,snowflake,spreadsheet,visualization,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_dataframe_api/index.html?index=..%2F..index": { + "title": "Getting Started with Snowpark and the Dataframe API", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "dataengineering,datascience,gettingstarted,snowpark,web" + }, + 
"https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_for_machine_learning_on_sagemaker/index.html?index=..%2F..index": { + "title": "Getting Started with Snowpark for Machine Learning on SageMaker", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "dataengineering,datascience,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_python_scikit/index.html?index=..%2F..index": { + "title": "Getting Started with Snowpark for Python with Scikit-learn", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_scala/index.html?index=..%2F..index": { + "title": "Getting Started With Snowpark Scala", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowpipe_streaming_aws_msk/index.html?index=..%2F..index": { + "title": "Getting Started with Snowpipe Streaming and Amazon MSK", + "updated": "2023-08-15T16:54:15-07:00", + "tags": "amazonmsk,gettingstarted,kafka,snowpipestreaming,snowsql,streaming,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_snowsql/index.html?index=..%2F..index": { + "title": "Getting Started with SnowSQL", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "dataengineering,gettingstarted,snowsql,sql,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_unstructured_data/index.html?index=..%2F..index": { + "title": "Getting Started with Unstructured Data", + "updated": "2023-08-15T02:36:50-07:00", + "tags": "dataengineering,datalake,datascience,gettingstarted,unstructureddata,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_user_defined_sql_functions/index.html?index=..%2F..index": { + "title": "Getting Started With User-Defined SQL Functions", + "updated": 
"2023-08-15T02:36:50-07:00", + "tags": "gettingstarted,sql,udf,udtf,web" + }, + "https://quickstarts.snowflake.com/guide/getting-started-django-snowflake/index.html?index=..%2F..index": { + "title": "Getting Started with Snowflake as a backend for Django", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,django,django-snowflake,gettingstarted,snowflake,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/how_to_resolve_data_with_fullcontact_and_snowflake/index.html?index=..%2F..index": { + "title": "How To Resolve Data with FullContact and Snowflake", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "web" + }, + "https://quickstarts.snowflake.com/guide/intro_to_machine_learning_with_snowpark_ml_for_python/index.html?index=..%2F..index": { + "title": "Intro to Machine Learning with Snowpark ML for Python", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,machinelearning,snowpark,web" + }, + "https://quickstarts.snowflake.com/guide/java_trace_events/index.html?index=..%2F..index": { + "title": "Using trace events in Java", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "gettingstarted,java,telemetry,traceevents,web" + }, + "https://quickstarts.snowflake.com/guide/leverage_dbt_cloud_to_generate_ml_ready_pipelines_using_snowpark_python/index.html?index=..%2F..index": { + "title": "Leverage dbt Cloud to Generate ML ready pipelines using Snowpark python", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/mlpf_forecasting_ad/index.html?index=..%2F..index": { + "title": "Getting Started with Anomaly Detection & Forecasting with Machine Learning Powered Functions (MLPFs)", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + 
"https://quickstarts.snowflake.com/guide/modern_data_stack_with_fivetran_snowflake_salesforce/index.html?index=..%2F..index": { + "title": "Fivetran - Automate Salesforce Insights: Source, Target, Transformations, Dashboard...NO CODE", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "analytics,dataengineering,dbt,fivetran,gettingstarted,salesforce,web" + }, + "https://quickstarts.snowflake.com/guide/native-app-chairlift/index.html?index=..%2F..index": { + "title": "Build a Snowflake Native App to Analyze Chairlift Sensor Data", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "apps,dataengineering,datascience,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/power_apps_snowflake/index.html?index=..%2F..index": { + "title": "Getting Started with Power Apps and Snowflake", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,gettingstarted,microsoft,powerapps,powerautomate,powerplatform,web" + }, + "https://quickstarts.snowflake.com/guide/segment-retl-salesforce/index.html?index=..%2F..index": { + "title": "Use Segment Reverse ETL to sync your Snowflake customer table to Salesforce", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "gettingstarted,reverseetl,salesforce,segment,web" + }, + "https://quickstarts.snowflake.com/guide/soda/index.html?index=..%2F..index": { + "title": "Data Quality Testing with Soda", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "dataengineering,dataquality,datascience,datatesting,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/sundeck_opscenter/index.html?index=..%2F..index": { + "title": "Getting Started with Sundeck OpsCenter", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "costmanagement,gettingstarted,querymonitoring,web,workloadanalytics" + }, + "https://quickstarts.snowflake.com/guide/validate_your_customer_identity_model_with_identityqa/index.html?index=..%2F..index": { + "title": "Validate Your Customer Identity Model with IdentityQA", + "updated": 
"2023-08-15T02:36:55-07:00", + "tags": "dataengineering,gettingstarted,identity,identitymodel,identitymodeling,identityresolution,nativeapps,web" + }, + "https://quickstarts.snowflake.com/guide/vhol_data_lake/index.html?index=..%2F..index": { + "title": "Snowflake for Data Lake", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "datalake,gettingstarted,unstructureddata,web" + }, + "https://quickstarts.snowflake.com/guide/attaining_consumer_insights_with_snowflake_and_microsoft_power_bi/index.html?index=..%2F..index": { + "title": "Attaining Consumer Insights with Snowflake and Microsoft Power BI", + "updated": "2023-08-15T02:36:43-07:00", + "tags": "dataengineering,datascience,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_alation/index.html?index=..%2F..index": { + "title": "Learn How Alation Powers Data Intelligence on Snowflake", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "alation,datacatalog,dataengineering,datagovernance,dataintelligence,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/visual_analytics_powered_by_snowflake_and_tableau/index.html?index=..%2F..index": { + "title": "Visual Analytics powered by Snowflake and Tableau", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datasharing,datavisualization,embeddedanalytics,web" + }, + "https://quickstarts.snowflake.com/guide/resource_optimization_performance_optimization/index.html?index=..%2F..index": { + "title": "Resource Optimization: Performance", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "costoptimization,monitoring,optimization,performance,performanceoptimization,resourceoptimization,web" + }, + "https://quickstarts.snowflake.com/guide/resource_optimization_setup/index.html?index=..%2F..index": { + "title": "Resource Optimization: Setup & Configuration", + "updated": "2023-08-15T02:36:52-07:00", + "tags": 
"administration,configuration,consumption,costoptimization,monitoring,resourceoptimization,setup,web" + }, + "https://quickstarts.snowflake.com/guide/resource_optimization_usage_monitoring/index.html?index=..%2F..index": { + "title": "Resource Optimization: Usage Monitoring", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "consumption,costoptimization,monitoring,resourceoptimization,usage,usagemonitoring,web" + }, + "https://quickstarts.snowflake.com/guide/snowflake_build_secure_multitenant_data_applications_snowflake_sigma/index.html?index=..%2F..index": { + "title": "Build and Secure Multi-Tenant Data Applications with Snowflake and Sigma", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "sigma,snowflake,web" + }, + "https://quickstarts.snowflake.com/guide/sap_accounts_receivable_to_snowflake_using_adf/index.html?index=..%2F..index": { + "title": "SAP Accounts Receivable to Snowflake using ADF", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "adf,ar,azure,azuredatafactory,dataengineering,dbt,finance,gettingstarted,sap,tableau,web" + }, + "https://quickstarts.snowflake.com/guide/secure_audience_overlaps/index.html?index=..%2F..index": { + "title": "Simple and Secure Audience Overlaps", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "adtech,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/suppress_existing_customers_from_youtube_campaign_with_hightouch_and_snowflake/index.html?index=..%2F..index": { + "title": "Suppress existing customers from a Youtube campaign with Hightouch and Snowflake", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "adtech,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/vhol_snowflake_salesforce_tcrm/index.html?index=..%2F..index": { + "title": "Enrich Salesforce data with Snowflake to deliver your Customer 360", + "updated": "2023-08-15T02:36:56-07:00", + "tags": "customer360,databases,datamarketplace,fileformats,salesforce,stages,tableaucrm,tables,web" + }, + 
"https://quickstarts.snowflake.com/guide/getting_started_with_cybersyn_financial_and_economic_essentials_app/index.html?index=..%2F..index": { + "title": "Quickstart Guide: Cybersyn Financial & Economic Essentials App", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/getting_started_with_cybersyn_shopify_streamlit_native_app/index.html?index=..%2F..index": { + "title": "Quickstart Guide: Cybersyn Shopify Benchmarks App", + "updated": "2023-08-15T02:36:49-07:00", + "tags": "dataengineering,datascience,gettingstarted,twitter,web" + }, + "https://quickstarts.snowflake.com/guide/developing_tasty_bytes_react_native_application_with_snowflake_sql_api/index.html?index=..%2F..index": { + "title": "Tasty Bytes - Developing React Native Data Application with SQL API", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "api,dataapplications,dataengineering,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/integrating_tasty_bytes_location_recommendation_ml_model_into_the_react_native_data_app/index.html?index=..%2F..index": { + "title": "Integrating Tasty Bytes Location Recommendations ML model into the React Native Data Application", + "updated": "2023-08-15T02:36:52-07:00", + "tags": "api,dataapplications,dataengineering,datascience,gettingstarted,machinelearning,snowpark,web" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_introduction_ja/index.html?index=..%2F..index": { + "title": "Tasty Bytes\u306e\u7d39\u4ecb", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "gettingstarted,ja,tastybytes,web,zerotosnowflake" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_introduction/index.html?index=..%2F..index": { + "title": "An Introduction to Tasty Bytes", + "updated": "2023-08-15T02:36:53-07:00", + "tags": "gettingstarted,tastybytes,web,zerotosnowflake" + }, + 
"https://quickstarts.snowflake.com/guide/tasty_bytes_snowpark_101_for_data_science_ja/index.html?index=..%2F..index": { + "title": "Tasty Bytes - \u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u306e\u305f\u3081\u306eSnowpark\u5165\u9580", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataengineering,datascience,gettingstarted,ja,machinelearning,snowpark,streamlit,web" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_snowpark_101_for_data_science/index.html?index=..%2F..index": { + "title": "Tasty Bytes - Snowpark 101 for Data Science", + "updated": "2023-09-18T11:52:17-07:00", + "tags": "dataengineering,datascience,gettingstarted,machinelearning,snowpark,streamlit,web" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_collaboration_ja/index.html?index=..%2F..index": { + "title": "Tasty Bytes - \u30bc\u30ed\u304b\u3089\u306eSnowflake - \u30b3\u30e9\u30dc\u30ec\u30fc\u30b7\u30e7\u30f3", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "gettingstarted,ja,tastybytes,web,zerotosnowflake" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_collaboration/index.html?index=..%2F..index": { + "title": "Tasty Bytes - Zero to Snowflake - Collaboration", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "gettingstarted,tastybytes,web,zerotosnowflake" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_data_governance/index.html?index=..%2F..index": { + "title": "Tasty Bytes - Zero to Snowflake - Data Governance", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "datagovernance,gettingstarted,tastybytes,web,zerotosnowflake" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_financial_governance/index.html?index=..%2F..index": { + "title": "Tasty Bytes - Zero to Snowflake - Financial Governance", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "financialgovernance,gettingstarted,tastybytes,web,zerotosnowflake" + }, + 
"https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_geospatial/index.html?index=..%2F..index": { + "title": "Tasty Bytes - Zero to Snowflake - Geospatial", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "geospatial,gettingstarted,tastybytes,web,zerotosnowflake" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_semi_structured_data_ja/index.html?index=..%2F..index": { + "title": "Tasty Bytes - \u30bc\u30ed\u304b\u3089\u306eSnowflake - \u534a\u69cb\u9020\u5316\u30c7\u30fc\u30bf", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataengineering,datawarehouse,gettingstarted,ja,web" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_semi_structured_data/index.html?index=..%2F..index": { + "title": "Tasty Bytes - Zero to Snowflake - Semi-Structured Data", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataengineering,datawarehouse,gettingstarted,web" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_transformation_ja/index.html?index=..%2F..index": { + "title": "Tasty Bytes - \u30bc\u30ed\u304b\u3089\u306eSnowflake - \u5909\u63db", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataengineering,datawarehouse,gettingstarted,ja,web" + }, + "https://quickstarts.snowflake.com/guide/tasty_bytes_zero_to_snowflake_transformation/index.html?index=..%2F..index": { + "title": "Tasty Bytes - Zero to Snowflake - Transformation", + "updated": "2023-08-15T02:36:55-07:00", + "tags": "dataengineering,datawarehouse,gettingstarted,web" + } +} \ No newline at end of file diff --git a/snowscraper/controller.py b/snowscraper/controller.py index e6d77f2..c704296 100644 --- a/snowscraper/controller.py +++ b/snowscraper/controller.py @@ -1,3 +1,5 @@ +import datetime +import json import argparse import importlib import pkgutil @@ -5,19 +7,25 @@ from pathlib import Path SCRAPERS = {} - def register_scraper(cls): SCRAPERS[cls.__name__] = cls return cls - def run_all(args: 
argparse.Namespace): results = {} for scraper_cls in SCRAPERS.values(): - scraper = scraper_cls(after=args.after) - results |= scraper.scrape() + scraper = scraper_cls() + results.update(scraper.scrape()) print(results) + # Save the results to a JSON file + with open('results.json', 'w') as json_file: + json.dump(results, json_file, indent=4, default=datetime_handler) + +def datetime_handler(obj): + if isinstance(obj, datetime.datetime): + return obj.isoformat() + raise TypeError("Unknown type") def import_scrapers(): directory = Path(__file__).resolve().parent / "scrapers" diff --git a/snowscraper/helpers.py b/snowscraper/helpers.py index dc7849d..5afbcdb 100644 --- a/snowscraper/helpers.py +++ b/snowscraper/helpers.py @@ -1,5 +1,15 @@ from datetime import datetime +def unix_to_datetime_utc(timestamp_millis): + # Convert to seconds from milliseconds + timestamp_seconds = timestamp_millis / 1000.0 + + # Create a datetime object in UTC + dt_object = datetime.utcfromtimestamp(timestamp_seconds) + + # Format the datetime object as an ISO 8601 string + return dt_object.isoformat() + 'Z' # 'Z' indicates UTC time + def string_to_datetime(date_string): try: diff --git a/snowscraper/scrapers/medium.py b/snowscraper/scrapers/medium.py index 3825801..fe91c88 100644 --- a/snowscraper/scrapers/medium.py +++ b/snowscraper/scrapers/medium.py @@ -1,32 +1,1922 @@ from datetime import datetime from datetime import timezone +import requests import feedparser from ..controller import register_scraper -from ..helpers import string_to_datetime +from ..helpers import unix_to_datetime_utc from ..scraper import BaseScraper +LONG_QUERY = {"query": """ +query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) { + collection(id: $collectionId) { + __typename + id + ...PublicationHomepage_collection + } +} + +fragment PublicationHomepage_collection on Collection { + id + 
...PublisherHeader_publisher + ...PublisherHomepagePosts_publisher + ...usePublicationAnalytics_collection + ...CollectionMetadata_collection + __typename +} + +fragment PublisherHeader_publisher on Publisher { + id + ...PublisherHeaderBackground_publisher + ...PublisherHeaderNameplate_publisher + ...PublisherHeaderActions_publisher + ...PublisherHeaderNav_publisher + __typename +} + +fragment PublisherHeaderBackground_publisher on Publisher { + __typename + id + customStyleSheet { + ...PublisherHeaderBackground_customStyleSheet + __typename + id + } + ... on Collection { + colorPalette { + tintBackgroundSpectrum { + backgroundColor + __typename + } + __typename + } + isAuroraVisible + legacyHeaderBackgroundImage { + id + originalWidth + focusPercentX + focusPercentY + __typename + } + ...collectionTintBackgroundTheme_collection + __typename + id + } + ...publisherUrl_publisher +} + +fragment PublisherHeaderBackground_customStyleSheet on CustomStyleSheet { + id + global { + colorPalette { + background { + rgb + __typename + } + __typename + } + __typename + } + header { + headerScale + backgroundImageDisplayMode + backgroundImageVerticalAlignment + backgroundColorDisplayMode + backgroundColor { + alpha + rgb + ...getHexFromColorValue_colorValue + ...getOpaqueHexFromColorValue_colorValue + __typename + } + secondaryBackgroundColor { + ...getHexFromColorValue_colorValue + __typename + } + postBackgroundColor { + ...getHexFromColorValue_colorValue + __typename + } + backgroundImage { + id + originalWidth + __typename + } + __typename + } + __typename +} + +fragment getHexFromColorValue_colorValue on ColorValue { + rgb + alpha + __typename +} + +fragment getOpaqueHexFromColorValue_colorValue on ColorValue { + rgb + __typename +} + +fragment collectionTintBackgroundTheme_collection on Collection { + colorPalette { + ...collectionTintBackgroundTheme_colorPalette + __typename + } + customStyleSheet { + id + ...collectionTintBackgroundTheme_customStyleSheet + __typename + 
} + __typename + id +} + +fragment collectionTintBackgroundTheme_colorPalette on ColorPalette { + ...customTintBackgroundTheme_colorPalette + __typename +} + +fragment customTintBackgroundTheme_colorPalette on ColorPalette { + tintBackgroundSpectrum { + ...ThemeUtil_colorSpectrum + __typename + } + __typename +} + +fragment ThemeUtil_colorSpectrum on ColorSpectrum { + backgroundColor + ...ThemeUtilInterpolateHelpers_colorSpectrum + __typename +} + +fragment ThemeUtilInterpolateHelpers_colorSpectrum on ColorSpectrum { + colorPoints { + ...ThemeUtil_colorPoint + __typename + } + __typename +} + +fragment ThemeUtil_colorPoint on ColorPoint { + color + point + __typename +} + +fragment collectionTintBackgroundTheme_customStyleSheet on CustomStyleSheet { + id + ...customTintBackgroundTheme_customStyleSheet + __typename +} + +fragment customTintBackgroundTheme_customStyleSheet on CustomStyleSheet { + id + global { + colorPalette { + primary { + colorPalette { + ...customTintBackgroundTheme_colorPalette + __typename + } + __typename + } + __typename + } + __typename + } + __typename +} + +fragment publisherUrl_publisher on Publisher { + id + __typename + ... on Collection { + ...collectionUrl_collection + __typename + id + } + ... on User { + ...userUrl_user + __typename + id + } +} + +fragment collectionUrl_collection on Collection { + id + domain + slug + __typename +} + +fragment userUrl_user on User { + __typename + id + customDomainState { + live { + domain + __typename + } + __typename + } + hasSubdomain + username +} + +fragment PublisherHeaderNameplate_publisher on Publisher { + ...PublisherAvatar_publisher + ...PublisherHeaderLogo_publisher + ...PublisherFollowersCount_publisher + __typename +} + +fragment PublisherAvatar_publisher on Publisher { + __typename + ... on Collection { + id + ...CollectionAvatar_collection + __typename + } + ... 
on User { + id + ...UserAvatar_user + __typename + } +} + +fragment CollectionAvatar_collection on Collection { + name + avatar { + id + __typename + } + ...collectionUrl_collection + __typename + id +} + +fragment UserAvatar_user on User { + __typename + id + imageId + mediumMemberAt + name + username + ...userUrl_user +} + +fragment PublisherHeaderLogo_publisher on Publisher { + __typename + id + customStyleSheet { + id + header { + logoImage { + ...PublisherHeaderLogo_image + __typename + } + appNameColor { + ...getHexFromColorValue_colorValue + __typename + } + appNameTreatment + __typename + } + __typename + } + name + ... on Collection { + isAuroraVisible + logo { + id + originalHeight + originalWidth + __typename + } + __typename + id + } + ... on User { + ...useIsVerifiedBookAuthor_user + __typename + id + } + ...CustomHeaderTooltip_publisher + ...publisherUrl_publisher +} + +fragment PublisherHeaderLogo_image on ImageMetadata { + id + originalHeight + originalWidth + __typename +} + +fragment useIsVerifiedBookAuthor_user on User { + verifications { + isBookAuthor + __typename + } + __typename + id +} + +fragment CustomHeaderTooltip_publisher on Publisher { + __typename + id + customStyleSheet { + id + header { + appNameTreatment + nameTreatment + __typename + } + __typename + } + ... on Collection { + isAuroraVisible + slug + __typename + id + } +} + +fragment PublisherFollowersCount_publisher on Publisher { + id + __typename + id + ... on Collection { + slug + subscriberCount + ...collectionUrl_collection + __typename + id + } + ... on User { + socialStats { + followerCount + __typename + } + username + ...userUrl_user + __typename + id + } +} + +fragment PublisherHeaderActions_publisher on Publisher { + __typename + ...MetaHeaderPubMenu_publisher + ... on Collection { + ...CollectionFollowButton_collection + __typename + id + } + ... 
on User { + ...FollowAndSubscribeButtons_user + __typename + id + } +} + +fragment MetaHeaderPubMenu_publisher on Publisher { + __typename + ... on Collection { + ...MetaHeaderPubMenu_publisher_collection + __typename + id + } + ... on User { + ...MetaHeaderPubMenu_publisher_user + __typename + id + } +} + +fragment MetaHeaderPubMenu_publisher_collection on Collection { + id + slug + name + domain + newsletterV3 { + slug + __typename + id + } + ...MutePopoverOptions_collection + __typename +} + +fragment MutePopoverOptions_collection on Collection { + id + __typename +} + +fragment MetaHeaderPubMenu_publisher_user on User { + id + username + ...MutePopoverOptions_creator + __typename +} + +fragment MutePopoverOptions_creator on User { + id + __typename +} + +fragment CollectionFollowButton_collection on Collection { + __typename + id + name + slug + ...collectionUrl_collection + ...SusiClickable_collection +} + +fragment SusiClickable_collection on Collection { + ...SusiContainer_collection + __typename + id +} + +fragment SusiContainer_collection on Collection { + name + ...SignInOptions_collection + ...SignUpOptions_collection + __typename + id +} + +fragment SignInOptions_collection on Collection { + id + name + __typename +} + +fragment SignUpOptions_collection on Collection { + id + name + __typename +} + +fragment FollowAndSubscribeButtons_user on User { + ...UserFollowButton_user + ...UserSubscribeButton_user + __typename + id +} + +fragment UserFollowButton_user on User { + ...UserFollowButtonSignedIn_user + ...UserFollowButtonSignedOut_user + __typename + id +} + +fragment UserFollowButtonSignedIn_user on User { + id + name + __typename +} + +fragment UserFollowButtonSignedOut_user on User { + id + ...SusiClickable_user + __typename +} + +fragment SusiClickable_user on User { + ...SusiContainer_user + __typename + id +} + +fragment SusiContainer_user on User { + ...SignInOptions_user + ...SignUpOptions_user + __typename + id +} + +fragment 
SignInOptions_user on User { + id + name + __typename +} + +fragment SignUpOptions_user on User { + id + name + __typename +} + +fragment UserSubscribeButton_user on User { + id + isPartnerProgramEnrolled + name + viewerEdge { + id + isFollowing + isUser + __typename + } + viewerIsUser + newsletterV3 { + id + ...useNewsletterV3Subscription_newsletterV3 + __typename + } + ...useNewsletterV3Subscription_user + ...MembershipUpsellModal_user + __typename +} + +fragment useNewsletterV3Subscription_newsletterV3 on NewsletterV3 { + id + type + slug + name + collection { + slug + __typename + id + } + user { + id + name + username + newsletterV3 { + id + __typename + } + __typename + } + __typename +} + +fragment useNewsletterV3Subscription_user on User { + id + username + newsletterV3 { + ...useNewsletterV3Subscription_newsletterV3 + __typename + id + } + __typename +} + +fragment MembershipUpsellModal_user on User { + id + name + imageId + postSubscribeMembershipUpsellShownAt + newsletterV3 { + id + __typename + } + __typename +} + +fragment PublisherHeaderNav_publisher on Publisher { + __typename + id + customStyleSheet { + navigation { + navItems { + name + ...PublisherHeaderNavLink_headerNavigationItem + __typename + } + __typename + } + __typename + id + } + ...PublisherHeaderNavLink_publisher + ... on Collection { + domain + isAuroraVisible + slug + navItems { + tagSlug + title + url + __typename + } + __typename + id + } + ... on User { + customDomainState { + live { + domain + __typename + } + __typename + } + hasSubdomain + username + homePostsPublished: homepagePostsConnection(paging: {limit: 1}) { + posts { + id + __typename + } + __typename + } + ...useIsVerifiedBookAuthor_user + __typename + id + } +} + +fragment PublisherHeaderNavLink_headerNavigationItem on HeaderNavigationItem { + href + name + tags { + id + normalizedTagSlug + __typename + } + type + __typename +} + +fragment PublisherHeaderNavLink_publisher on Publisher { + __typename + id + ... 
on Collection { + slug + __typename + id + } +} + +fragment PublisherHomepagePosts_publisher on Publisher { + __typename + id + homepagePostsConnection( + paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom} + includeDistributedResponses: $includeDistributedResponses + ) { + posts { + inResponseToPostResult { + __typename + } + ...WithResponsesSidebar_post + ...PostPreview_post + __typename + } + pagingInfo { + next { + from + limit + __typename + } + __typename + } + __typename + } + ...CardByline_publisher + ...NewsletterV3Promo_publisher + ...PublisherHomepagePosts_user +} + +fragment WithResponsesSidebar_post on Post { + id + ...ThreadedResponsesSidebar_post + __typename +} + +fragment ThreadedResponsesSidebar_post on Post { + id + ...ThreadedResponsesSidebarContent_post + __typename +} + +fragment ThreadedResponsesSidebarContent_post on Post { + id + postResponses { + count + __typename + } + collection { + id + viewerEdge { + id + isEditor + __typename + } + __typename + } + creator { + id + __typename + } + ...ThreadedReplies_post + __typename +} + +fragment ThreadedReplies_post on Post { + __typename + id + ...ThreadedReply_post +} + +fragment ThreadedReply_post on Post { + __typename + id + ...ReadOrEditSimpleResponse_post + ...StoryResponse_post +} + +fragment ReadOrEditSimpleResponse_post on Post { + __typename + id + ...SimpleResponse_post +} + +fragment SimpleResponse_post on Post { + id + ...ResponseHeader_post + __typename +} + +fragment ResponseHeader_post on Post { + __typename + id + createdAt + firstPublishedAt + latestPublishedAt + creator { + id + name + ...UserAvatar_user + ...useIsVerifiedBookAuthor_user + ...UserMentionTooltip_user + __typename + } + ...ResponsePopoverMenu_post +} + +fragment UserMentionTooltip_user on User { + id + name + username + bio + imageId + mediumMemberAt + ...UserAvatar_user + ...UserFollowButton_user + ...useIsVerifiedBookAuthor_user + __typename +} + +fragment ResponsePopoverMenu_post on Post { + id + 
...ReportUserMenuItem_post + ...HideResponseMenuItem_post + ...BlockUserMenuItem_post + ...UndoClapsMenuItem_post + __typename +} + +fragment ReportUserMenuItem_post on Post { + __typename + id + creator { + id + __typename + } + ...SusiClickable_post +} + +fragment SusiClickable_post on Post { + id + mediumUrl + ...SusiContainer_post + __typename +} + +fragment SusiContainer_post on Post { + id + __typename +} + +fragment HideResponseMenuItem_post on Post { + __typename + id + collection { + id + viewerEdge { + id + isEditor + __typename + } + __typename + } + creator { + id + __typename + } +} + +fragment BlockUserMenuItem_post on Post { + __typename + id + creator { + id + __typename + } +} + +fragment UndoClapsMenuItem_post on Post { + id + clapCount + __typename +} + +fragment StoryResponse_post on Post { + id + ...ResponseHeader_post + __typename +} + +fragment PostPreview_post on Post { + id + creator { + ...PostPreview_user + __typename + id + } + collection { + ...CardByline_collection + ...ExpandablePostByline_collection + __typename + id + } + ...InteractivePostBody_postPreview + firstPublishedAt + isLocked + isSeries + latestPublishedAt + inResponseToCatalogResult { + __typename + } + pinnedAt + pinnedByCreatorAt + previewImage { + id + focusPercentX + focusPercentY + __typename + } + readingTime + sequence { + slug + __typename + } + title + uniqueSlug + ...CardByline_post + ...PostFooterActionsBar_post + ...InResponseToEntityPreview_post + ...PostScrollTracker_post + ...HighDensityPreview_post + __typename +} + +fragment PostPreview_user on User { + __typename + name + username + ...CardByline_user + ...ExpandablePostByline_user + id +} + +fragment CardByline_user on User { + __typename + id + name + username + mediumMemberAt + socialStats { + followerCount + __typename + } + ...useIsVerifiedBookAuthor_user + ...userUrl_user + ...UserMentionTooltip_user +} + +fragment ExpandablePostByline_user on User { + __typename + id + name + imageId + 
...userUrl_user + ...useIsVerifiedBookAuthor_user +} + +fragment CardByline_collection on Collection { + name + ...collectionUrl_collection + __typename + id +} + +fragment ExpandablePostByline_collection on Collection { + __typename + id + name + domain + slug +} + +fragment InteractivePostBody_postPreview on Post { + extendedPreviewContent( + truncationConfig: {previewParagraphsWordCountThreshold: 400, minimumWordLengthForTruncation: 150, truncateAtEndOfSentence: true, showFullImageCaptions: true, shortformPreviewParagraphsWordCountThreshold: 30, shortformMinimumWordLengthForTruncation: 30} + ) { + bodyModel { + ...PostBody_bodyModel + __typename + } + isFullContent + __typename + } + __typename + id +} + +fragment PostBody_bodyModel on RichText { + sections { + name + startIndex + textLayout + imageLayout + backgroundImage { + id + originalHeight + originalWidth + __typename + } + videoLayout + backgroundVideo { + videoId + originalHeight + originalWidth + previewImageId + __typename + } + __typename + } + paragraphs { + id + ...PostBodySection_paragraph + __typename + } + ...normalizedBodyModel_richText + __typename +} + +fragment PostBodySection_paragraph on Paragraph { + name + ...PostBodyParagraph_paragraph + __typename + id +} + +fragment PostBodyParagraph_paragraph on Paragraph { + name + type + ...ImageParagraph_paragraph + ...TextParagraph_paragraph + ...IframeParagraph_paragraph + ...MixtapeParagraph_paragraph + ...CodeBlockParagraph_paragraph + __typename + id +} + +fragment ImageParagraph_paragraph on Paragraph { + href + layout + metadata { + id + originalHeight + originalWidth + focusPercentX + focusPercentY + alt + __typename + } + ...Markups_paragraph + ...ParagraphRefsMapContext_paragraph + ...PostAnnotationsMarker_paragraph + __typename + id +} + +fragment Markups_paragraph on Paragraph { + name + text + hasDropCap + dropCapImage { + ...MarkupNode_data_dropCapImage + __typename + id + } + markups { + ...Markups_markup + __typename + } + 
__typename + id +} + +fragment MarkupNode_data_dropCapImage on ImageMetadata { + ...DropCap_image + __typename + id +} + +fragment DropCap_image on ImageMetadata { + id + originalHeight + originalWidth + __typename +} + +fragment Markups_markup on Markup { + type + start + end + href + anchorType + userId + linkMetadata { + httpStatus + __typename + } + __typename +} + +fragment ParagraphRefsMapContext_paragraph on Paragraph { + id + name + text + __typename +} + +fragment PostAnnotationsMarker_paragraph on Paragraph { + ...PostViewNoteCard_paragraph + __typename + id +} + +fragment PostViewNoteCard_paragraph on Paragraph { + name + __typename + id +} + +fragment TextParagraph_paragraph on Paragraph { + type + hasDropCap + codeBlockMetadata { + mode + lang + __typename + } + ...Markups_paragraph + ...ParagraphRefsMapContext_paragraph + __typename + id +} + +fragment IframeParagraph_paragraph on Paragraph { + type + iframe { + mediaResource { + id + iframeSrc + iframeHeight + iframeWidth + title + __typename + } + __typename + } + layout + ...Markups_paragraph + __typename + id +} + +fragment MixtapeParagraph_paragraph on Paragraph { + type + mixtapeMetadata { + href + mediaResource { + mediumCatalog { + id + __typename + } + __typename + } + __typename + } + ...GenericMixtapeParagraph_paragraph + __typename + id +} + +fragment GenericMixtapeParagraph_paragraph on Paragraph { + text + mixtapeMetadata { + href + thumbnailImageId + __typename + } + markups { + start + end + type + href + __typename + } + __typename + id +} + +fragment CodeBlockParagraph_paragraph on Paragraph { + codeBlockMetadata { + lang + mode + __typename + } + __typename + id +} + +fragment normalizedBodyModel_richText on RichText { + paragraphs { + ...normalizedBodyModel_richText_paragraphs + __typename + } + sections { + startIndex + ...getSectionEndIndex_section + __typename + } + ...getParagraphStyles_richText + ...getParagraphSpaces_richText + __typename +} + +fragment 
normalizedBodyModel_richText_paragraphs on Paragraph { + markups { + ...normalizedBodyModel_richText_paragraphs_markups + __typename + } + codeBlockMetadata { + lang + mode + __typename + } + ...getParagraphHighlights_paragraph + ...getParagraphPrivateNotes_paragraph + __typename + id +} + +fragment normalizedBodyModel_richText_paragraphs_markups on Markup { + type + __typename +} + +fragment getParagraphHighlights_paragraph on Paragraph { + name + __typename + id +} + +fragment getParagraphPrivateNotes_paragraph on Paragraph { + name + __typename + id +} + +fragment getSectionEndIndex_section on Section { + startIndex + __typename +} + +fragment getParagraphStyles_richText on RichText { + paragraphs { + text + type + __typename + } + sections { + ...getSectionEndIndex_section + __typename + } + __typename +} + +fragment getParagraphSpaces_richText on RichText { + paragraphs { + layout + metadata { + originalHeight + originalWidth + id + __typename + } + type + ...paragraphExtendsImageGrid_paragraph + __typename + } + ...getSeriesParagraphTopSpacings_richText + ...getPostParagraphTopSpacings_richText + __typename +} + +fragment paragraphExtendsImageGrid_paragraph on Paragraph { + layout + type + __typename + id +} + +fragment getSeriesParagraphTopSpacings_richText on RichText { + paragraphs { + id + __typename + } + sections { + ...getSectionEndIndex_section + __typename + } + __typename +} + +fragment getPostParagraphTopSpacings_richText on RichText { + paragraphs { + type + layout + text + codeBlockMetadata { + lang + mode + __typename + } + __typename + } + sections { + ...getSectionEndIndex_section + __typename + } + __typename +} + +fragment CardByline_post on Post { + ...DraftStatus_post + ...Star_post + ...shouldShowPublishedInStatus_post + __typename + id +} + +fragment DraftStatus_post on Post { + id + pendingCollection { + id + creator { + id + __typename + } + ...BoldCollectionName_collection + __typename + } + statusForCollection + creator { + id + 
__typename + } + isPublished + __typename +} + +fragment BoldCollectionName_collection on Collection { + id + name + __typename +} + +fragment Star_post on Post { + id + creator { + id + __typename + } + __typename +} + +fragment shouldShowPublishedInStatus_post on Post { + statusForCollection + isPublished + __typename + id +} + +fragment PostFooterActionsBar_post on Post { + id + visibility + allowResponses + postResponses { + count + __typename + } + isLimitedState + creator { + id + __typename + } + collection { + id + __typename + } + ...MultiVote_post + ...PostSharePopover_post + ...OverflowMenuButtonWithNegativeSignal_post + ...PostPageBookmarkButton_post + __typename +} + +fragment MultiVote_post on Post { + id + creator { + id + ...SusiClickable_user + __typename + } + isPublished + ...SusiClickable_post + collection { + id + slug + __typename + } + isLimitedState + ...MultiVoteCount_post + __typename +} + +fragment MultiVoteCount_post on Post { + id + __typename +} + +fragment PostSharePopover_post on Post { + id + mediumUrl + title + isPublished + isLocked + ...usePostUrl_post + ...FriendLink_post + __typename +} + +fragment usePostUrl_post on Post { + id + creator { + ...userUrl_user + __typename + id + } + collection { + id + domain + slug + __typename + } + isSeries + mediumUrl + sequence { + slug + __typename + } + uniqueSlug + __typename +} + +fragment FriendLink_post on Post { + id + ...SusiClickable_post + ...useCopyFriendLink_post + __typename +} + +fragment useCopyFriendLink_post on Post { + ...usePostUrl_post + __typename + id +} + +fragment OverflowMenuButtonWithNegativeSignal_post on Post { + id + visibility + ...OverflowMenuWithNegativeSignal_post + __typename +} + +fragment OverflowMenuWithNegativeSignal_post on Post { + id + creator { + id + __typename + } + collection { + id + __typename + } + ...OverflowMenuItemUndoClaps_post + ...AddToCatalogBase_post + __typename +} + +fragment OverflowMenuItemUndoClaps_post on Post { + id + clapCount 
+ ...ClapMutation_post + __typename +} + +fragment ClapMutation_post on Post { + __typename + id + clapCount + ...MultiVoteCount_post +} + +fragment AddToCatalogBase_post on Post { + id + isPublished + __typename +} + +fragment PostPageBookmarkButton_post on Post { + ...AddToCatalogBookmarkButton_post + __typename + id +} + +fragment AddToCatalogBookmarkButton_post on Post { + ...AddToCatalogBase_post + __typename + id +} + +fragment InResponseToEntityPreview_post on Post { + id + inResponseToEntityType + __typename +} + +fragment PostScrollTracker_post on Post { + id + collection { + id + __typename + } + sequence { + sequenceId + __typename + } + __typename +} + +fragment HighDensityPreview_post on Post { + id + title + previewImage { + id + focusPercentX + focusPercentY + __typename + } + extendedPreviewContent( + truncationConfig: {previewParagraphsWordCountThreshold: 400, minimumWordLengthForTruncation: 150, truncateAtEndOfSentence: true, showFullImageCaptions: true, shortformPreviewParagraphsWordCountThreshold: 30, shortformMinimumWordLengthForTruncation: 30} + ) { + subtitle + __typename + } + ...HighDensityFooter_post + __typename +} + +fragment HighDensityFooter_post on Post { + id + readingTime + tags { + ...TopicPill_tag + __typename + } + ...BookmarkButton_post + ...ExpandablePostCardOverflowButton_post + ...OverflowMenuButtonWithNegativeSignal_post + __typename +} + +fragment TopicPill_tag on Tag { + __typename + id + displayTitle + normalizedTagSlug +} + +fragment BookmarkButton_post on Post { + visibility + ...SusiClickable_post + ...AddToCatalogBookmarkButton_post + __typename + id +} + +fragment ExpandablePostCardOverflowButton_post on Post { + creator { + id + __typename + } + ...ExpandablePostCardReaderButton_post + __typename + id +} + +fragment ExpandablePostCardReaderButton_post on Post { + id + collection { + id + __typename + } + creator { + id + __typename + } + clapCount + ...ClapMutation_post + __typename +} + +fragment 
CardByline_publisher on Publisher { + __typename + ... on User { + id + ...CardByline_user + __typename + } + ... on Collection { + id + ...CardByline_collection + __typename + } +} + +fragment NewsletterV3Promo_publisher on Publisher { + __typename + ... on User { + ...NewsletterV3Promo_user + __typename + id + } + ... on Collection { + ...NewsletterV3Promo_collection + __typename + id + } +} + +fragment NewsletterV3Promo_user on User { + id + username + name + viewerEdge { + isUser + __typename + id + } + newsletterV3 { + id + ...NewsletterV3Promo_newsletterV3 + __typename + } + __typename +} + +fragment NewsletterV3Promo_newsletterV3 on NewsletterV3 { + slug + name + description + promoHeadline + promoBody + ...NewsletterSubscribeComponent_newsletterV3 + __typename + id +} + +fragment NewsletterSubscribeComponent_newsletterV3 on NewsletterV3 { + ...NewsletterV3SubscribeButton_newsletterV3 + ...NewsletterV3SubscribeByEmail_newsletterV3 + __typename + id +} + +fragment NewsletterV3SubscribeButton_newsletterV3 on NewsletterV3 { + id + name + slug + type + user { + id + name + username + __typename + } + collection { + slug + ...SusiClickable_collection + ...collectionDefaultBackgroundTheme_collection + __typename + id + } + ...SusiClickable_newsletterV3 + ...useNewsletterV3Subscription_newsletterV3 + __typename +} + +fragment collectionDefaultBackgroundTheme_collection on Collection { + colorPalette { + ...collectionDefaultBackgroundTheme_colorPalette + __typename + } + customStyleSheet { + id + ...collectionDefaultBackgroundTheme_customStyleSheet + __typename + } + __typename + id +} + +fragment collectionDefaultBackgroundTheme_colorPalette on ColorPalette { + ...customDefaultBackgroundTheme_colorPalette + __typename +} + +fragment customDefaultBackgroundTheme_colorPalette on ColorPalette { + highlightSpectrum { + ...ThemeUtil_colorSpectrum + __typename + } + defaultBackgroundSpectrum { + ...ThemeUtil_colorSpectrum + __typename + } + tintBackgroundSpectrum { + 
...ThemeUtil_colorSpectrum + __typename + } + __typename +} + +fragment collectionDefaultBackgroundTheme_customStyleSheet on CustomStyleSheet { + id + ...customDefaultBackgroundTheme_customStyleSheet + __typename +} + +fragment customDefaultBackgroundTheme_customStyleSheet on CustomStyleSheet { + id + global { + colorPalette { + primary { + colorPalette { + ...customDefaultBackgroundTheme_colorPalette + __typename + } + __typename + } + background { + colorPalette { + ...customDefaultBackgroundTheme_colorPalette + __typename + } + __typename + } + __typename + } + __typename + } + __typename +} + +fragment SusiClickable_newsletterV3 on NewsletterV3 { + ...SusiContainer_newsletterV3 + __typename + id +} + +fragment SusiContainer_newsletterV3 on NewsletterV3 { + ...SignInOptions_newsletterV3 + ...SignUpOptions_newsletterV3 + __typename + id +} + +fragment SignInOptions_newsletterV3 on NewsletterV3 { + id + name + __typename +} + +fragment SignUpOptions_newsletterV3 on NewsletterV3 { + id + name + __typename +} + +fragment NewsletterV3SubscribeByEmail_newsletterV3 on NewsletterV3 { + id + slug + type + user { + id + name + username + __typename + } + collection { + ...collectionDefaultBackgroundTheme_collection + ...collectionUrl_collection + __typename + id + } + __typename +} + +fragment NewsletterV3Promo_collection on Collection { + id + slug + domain + name + newsletterV3 { + id + ...NewsletterV3Promo_newsletterV3 + __typename + } + __typename +} + +fragment PublisherHomepagePosts_user on User { + id + ...useShowAuthorNewsletterV3Promo_user + __typename +} + +fragment useShowAuthorNewsletterV3Promo_user on User { + id + username + newsletterV3 { + id + showPromo + slug + __typename + } + __typename +} + +fragment usePublicationAnalytics_collection on Collection { + id + googleAnalyticsId + __typename +} + +fragment CollectionMetadata_collection on Collection { + avatar { + id + focusPercentX + focusPercentY + originalHeight + originalWidth + __typename + } + 
creator { + id + twitterScreenName + ...userUrl_user + __typename + } + description + domain + facebookPageId + name + tags + twitterUsername + createdAt + ptsQualifiedAt + customDomainState { + live { + status + isSubdomain + __typename + } + __typename + } + ...collectionUrl_collection + ...CollectionJsonLd_collection + __typename + id +} + +fragment CollectionJsonLd_collection on Collection { + id + logo { + ...PrepareLogoForJsonLd_imageMetadata + __typename + id + } + avatar { + id + focusPercentX + focusPercentY + originalHeight + originalWidth + __typename + } + domain + name + ...collectionUrl_collection + __typename +} + +fragment PrepareLogoForJsonLd_imageMetadata on ImageMetadata { + id + originalWidth + originalHeight + __typename +} +""" } + @register_scraper class MediumScraper(BaseScraper): - url = "https://medium.com/feed/snowflake/tagged/snowflake" + url = "https://medium.com/_/graphql" - def __init__(self, after, *args, **kwargs): + def __init__(self, *args, **kwargs): super(MediumScraper, self).__init__(*args, **kwargs) self.data = {} - self.after = after or datetime(1970, 1, 1, tzinfo=timezone.utc) + self.after = datetime(1970, 1, 1, tzinfo=timezone.utc) + + def make_request(self, query_vars): + response = requests.post(self.url, json=query_vars) + post_data = response.json()["data"]["collection"]["homepagePostsConnection"] + paging_info = post_data['pagingInfo'] + return post_data["posts"], paging_info def scrape(self): print("Scraping Medium") - for entry in feedparser.parse(MediumScraper.url)["entries"]: - updated = string_to_datetime(entry["updated"]) - if updated > self.after: - self.data[entry["link"]] = { - "title": entry["title"], - "published": string_to_datetime(entry["published"]), - "updated": updated, - } + query_vars = LONG_QUERY | { + "variables": { + "homepagePostsLimit": 25, + "includeDistributedResponses": False, + "collectionId": "34b6daafc07", + "homepagePostsFrom": "0" + } + } + + while True: + posts, paging_info = 
self.make_request(query_vars) + + for post in posts: + if post["visibility"] == "PUBLIC": + self.data[post["mediumUrl"]] = { + "title": post["title"], + "published": unix_to_datetime_utc(post["firstPublishedAt"]), + "updated": unix_to_datetime_utc(post["latestPublishedAt"]) + } + + if paging_info is None: + break + + query_vars['variables']['homepagePostsFrom'] = paging_info['next']['from'] + query_vars['variables']['homepagePostsLimit'] = paging_info['next']['limit'] + return self.data def transform(self): diff --git a/snowscraper/scrapers/quickstarts.py b/snowscraper/scrapers/quickstarts.py deleted file mode 100644 index 4b3ff07..0000000 --- a/snowscraper/scrapers/quickstarts.py +++ /dev/null @@ -1,53 +0,0 @@ -from datetime import datetime -from datetime import timezone - -import scrapy -from scrapy import signals -from scrapy.crawler import CrawlerProcess -from scrapy.signalmanager import dispatcher - -from ..controller import register_scraper -from ..scraper import BaseScraper -from snowscraper.helpers import string_to_datetime - -QuickStartsURL = "https://quickstarts.snowflake.com/" - - -@register_scraper -class QuickstartScraper(BaseScraper, scrapy.Spider): - name = "snowflakespider" - - def __init__(self, after, *args, **kwargs): - super(QuickstartScraper, self).__init__(*args, **kwargs) - self.data = {} - self.after = after or datetime(1970, 1, 1, tzinfo=timezone.utc) - - def start_requests(self): - yield scrapy.Request(url=QuickStartsURL, callback=self.parse) - - def signal_handler(self, signal, sender, item, response, spider): - self.data[item["key"]] = item - self.data[item["key"]].pop("key") - - def scrape(self): - print("Scraping Quickstarts") - dispatcher.connect(self.signal_handler, signal=signals.item_scraped) - process = CrawlerProcess({"LOG_LEVEL": "ERROR"}) - process.crawl(QuickstartScraper, after=self.after) - process.start() - return self.data - - def parse(self, response): - for card in response.css("card-sorter#cards > a.codelab-card"): - 
updated = string_to_datetime(card.attrib["data-updated"]) - if updated > self.after: - key = QuickStartsURL.rstrip("/") + card.attrib["href"] - yield { - "key": key, - "title": card.attrib["data-title"], - "updated": updated, - "tags": card.attrib["data-tags"], - } - - def transform(self): - return self.data