Query minimization and isPublished check

This commit is contained in:
hackish 2023-09-19 12:21:52 -07:00
parent d044f849ea
commit 0e079ece25

View file

@@ -11,21 +11,21 @@ from ..scraper import BaseScraper
LONG_QUERY = """
query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) {
collection(id: $collectionId) {
...PublicationHomepage_collection
}
}
fragment PublicationHomepage_collection on Collection {
...PublisherHomepagePosts_publisher
}
fragment PublisherHomepagePosts_publisher on Publisher {
homepagePostsConnection(
paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom}
includeDistributedResponses: $includeDistributedResponses
) {
posts {
...PostPreview_post
firstPublishedAt
latestPublishedAt
title
uniqueSlug
visibility
mediumUrl
isPublished
tags {
normalizedTagSlug
}
}
pagingInfo {
next {
@@ -35,39 +35,6 @@ fragment PublisherHomepagePosts_publisher on Publisher {
}
}
}
fragment PostPreview_post on Post {
firstPublishedAt
latestPublishedAt
title
uniqueSlug
...PostFooterActionsBar_post
...HighDensityPreview_post
}
fragment PostFooterActionsBar_post on Post {
visibility
...PostSharePopover_post
}
fragment PostSharePopover_post on Post {
mediumUrl
isPublished
}
fragment HighDensityPreview_post on Post {
...HighDensityFooter_post
}
fragment HighDensityFooter_post on Post {
tags {
...TopicPill_tag
}
}
fragment TopicPill_tag on Tag {
displayTitle
normalizedTagSlug
}
"""
@@ -102,7 +69,7 @@ class MediumScraper(BaseScraper):
posts, paging_info = self.make_request(query_vars)
for post in posts:
if post["visibility"] == "PUBLIC":
if post["visibility"] == "PUBLIC" and post["isPublished"]:
self.data[post["mediumUrl"]] = {
"title": post["title"],
"published": unix_to_datetime_utc(post["firstPublishedAt"]),