Query minimization and isPublished check

This commit is contained in:
hackish 2023-09-19 12:21:52 -07:00
parent d044f849ea
commit 0e079ece25

View file

@ -11,21 +11,21 @@ from ..scraper import BaseScraper
LONG_QUERY = """ LONG_QUERY = """
query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) { query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) {
collection(id: $collectionId) { collection(id: $collectionId) {
...PublicationHomepage_collection
}
}
fragment PublicationHomepage_collection on Collection {
...PublisherHomepagePosts_publisher
}
fragment PublisherHomepagePosts_publisher on Publisher {
homepagePostsConnection( homepagePostsConnection(
paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom} paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom}
includeDistributedResponses: $includeDistributedResponses includeDistributedResponses: $includeDistributedResponses
) { ) {
posts { posts {
...PostPreview_post firstPublishedAt
latestPublishedAt
title
uniqueSlug
visibility
mediumUrl
isPublished
tags {
normalizedTagSlug
}
} }
pagingInfo { pagingInfo {
next { next {
@ -35,39 +35,6 @@ fragment PublisherHomepagePosts_publisher on Publisher {
} }
} }
} }
fragment PostPreview_post on Post {
firstPublishedAt
latestPublishedAt
title
uniqueSlug
...PostFooterActionsBar_post
...HighDensityPreview_post
}
fragment PostFooterActionsBar_post on Post {
visibility
...PostSharePopover_post
}
fragment PostSharePopover_post on Post {
mediumUrl
isPublished
}
fragment HighDensityPreview_post on Post {
...HighDensityFooter_post
}
fragment HighDensityFooter_post on Post {
tags {
...TopicPill_tag
}
}
fragment TopicPill_tag on Tag {
displayTitle
normalizedTagSlug
} }
""" """
@ -102,7 +69,7 @@ class MediumScraper(BaseScraper):
posts, paging_info = self.make_request(query_vars) posts, paging_info = self.make_request(query_vars)
for post in posts: for post in posts:
if post["visibility"] == "PUBLIC": if post["visibility"] == "PUBLIC" and post["isPublished"]:
self.data[post["mediumUrl"]] = { self.data[post["mediumUrl"]] = {
"title": post["title"], "title": post["title"],
"published": unix_to_datetime_utc(post["firstPublishedAt"]), "published": unix_to_datetime_utc(post["firstPublishedAt"]),