Query minimization and isPublished check

This commit is contained in:
hackish 2023-09-19 12:21:52 -07:00
parent d044f849ea
commit 0e079ece25

View file

@@ -11,64 +11,31 @@ from ..scraper import BaseScraper
LONG_QUERY = """ LONG_QUERY = """
query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) { query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) {
collection(id: $collectionId) { collection(id: $collectionId) {
...PublicationHomepage_collection homepagePostsConnection(
} paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom}
} includeDistributedResponses: $includeDistributedResponses
) {
fragment PublicationHomepage_collection on Collection { posts {
...PublisherHomepagePosts_publisher firstPublishedAt
} latestPublishedAt
title
fragment PublisherHomepagePosts_publisher on Publisher { uniqueSlug
homepagePostsConnection( visibility
paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom} mediumUrl
includeDistributedResponses: $includeDistributedResponses isPublished
) { tags {
posts { normalizedTagSlug
...PostPreview_post }
} }
pagingInfo { pagingInfo {
next { next {
from from
limit limit
}
} }
} }
} }
} }
fragment PostPreview_post on Post {
firstPublishedAt
latestPublishedAt
title
uniqueSlug
...PostFooterActionsBar_post
...HighDensityPreview_post
}
fragment PostFooterActionsBar_post on Post {
visibility
...PostSharePopover_post
}
fragment PostSharePopover_post on Post {
mediumUrl
isPublished
}
fragment HighDensityPreview_post on Post {
...HighDensityFooter_post
}
fragment HighDensityFooter_post on Post {
tags {
...TopicPill_tag
}
}
fragment TopicPill_tag on Tag {
displayTitle
normalizedTagSlug
}
""" """
@register_scraper @register_scraper
@@ -102,7 +69,7 @@ class MediumScraper(BaseScraper):
posts, paging_info = self.make_request(query_vars) posts, paging_info = self.make_request(query_vars)
for post in posts: for post in posts:
if post["visibility"] == "PUBLIC": if post["visibility"] == "PUBLIC" and post["isPublished"]:
self.data[post["mediumUrl"]] = { self.data[post["mediumUrl"]] = {
"title": post["title"], "title": post["title"],
"published": unix_to_datetime_utc(post["firstPublishedAt"]), "published": unix_to_datetime_utc(post["firstPublishedAt"]),