Query minimization and isPublished check
This commit is contained in:
parent
d044f849ea
commit
0e079ece25
1 changed file with 22 additions and 55 deletions
|
@@ -11,21 +11,21 @@ from ..scraper import BaseScraper
|
|||
LONG_QUERY = """
|
||||
query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) {
|
||||
collection(id: $collectionId) {
|
||||
...PublicationHomepage_collection
|
||||
}
|
||||
}
|
||||
|
||||
fragment PublicationHomepage_collection on Collection {
|
||||
...PublisherHomepagePosts_publisher
|
||||
}
|
||||
|
||||
fragment PublisherHomepagePosts_publisher on Publisher {
|
||||
homepagePostsConnection(
|
||||
paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom}
|
||||
includeDistributedResponses: $includeDistributedResponses
|
||||
) {
|
||||
posts {
|
||||
...PostPreview_post
|
||||
firstPublishedAt
|
||||
latestPublishedAt
|
||||
title
|
||||
uniqueSlug
|
||||
visibility
|
||||
mediumUrl
|
||||
isPublished
|
||||
tags {
|
||||
normalizedTagSlug
|
||||
}
|
||||
}
|
||||
pagingInfo {
|
||||
next {
|
||||
|
@@ -35,39 +35,6 @@ fragment PublisherHomepagePosts_publisher on Publisher {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fragment PostPreview_post on Post {
|
||||
firstPublishedAt
|
||||
latestPublishedAt
|
||||
title
|
||||
uniqueSlug
|
||||
...PostFooterActionsBar_post
|
||||
...HighDensityPreview_post
|
||||
}
|
||||
|
||||
fragment PostFooterActionsBar_post on Post {
|
||||
visibility
|
||||
...PostSharePopover_post
|
||||
}
|
||||
|
||||
fragment PostSharePopover_post on Post {
|
||||
mediumUrl
|
||||
isPublished
|
||||
}
|
||||
|
||||
fragment HighDensityPreview_post on Post {
|
||||
...HighDensityFooter_post
|
||||
}
|
||||
|
||||
fragment HighDensityFooter_post on Post {
|
||||
tags {
|
||||
...TopicPill_tag
|
||||
}
|
||||
}
|
||||
|
||||
fragment TopicPill_tag on Tag {
|
||||
displayTitle
|
||||
normalizedTagSlug
|
||||
}
|
||||
"""
|
||||
|
||||
|
@@ -102,7 +69,7 @@ class MediumScraper(BaseScraper):
|
|||
posts, paging_info = self.make_request(query_vars)
|
||||
|
||||
for post in posts:
|
||||
if post["visibility"] == "PUBLIC":
|
||||
if post["visibility"] == "PUBLIC" and post["isPublished"]:
|
||||
self.data[post["mediumUrl"]] = {
|
||||
"title": post["title"],
|
||||
"published": unix_to_datetime_utc(post["firstPublishedAt"]),
|
||||
|
|
Loading…
Reference in a new issue