Query minimization and isPublished check
This commit is contained in:
parent
d044f849ea
commit
0e079ece25
1 changed file with 22 additions and 55 deletions
|
@ -11,64 +11,31 @@ from ..scraper import BaseScraper
|
||||||
LONG_QUERY = """
|
LONG_QUERY = """
|
||||||
query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) {
|
query PublicationHomepageQuery($collectionId: ID!, $homepagePostsLimit: PaginationLimit = 25, $homepagePostsFrom: String, $includeDistributedResponses: Boolean = false) {
|
||||||
collection(id: $collectionId) {
|
collection(id: $collectionId) {
|
||||||
...PublicationHomepage_collection
|
homepagePostsConnection(
|
||||||
}
|
paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom}
|
||||||
}
|
includeDistributedResponses: $includeDistributedResponses
|
||||||
|
) {
|
||||||
fragment PublicationHomepage_collection on Collection {
|
posts {
|
||||||
...PublisherHomepagePosts_publisher
|
firstPublishedAt
|
||||||
}
|
latestPublishedAt
|
||||||
|
title
|
||||||
fragment PublisherHomepagePosts_publisher on Publisher {
|
uniqueSlug
|
||||||
homepagePostsConnection(
|
visibility
|
||||||
paging: {limit: $homepagePostsLimit, from: $homepagePostsFrom}
|
mediumUrl
|
||||||
includeDistributedResponses: $includeDistributedResponses
|
isPublished
|
||||||
) {
|
tags {
|
||||||
posts {
|
normalizedTagSlug
|
||||||
...PostPreview_post
|
}
|
||||||
}
|
}
|
||||||
pagingInfo {
|
pagingInfo {
|
||||||
next {
|
next {
|
||||||
from
|
from
|
||||||
limit
|
limit
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fragment PostPreview_post on Post {
|
|
||||||
firstPublishedAt
|
|
||||||
latestPublishedAt
|
|
||||||
title
|
|
||||||
uniqueSlug
|
|
||||||
...PostFooterActionsBar_post
|
|
||||||
...HighDensityPreview_post
|
|
||||||
}
|
|
||||||
|
|
||||||
fragment PostFooterActionsBar_post on Post {
|
|
||||||
visibility
|
|
||||||
...PostSharePopover_post
|
|
||||||
}
|
|
||||||
|
|
||||||
fragment PostSharePopover_post on Post {
|
|
||||||
mediumUrl
|
|
||||||
isPublished
|
|
||||||
}
|
|
||||||
|
|
||||||
fragment HighDensityPreview_post on Post {
|
|
||||||
...HighDensityFooter_post
|
|
||||||
}
|
|
||||||
|
|
||||||
fragment HighDensityFooter_post on Post {
|
|
||||||
tags {
|
|
||||||
...TopicPill_tag
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fragment TopicPill_tag on Tag {
|
|
||||||
displayTitle
|
|
||||||
normalizedTagSlug
|
|
||||||
}
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@register_scraper
|
@register_scraper
|
||||||
|
@ -102,7 +69,7 @@ class MediumScraper(BaseScraper):
|
||||||
posts, paging_info = self.make_request(query_vars)
|
posts, paging_info = self.make_request(query_vars)
|
||||||
|
|
||||||
for post in posts:
|
for post in posts:
|
||||||
if post["visibility"] == "PUBLIC":
|
if post["visibility"] == "PUBLIC" and post["isPublished"]:
|
||||||
self.data[post["mediumUrl"]] = {
|
self.data[post["mediumUrl"]] = {
|
||||||
"title": post["title"],
|
"title": post["title"],
|
||||||
"published": unix_to_datetime_utc(post["firstPublishedAt"]),
|
"published": unix_to_datetime_utc(post["firstPublishedAt"]),
|
||||||
|
|
Loading…
Reference in a new issue