Biblioteca de queriesImportar HTML de URLs como novos posts no WordPress
Importar HTML de URLs como novos posts no WordPress
Esta query importa as páginas HTML dos URLs fornecidos como novos posts no WordPress.
Para cada URL, ela recupera o título de <title>...</title> no meta, e o conteúdo de <body>...</body>, ou personalizado para um elemento HTML interno específico usando a variável $contentMatchInnerRegex.
Com $contentMatchInnerRegex, podemos definir qual subparte do HTML do <body> capturar.
Por exemplo, se o conteúdo deve ser extraído de:
<article class="main">...</article>...podemos capturá-lo com:
{
"contentMatchInnerRegex": ".*?<\\s*?article\\b[^>]*>(.*?)<\\/article\\b[^>]*>.*?"
}query GenerateURLInputs(
$urls: [URL!]!
$contentMatchInnerRegex: String! = "(.*?)"
) {
urlInputs: _echo(value: $urls)
@underEachArrayItem(
passValueOnwardsAs: "url"
)
@applyField(
name: "_echo",
arguments: {
value: {
url: $url,
method: GET
}
},
setResultInResponse: true
)
@export(as: "urlInputs")
@remove
contentMatchRegex: _sprintf(
string: "/(?:<!DOCTYPE html>)?<\\s*?html\\b[^>]*>.*?<\\s*?body\\b[^>]*>%s<\\/body\\b[^>]*>.*?<\\/html\\b[^>]*>/sim",
values: [$contentMatchInnerRegex]
)
@export(as: "contentMatchRegex")
}
query RequestPages
@depends(on: "GenerateURLInputs")
{
urlContents: _sendHTTPRequests(inputs: $urlInputs, async: false) {
statusCode
body
@remove
title: _strRegexReplace(
searchRegex: "/(?:<!DOCTYPE html>)?<\\s*?html\\b[^>]*>.*?<head\\b[^>]*>.*?<\\s*?title\\b[^>]*>(.*?)<\\/title\\b[^>]*>.*?<\\/head\\b[^>]*>(.*?)<\\/html\\b[^>]*>/sim"
replaceWith: "$1"
in: $__body
)
content: _strRegexReplace(
searchRegex: $contentMatchRegex
replaceWith: "$1"
in: $__body
)
createPostInput: _echo(value: {
status: publish,
title: $__title
contentAs: {
html: $__content
}
})
@export(as: "createPostInputs", type: LIST)
}
}
mutation CreatePostsFromURLs
@depends(on: "RequestPages")
{
createPosts(inputs: $createPostInputs) {
status
errors {
__typename
...on ErrorPayload {
message
}
}
post {
id
status
title
content
}
}
}