Biblioteca de queries
Biblioteca de queriesImportar HTML de URLs como novos posts no WordPress

Importar HTML de URLs como novos posts no WordPress

Esta query importa as páginas HTML dos URLs fornecidos como novos posts no WordPress.

Para cada URL, ela recupera o título de <title>...</title> no meta, e o conteúdo de <body>...</body>, ou personalizado para um elemento HTML interno específico usando a variável $contentMatchInnerRegex.

Com $contentMatchInnerRegex, podemos definir qual subparte do HTML do <body> capturar.

Por exemplo, se o conteúdo deve ser extraído de:

<article class="main">...</article>

...podemos capturá-lo com:

{
  "contentMatchInnerRegex": ".*?<\\s*?article\\b[^>]*>(.*?)<\\/article\\b[^>]*>.*?"
}
query GenerateURLInputs(
  $urls: [URL!]!
  $contentMatchInnerRegex: String! = "(.*?)"
) {
  urlInputs: _echo(value: $urls)
    @underEachArrayItem(
      passValueOnwardsAs: "url"
    )
      @applyField(
        name: "_echo",
        arguments: {
          value: {
            url: $url,
            method: GET
          }
        },
        setResultInResponse: true
      )
    @export(as: "urlInputs")
    @remove
  contentMatchRegex: _sprintf(
    string: "/(?:<!DOCTYPE html>)?<\\s*?html\\b[^>]*>.*?<\\s*?body\\b[^>]*>%s<\\/body\\b[^>]*>.*?<\\/html\\b[^>]*>/sim",
    values: [$contentMatchInnerRegex]
  )
    @export(as: "contentMatchRegex")
}
 
query RequestPages
  @depends(on: "GenerateURLInputs")
{
  urlContents: _sendHTTPRequests(inputs: $urlInputs, async: false) {
    statusCode
    body
      @remove
    title: _strRegexReplace(
      searchRegex: "/(?:<!DOCTYPE html>)?<\\s*?html\\b[^>]*>.*?<head\\b[^>]*>.*?<\\s*?title\\b[^>]*>(.*?)<\\/title\\b[^>]*>.*?<\\/head\\b[^>]*>(.*?)<\\/html\\b[^>]*>/sim"
      replaceWith: "$1"
      in: $__body
    )
    content: _strRegexReplace(
      searchRegex: $contentMatchRegex
      replaceWith: "$1"
      in: $__body
    )
    createPostInput: _echo(value: {
      status: publish,
      title: $__title
      contentAs: {
        html: $__content
      }
    })
      @export(as: "createPostInputs", type: LIST)
  }
}
 
mutation CreatePostsFromURLs
  @depends(on: "RequestPages")
{
  createPosts(inputs: $createPostInputs) {
    status
    errors {
      __typename
      ...on ErrorPayload {
        message
      }
    }
    post {
      id
      status
      title
      content
    }
  }
}