%PDF- %PDF-
Direktori : /www/varak.net/nextcloud.varak.net/nextcloud/3rdparty/fusonic/opengraph/src/ |
Current File : //www/varak.net/nextcloud.varak.net/nextcloud/3rdparty/fusonic/opengraph/src/Consumer.php |
<?php

namespace Fusonic\OpenGraph;

use Fusonic\OpenGraph\Objects\ObjectBase;
use Fusonic\OpenGraph\Objects\Website;
use LogicException;
use Psr\Http\Client\ClientExceptionInterface;
use Psr\Http\Client\ClientInterface;
use Psr\Http\Message\RequestFactoryInterface;
use Symfony\Component\DomCrawler\Crawler;

/**
 * Consumer that extracts Open Graph data from either a URL or a HTML string.
 */
class Consumer
{
    private ?ClientInterface $client;

    private ?RequestFactoryInterface $requestFactory;

    /**
     * When enabled, crawler will read content of title and meta description if no
     * Open Graph data is provided by target page.
     */
    public bool $useFallbackMode = false;

    /**
     * When enabled, crawler will throw exceptions for some crawling errors like unexpected
     * Open Graph elements.
     */
    public bool $debug = false;

    /**
     * @param ClientInterface|null         $client         A PSR-18 ClientInterface implementation.
     * @param RequestFactoryInterface|null $requestFactory A PSR-17 RequestFactoryInterface implementation.
     */
    public function __construct(?ClientInterface $client = null, ?RequestFactoryInterface $requestFactory = null)
    {
        $this->client = $client;
        $this->requestFactory = $requestFactory;
    }

    /**
     * Fetches HTML content from the given URL and then crawls it for Open Graph data.
     *
     * @param string $url URL to be crawled.
     *
     * @return ObjectBase
     *
     * @throws LogicException           When the consumer was created without a client or a request factory.
     * @throws ClientExceptionInterface
     */
    public function loadUrl(string $url): ObjectBase
    {
        // Both collaborators are dereferenced below, so both must be present; checking
        // only the client would let a missing factory surface as a null-call Error.
        if ($this->client === null || $this->requestFactory === null) {
            throw new LogicException(
                "To use loadUrl() you must provide \$client and \$requestFactory when instantiating the consumer."
            );
        }

        $request = $this->requestFactory->createRequest("GET", $url);
        $response = $this->client->sendRequest($request);

        return $this->loadHtml($response->getBody()->getContents(), $url);
    }

    /**
     * Crawls the given HTML string for OpenGraph data.
     *
     * @param string      $html        HTML string, usually whole content of crawled web resource.
     * @param string|null $fallbackUrl URL to use when fallback mode is enabled.
     *
     * @return ObjectBase
     */
    public function loadHtml(string $html, ?string $fallbackUrl = null): ObjectBase
    {
        // Extract all data that can be found
        $page = $this->extractOpenGraphData($html);

        // Use the user's URL as fallback
        if ($this->useFallbackMode && $page->url === null) {
            $page->url = $fallbackUrl;
        }

        return $page;
    }

    /**
     * Parses the HTML, collects every "og:" meta tag into a Website object and, in
     * fallback mode, fills a missing url, title and description from the canonical
     * link, the <title> element and the description meta tag respectively.
     *
     * @param string $content HTML markup to parse.
     *
     * @return ObjectBase
     */
    private function extractOpenGraphData(string $content): ObjectBase
    {
        $crawler = new Crawler;
        $crawler->addHTMLContent($content, 'UTF-8');

        $properties = [];
        // Open Graph tags are looked up under both the "name" and the "property" attribute.
        foreach (['name', 'property'] as $t) {
            // Get all meta-tags starting with "og:"
            $ogMetaTags = $crawler->filter("meta[{$t}^='og:']");

            foreach ($ogMetaTags as $tag) {
                $name = strtolower(trim($tag->getAttribute($t)));
                $value = trim($tag->getAttribute("content"));
                $properties[] = new Property($name, $value);
            }
        }

        // Create new object and assign all properties to it
        $object = new Website();
        $object->assignProperties($properties, $this->debug);

        if (!$this->useFallbackMode) {
            return $object;
        }

        // Fallback for url
        if (!$object->url) {
            $urlElement = $crawler->filter("link[rel='canonical']")->first();
            if ($urlElement->count() > 0) {
                // attr() yields null when href is absent; cast so trim() always gets a string.
                $object->url = trim((string) $urlElement->attr("href"));
            }
        }

        // Fallback for title
        if (!$object->title) {
            $titleElement = $crawler->filter("title")->first();
            if ($titleElement->count() > 0) {
                $object->title = trim($titleElement->text());
            }
        }

        // Fallback for description: the "property" form is tried first so earlier results
        // are unchanged, then the standard <meta name="description"> form.
        if (!$object->description) {
            foreach (["meta[property='description']", "meta[name='description']"] as $selector) {
                $descriptionElement = $crawler->filter($selector)->first();
                if ($descriptionElement->count() > 0) {
                    $object->description = trim((string) $descriptionElement->attr("content"));
                    break;
                }
            }
        }

        return $object;
    }
}