当我第一次使用WordPress HTTP API时,我编写了一个类,它可以满足您的需要:
<?php
/**
* WebPage_Info class
*
* @version 0.1
* @author Ohad Raz <[email protected]>
* @package WebPage_Info
* @copyright Ohad Raz 2011
*
*/
if (!class_exists('webpage_info')){
	/**
	 * WebPage_Info class
	 *
	 * Loads an HTML page (from a URL or from an HTML string) into a DOMDocument
	 * and exposes cached accessors for the title, the meta description, the
	 * meta keywords, and elements looked up by tag name or by id.
	 *
	 * Remote pages are fetched with wp_remote_get() when WordPress is loaded
	 * and with cURL otherwise.
	 */
	class webpage_info{
		/**
		 * page url (holds the raw html instead when the object was built with $html = true)
		 * @var string
		 * @access public
		 * @since 0.1
		 */
		public $_url;
		/**
		 * page html
		 * @var string
		 * @access public
		 * @since 0.1
		 */
		public $_body;
		/**
		 * page title ("" = not looked up yet, null = looked up and not found)
		 * @var string
		 * @access public
		 * @since 0.1
		 */
		public $_title;
		/**
		 * page meta description ("" = not looked up yet, null = looked up and not found)
		 * @var string
		 * @access public
		 * @since 0.1
		 */
		public $_description;
		/**
		 * page as domDocument (stays "" until a document has been loaded)
		 * @var domDocument object
		 * @access public
		 * @since 0.1
		 */
		public $_dom;
		/**
		 * if page was loaded correctly or not
		 * @var bool
		 * @access public
		 * @since 0.1
		 */
		public $_found;
		/**
		 * array of page meta keywords (null when the page has no keywords meta tag)
		 * @var array
		 * @access public
		 * @since 0.1
		 */
		public $_keywords;
		/**
		 * any other element which is requested by tag for faster access
		 * @var array
		 * @access public
		 * @since 0.1
		 */
		public $_tags;
		/**
		 * any other element which is requested by id for faster access
		 * @var array
		 * @access public
		 * @since 0.1
		 */
		public $_ids;

		/**
		 * Class constructor
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @param $url string url to load, or the html itself when $html is true
		 * @param $html bool true when $url contains html markup rather than a url
		 *
		 * @return Void
		 */
		public function __construct($url,$html=false){
			$this->_url = $url;
			$this->_body = "";
			$this->_dom = "";
			$this->_found = false;
			$this->reset_cache();
			if ($html){
				$this->get_local_html($url);
			}else{
				$this->get_remote_html();
			}
		}

		/**
		 * get_local_html will load domDocument object from html given as a string
		 *
		 * Sets $_body to the given markup and $_found to whether the markup
		 * could be parsed (an empty string cannot).
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @param (string) $body html to load
		 *
		 * @return Void
		 */
		public function get_local_html($body) {
			$this->reset_cache();
			$this->_body = (string) $body;
			$this->_found = $this->load_dom($this->_body);
		}

		/**
		 * get_remote_html will load remote url into domDocument object
		 *
		 * The page counts as found only when the request succeeds with HTTP
		 * status 200 and the returned body can be parsed.
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @uses wp_remote_get and falls back to curl_exec
		 *
		 * @return Void
		 */
		public function get_remote_html(){
			$this->reset_cache();
			$this->_body = "";
			$this->_found = false;
			$body = false;
			if (function_exists('wp_remote_get')){
				$resp = wp_remote_get( $this->_url );
				// wp_remote_get returns a WP_Error object on transport failure,
				// which must not be indexed like an array.
				if ( !is_wp_error( $resp ) && isset( $resp['response']['code'] ) && 200 == $resp['response']['code'] ) {
					$body = isset( $resp['body'] ) ? $resp['body'] : "";
				}
			}elseif (function_exists('curl_init')){
				$curl = curl_init($this->_url);
				if ($curl !== false){
					curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
					// Follow redirects and require a 200 so both transports
					// agree on what "found" means.
					curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
					curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
					$result = curl_exec($curl);
					if ($result !== false && 200 == curl_getinfo($curl, CURLINFO_HTTP_CODE)){
						$body = $result;
					}
					// Handles are freed automatically from PHP 8.0 on, where
					// curl_close() is a no-op.
					if (PHP_VERSION_ID < 80000){
						curl_close($curl);
					}
				}
			}
			if ($body !== false){
				$this->_body = (string) $body;
				$this->_found = $this->load_dom($this->_body);
			}
		}

		/**
		 * Title function will return the title of the current page
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @return string|null text of the first title element, null when there is none
		 */
		public function Title(){
			if ($this->_title != ""){
				return $this->_title;
			}
			$this->_title = null;
			if ($this->has_dom()){
				$titles = $this->_dom->getElementsByTagName('title');
				// Take the first title instead of demanding exactly one, so a
				// document with additional title elements still reports one.
				if ($titles->length > 0){
					$this->_title = $titles->item(0)->nodeValue;
				}
			}
			return $this->_title;
		}

		/**
		 * Description function will return the description of the current page
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @return string|null content of the description meta tag, null when there is none
		 */
		public function Description(){
			if ($this->_description != ""){
				return $this->_description;
			}
			$this->_description = $this->get_meta_content('description');
			return $this->_description;
		}

		/**
		 * KeyWords function will return an array of the keywords of the current page
		 *
		 * The content of the keywords meta tag is split on commas and each
		 * keyword is trimmed of surrounding whitespace.
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @return array|null array of strings, null when there is no keywords meta tag
		 */
		public function Keywords(){
			// $_keywords is null after an unsuccessful lookup; count(null)
			// throws on PHP 8, so check the type first.
			if (is_array($this->_keywords) && count($this->_keywords) > 0){
				return $this->_keywords;
			}
			$content = $this->get_meta_content('keywords');
			if ($content === null){
				$this->_keywords = null;
			}else{
				$this->_keywords = array_map('trim', explode(",", $content));
			}
			return $this->_keywords;
		}

		/**
		 * getEByTagName function to get elements by tag name
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @param string $tag tagName
		 * @param string $output dom: DOMNodeList of the elements, array: array of tag,html,and attributes as array per element, anything else: array of markup strings
		 * @param boolean $force force generation or load from cache
		 *
		 * @return mixed depend on what you set output to be, will return null when tag name not found
		 */
		public function getEByTagName($tag,$output = "dom",$force = false){
			$format = $this->normalize_output($output);
			// Reuse the cache only when it holds the requested format; it
			// stores just the most recent result per tag.
			if (!$force && isset($this->_tags[$tag]) && $this->list_format($this->_tags[$tag]) === $format){
				return $this->_tags[$tag];
			}
			$this->_tags[$tag] = null;
			if (!$this->has_dom()){
				return null;
			}
			$nodes = $this->_dom->getElementsByTagName($tag);
			// getElementsByTagName returns an empty list, never null, when
			// nothing matches.
			if ($nodes->length === 0){
				return null;
			}
			if ($format === "dom"){
				$result = $nodes;
			}else{
				$result = array();
				foreach ($nodes as $element) {
					if ($format === "array"){
						$result[] = $this->element_to_array($element, $tag);
					}else{
						$result[] = $this->_dom->saveXML($element);
					}
				}
			}
			$this->_tags[$tag] = $result;
			return $result;
		}

		/**
		 * getEByID function to get elements by id
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @param string $id Element id to fetch
		 * @param string $output dom: domElement object, array: array of tag,html,and attributes as array, anything else: markup string of the element
		 * @param boolean $force force generation or load from cache
		 *
		 * @return mixed depend on what you set output to be, will return null when the id is not found
		 */
		public function getEByID($id,$output = "dom",$force = false){
			$format = $this->normalize_output($output);
			if (!$force && isset($this->_ids[$id]) && $this->element_format($this->_ids[$id]) === $format){
				return $this->_ids[$id];
			}
			$this->_ids[$id] = null;
			$element = $this->getElementById($id);
			if ($element === null) {
				return null;
			}
			if ($format === "dom"){
				$result = $element;
			}elseif ($format === "array"){
				$result = $this->element_to_array($element);
			}else{
				$result = $this->_dom->saveXML($element);
			}
			$this->_ids[$id] = $result;
			return $result;
		}

		/**
		 * getElementById using XpathDom
		 *
		 * @access public
		 * @since 0.1
		 * @author Ohad Raz <[email protected]>
		 *
		 * @param string $id element id to get
		 *
		 * @return DOMNode|null first node whose id attribute equals $id, null when there is none
		 */
		public function getElementById($id){
			if (!$this->has_dom()){
				return null;
			}
			$xpath = new DOMXPath($this->_dom);
			$nodes = $xpath->query('//*[@id=' . $this->xpath_literal($id) . ']');
			if ($nodes === false || $nodes->length === 0){
				return null;
			}
			return $nodes->item(0);
		}

		/**
		 * Clears the lazily computed title/description/keywords and the element caches.
		 *
		 * @return Void
		 */
		private function reset_cache(){
			$this->_title = "";
			$this->_description = "";
			$this->_keywords = array();
			$this->_tags = array();
			$this->_ids = array();
		}

		/**
		 * Tells whether a document has been loaded into $_dom.
		 *
		 * @return bool
		 */
		private function has_dom(){
			return $this->_dom instanceof DOMDocument;
		}

		/**
		 * Parses markup into a fresh DOMDocument and stores it in $_dom.
		 *
		 * @param string $html markup to parse
		 *
		 * @return bool true when the markup was parsed
		 */
		private function load_dom($html){
			$dom = new DOMDocument();
			$loaded = false;
			// loadHTML() rejects an empty string (ValueError on PHP 8), so
			// skip parsing in that case.
			if ($html !== ""){
				// Collect parser warnings internally rather than emitting
				// them, then restore the caller's libxml setting.
				$previous = libxml_use_internal_errors(true);
				$loaded = $dom->loadHTML($html);
				libxml_clear_errors();
				libxml_use_internal_errors($previous);
			}
			$this->_dom = $dom;
			return (bool) $loaded;
		}

		/**
		 * Returns the content attribute of the first meta tag with the given name.
		 *
		 * @param string $name lower-case meta name to look for (compared case-insensitively)
		 *
		 * @return string|null null when no such meta tag exists
		 */
		private function get_meta_content($name){
			if (!$this->has_dom()){
				return null;
			}
			foreach ($this->_dom->getElementsByTagName('meta') as $meta) {
				if ( strtolower( $meta->getAttribute( 'name' ) ) == $name ) {
					return $meta->getAttribute( 'content' );
				}
			}
			return null;
		}

		/**
		 * Maps the $output argument onto one of "dom", "array" or "html".
		 *
		 * @param string $output requested output format
		 *
		 * @return string
		 */
		private function normalize_output($output){
			$output = (string) $output;
			return ($output === "dom" || $output === "array") ? $output : "html";
		}

		/**
		 * Detects which output format a cached getEByTagName result is in.
		 *
		 * @param mixed $cached cached value
		 *
		 * @return string "dom", "array" or "html"
		 */
		private function list_format($cached){
			if (is_object($cached)){
				return "dom";
			}
			if (is_array($cached) && is_array(reset($cached))){
				return "array";
			}
			return "html";
		}

		/**
		 * Detects which output format a cached getEByID result is in.
		 *
		 * @param mixed $cached cached value
		 *
		 * @return string "dom", "array" or "html"
		 */
		private function element_format($cached){
			if (is_object($cached)){
				return "dom";
			}
			return is_array($cached) ? "array" : "html";
		}

		/**
		 * Describes an element as array('tag' => ..., 'html' => ..., 'attributes' => array(...)).
		 *
		 * @param DOMElement $element element to describe
		 * @param string|null $tag tag name to report; defaults to the element's own tag name
		 *
		 * @return array
		 */
		private function element_to_array($element, $tag = null){
			$described = array(
				'tag' => ($tag === null) ? $element->tagName : $tag,
				'html' => $this->_dom->saveXML($element),
				'attributes' => array(),
			);
			if ($element->hasAttributes()){
				foreach ($element->attributes as $attr) {
					$described['attributes'][$attr->nodeName] = $attr->nodeValue;
				}
			}
			return $described;
		}

		/**
		 * Quotes a value as an XPath 1.0 string literal.
		 *
		 * XPath has no escape character, so a value containing both quote
		 * kinds is assembled with concat().
		 *
		 * @param string $value raw value
		 *
		 * @return string
		 */
		private function xpath_literal($value){
			$value = (string) $value;
			if (strpos($value, "'") === false){
				return "'" . $value . "'";
			}
			if (strpos($value, '"') === false){
				return '"' . $value . '"';
			}
			return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
		}
	}//end class
}//end if
现在,一旦您有了这个类,您就可以非常简单地使用它来做您想要做的事情,例如:
// Fetch and parse the page; _found tells whether it was loaded correctly.
$WebPage = new webpage_info('http://en.bainternet.info');
if ($WebPage->_found){
	//page title
	$title = $WebPage->Title();
	//page description
	$description = $WebPage->Description();
	//every <img> on the page, one array per image
	$imgTags = $WebPage->getEByTagName('img','array');
	//here $imgTags will hold an array of all images on that page with all of the attributes as a nested array.
}
这样一来,剩下要做的事情就很少了。请注意:该类在WordPress内部会使用HTTP API,在WordPress之外则使用PHP cURL。