
<?php
/**
 *  Base include file for SimpleTest
 *  @package    SimpleTest
 *  @subpackage WebTester
 *  @version    $Id: page.php 1938 2009-08-05 17:16:23Z dgheath $
 */

/*
 *  include other SimpleTest class files
 */
require_once(dirname(__FILE__) . '/http.php');
require_once(dirname(__FILE__) . '/php_parser.php');
require_once(dirname(__FILE__) . '/tag.php');
require_once(dirname(__FILE__) . '/form.php');
require_once(dirname(__FILE__) . '/selector.php');
/**
 *    A wrapper for a web page. Holds the raw response details
 *    together with the links, forms, frames and title that are
 *    handed to it through the add/set methods.
 *    @package SimpleTest
 *    @subpackage WebTester
 */
class SimplePage {
    /** @var array Link tags registered through addLink(). */
    private $links = array();
    /** @var mixed Title tag set through setTitle(), false until then. */
    private $title = false;
    private $last_widget;
    private $label;
    /** @var array Form objects set through setForms(). */
    private $forms = array();
    /** @var array Frame tags set through setFrames(). */
    private $frames = array();
    /** @var mixed Error reported by the response, or a placeholder message. */
    private $transport_error;
    /** @var mixed Unparsed page content, false when nothing was fetched. */
    private $raw;
    /** @var mixed Cached result of normalise($raw), false until first use. */
    private $text = false;
    /** @var mixed Request as sent, false when nothing was fetched. */
    private $sent;
    /** @var mixed Response headers object, false when nothing was fetched. */
    private $headers;
    /** @var string Request method; 'GET' when nothing was fetched. */
    private $method;
    /** @var mixed URL of the page, false when nothing was fetched. */
    private $url;
    /** @var mixed SimpleUrl set through setBase(), false until then. */
    private $base = false;
    /** @var mixed Request data, false when nothing was fetched. */
    private $request_data;

    /**
     *    Parses a page ready to access its contents.
     *    With a response the details are copied from it, without
     *    one the page is put into the "nothing fetched" state.
     *    @param SimpleHttpResponse $response     Result of HTTP fetch.
     *    @access public
     */
    public function __construct($response = false) {
        if ($response) {
            $this->extractResponse($response);
        } else {
            $this->noResponse();
        }
    }

    /**
     *    Extracts all of the response information.
     *    @param SimpleHttpResponse $response    Response being parsed.
     *    @access private
     */
    protected function extractResponse($response) {
        $this->transport_error = $response->getError();
        $this->raw = $response->getContent();
        $this->sent = $response->getSent();
        $this->headers = $response->getHeaders();
        $this->method = $response->getMethod();
        $this->url = $response->getUrl();
        $this->request_data = $response->getRequestData();
    }

    /**
     *    Sets up a missing response.
     *    @access private
     */
    protected function noResponse() {
        $this->transport_error = 'No page fetched yet';
        $this->raw = false;
        $this->sent = false;
        $this->headers = false;
        $this->method = 'GET';
        $this->url = false;
        $this->request_data = false;
    }

    /**
     *    Original request as bytes sent down the wire.
     *    @return mixed              Sent content.
     *    @access public
     */
    public function getRequest() {
        return $this->sent;
    }

    /**
     *    Accessor for raw text of page.
     *    @return string        Raw unparsed content.
     *    @access public
     */
    public function getRaw() {
        return $this->raw;
    }

    /**
     *    Accessor for plain text of page as a text browser
     *    would see it. The normalised text is computed on the
     *    first call and cached.
     *    @return string        Plain text of page.
     *    @access public
     */
    public function getText() {
        // Compare against the "not yet computed" marker rather than
        // truthiness, so an empty or '0' result is cached as well.
        if ($this->text === false) {
            $this->text = SimplePage::normalise($this->raw);
        }
        return $this->text;
    }

    /**
     *    Accessor for raw headers of page.
     *    @return string       Header block as text, false if no headers.
     *    @access public
     */
    public function getHeaders() {
        if ($this->headers) {
            return $this->headers->getRaw();
        }
        return false;
    }

    /**
     *    Original request method.
     *    @return string        GET, POST or HEAD.
     *    @access public
     */
    public function getMethod() {
        return $this->method;
    }

    /**
     *    Original resource name.
     *    @return SimpleUrl        Current url.
     *    @access public
     */
    public function getUrl() {
        return $this->url;
    }

    /**
     *    Base URL as set through setBase().
     *    @return SimpleUrl        Base url, false if none was set.
     *    @access public
     */
    public function getBaseUrl() {
        return $this->base;
    }

    /**
     *    Original request data.
     *    @return mixed              Sent content.
     *    @access public
     */
    public function getRequestData() {
        return $this->request_data;
    }

    /**
     *    Accessor for last error.
     *    @return string        Error from last response.
     *    @access public
     */
    public function getTransportError() {
        return $this->transport_error;
    }

    /**
     *    Accessor for current MIME type.
     *    @return string    MIME type as string; e.g. 'text/html',
     *                      false if no headers.
     *    @access public
     */
    public function getMimeType() {
        if ($this->headers) {
            return $this->headers->getMimeType();
        }
        return false;
    }

    /**
     *    Accessor for HTTP response code.
     *    @return integer    HTTP response code received,
     *                       false if no headers.
     *    @access public
     */
    public function getResponseCode() {
        if ($this->headers) {
            return $this->headers->getResponseCode();
        }
        return false;
    }

    /**
     *    Accessor for last Authentication type. Only valid
     *    straight after a challenge (401).
     *    @return string    Description of challenge type,
     *                      false if no headers.
     *    @access public
     */
    public function getAuthentication() {
        if ($this->headers) {
            return $this->headers->getAuthentication();
        }
        return false;
    }

    /**
     *    Accessor for last Authentication realm. Only valid
     *    straight after a challenge (401).
     *    @return string    Name of security realm,
     *                      false if no headers.
     *    @access public
     */
    public function getRealm() {
        if ($this->headers) {
            return $this->headers->getRealm();
        }
        return false;
    }

    /**
     *    Accessor for current frame focus. A plain page has
     *    no frame focus.
     *    @return array    Always empty.
     *    @access public
     */
    public function getFrameFocus() {
        return array();
    }

    /**
     *    Sets the focus by index. The integer index starts from 1.
     *    @param integer $choice    Chosen frame.
     *    @return boolean           Always false.
     *    @access public
     */
    public function setFrameFocusByIndex($choice) {
        return false;
    }

    /**
     *    Sets the focus by name. Always fails for a leaf page.
     *    @param string $name    Chosen frame.
     *    @return boolean        False as no frames.
     *    @access public
     */
    public function setFrameFocus($name) {
        return false;
    }

    /**
     *    Clears the frame focus. Does nothing for a leaf page.
     *    @access public
     */
    public function clearFrameFocus() {
    }

    /**
     *    Stores the frame tags of the page. They are read back
     *    by hasFrames() and getFrameset().
     *    @param array $frames    Frame tags; each must offer
     *                            getAttribute().
     */
    public function setFrames($frames) {
        $this->frames = $frames;
    }

    /**
     *    Test to see if link is an absolute one, that is
     *    whether it parses to both a scheme and a host.
     *    @param string $url     Url to test.
     *    @return boolean        True if absolute.
     *    @access protected
     */
    protected function linkIsAbsolute($url) {
        $parsed = new SimpleUrl($url);
        return (boolean)($parsed->getScheme() && $parsed->getHost());
    }

    /**
     *    Adds a link to the page.
     *    @param SimpleAnchorTag $tag      Link to accept.
     */
    public function addLink($tag) {
        $this->links[] = $tag;
    }

    /**
     *    Set the forms
     *    @param array $forms    An array of SimpleForm objects
     */
    public function setForms($forms) {
        $this->forms = $forms;
    }

    /**
     *    Test for the presence of a frameset.
     *    @return boolean        True if frameset.
     *    @access public
     */
    public function hasFrames() {
        return count($this->frames) > 0;
    }

    /**
     *    Accessor for frame name and source URL for every frame that
     *    will need to be loaded. Immediate children only.
     *    @return boolean/array     False if no frameset or
     *                              otherwise a hash of frame URLs.
     *                              The key is either a numerical
     *                              base one index or the name attribute.
     *    @access public
     */
    public function getFrameset() {
        if (! $this->hasFrames()) {
            return false;
        }
        $urls = array();
        $position = 0;
        foreach ($this->frames as $frame) {
            $position++;
            $name = $frame->getAttribute('name');
            $url = new SimpleUrl($frame->getAttribute('src'));
            // Unnamed frames are keyed by their one-based position.
            $urls[$name ? $name : $position] = $this->expandUrl($url);
        }
        return $urls;
    }

    /**
     *    Fetches a list of loaded frames.
     *    @return array/string    Just the URL for a single page.
     *    @access public
     */
    public function getFrames() {
        $url = $this->expandUrl($this->getUrl());
        return $url->asString();
    }

    /**
     *    Accessor for a list of all links.
     *    @return array   List of expanded link URLs as strings.
     *    @access public
     */
    public function getUrls() {
        $all = array();
        foreach ($this->links as $link) {
            $url = $this->getUrlFromLink($link);
            $all[] = $url->asString();
        }
        return $all;
    }

    /**
     *    Accessor for URLs by the link label. The label is compared,
     *    as a string, against the text each link reports through
     *    getText(); any whitespace normalisation is whatever that
     *    method applies. The comparison itself is case sensitive.
     *    @param string $label    Text of link.
     *    @return array           List of links with that label.
     *    @access public
     */
    public function getUrlsByLabel($label) {
        $wanted = (string)$label;
        $matches = array();
        foreach ($this->links as $link) {
            // String comparison avoids numeric juggling such as
            // '1e3' == '1000', in line with getUrlById().
            if ((string)$link->getText() === $wanted) {
                $matches[] = $this->getUrlFromLink($link);
            }
        }
        return $matches;
    }

    /**
     *    Accessor for a URL by the id attribute.
     *    @param string $id       Id attribute of link.
     *    @return SimpleUrl       URL with that id or false if none.
     *    @access public
     */
    public function getUrlById($id) {
        foreach ($this->links as $link) {
            if ($link->getAttribute('id') === (string)$id) {
                return $this->getUrlFromLink($link);
            }
        }
        return false;
    }

    /**
     *    Converts a link tag into a target URL.
     *    @param SimpleAnchor $link    Parsed link.
     *    @return SimpleUrl            URL with frame target if any.
     *    @access private
     */
    protected function getUrlFromLink($link) {
        $url = $this->expandUrl($link->getHref());
        $target = $link->getAttribute('target');
        if ($target) {
            // NOTE(review): relies on SimpleUrl::setTarget() from url.php,
            // which is not visible in this file - confirm.
            $url->setTarget($target);
        }
        return $url;
    }

    /**
     *    Expands expandomatic URLs into fully qualified
     *    URLs. The base URL, when one was set, is itself resolved
     *    against the page URL before the link is resolved against it.
     *    @param SimpleUrl $url        Relative URL, as object or string.
     *    @return SimpleUrl            Absolute URL.
     *    @access public
     */
    public function expandUrl($url) {
        if (! is_object($url)) {
            $url = new SimpleUrl($url);
        }
        $location = $this->getBaseUrl() ? $this->getBaseUrl() : new SimpleUrl();
        return $url->makeAbsolute($location->makeAbsolute($this->getUrl()));
    }

    /**
     *    Sets the base url for the page.
     *    @param string $url    Base URL for page.
     */
    public function setBase($url) {
        $this->base = new SimpleUrl($url);
    }

    /**
     *    Sets the title tag contents.
     *    @param SimpleTitleTag $tag    Title of page.
     */
    public function setTitle($tag) {
        $this->title = $tag;
    }

    /**
     *    Accessor for parsed title.
     *    @return string     Title or false if no title is present.
     *    @access public
     */
    public function getTitle() {
        if ($this->title) {
            return $this->title->getText();
        }
        return false;
    }

    /**
     *    Finds a held form by button label. Will only
     *    search correctly built forms.
     *    @param SimpleSelector $selector       Button finder.
     *    @return SimpleForm                    Form object containing
     *                                          the button, null if none.
     *    @access public
     */
    public function getFormBySubmit($selector) {
        foreach ($this->forms as $form) {
            if ($form->hasSubmit($selector)) {
                return $form;
            }
        }
        return null;
    }

    /**
     *    Finds a held form by image using a selector.
     *    Will only search correctly built forms.
     *    @param SimpleSelector $selector  Image finder.
     *    @return SimpleForm               Form object containing
     *                                     the image, null if none.
     *    @access public
     */
    public function getFormByImage($selector) {
        foreach ($this->forms as $form) {
            if ($form->hasImage($selector)) {
                return $form;
            }
        }
        return null;
    }

    /**
     *    Finds a held form by the form ID. A way of
     *    identifying a specific form when we have control
     *    of the HTML code.
     *    @param string $id     Form label.
     *    @return SimpleForm    Form object containing the matching ID,
     *                          null if none.
     *    @access public
     */
    public function getFormById($id) {
        $wanted = (string)$id;
        foreach ($this->forms as $form) {
            // Compared as strings so numeric-looking ids are not
            // juggled ('1e3' vs '1000'), in line with getUrlById().
            if ((string)$form->getId() === $wanted) {
                return $form;
            }
        }
        return null;
    }

    /**
     *    Sets a field on each form in which the field is
     *    available. Every form is tried, even after a success.
     *    @param SimpleSelector $selector    Field finder.
     *    @param string $value               Value to set field to.
     *    @param mixed $position             Passed through unchanged to
     *                                       each form's setField().
     *    @return boolean                    True if at least one form
     *                                       accepted the value.
     *    @access public
     */
    public function setField($selector, $value, $position=false) {
        $is_set = false;
        foreach ($this->forms as $form) {
            if ($form->setField($selector, $value, $position)) {
                $is_set = true;
            }
        }
        return $is_set;
    }

    /**
     *    Accessor for a form element value within a page.
     *    The first form reporting a non-null value wins.
     *    @param SimpleSelector $selector    Field finder.
     *    @return string/boolean             A string if the field is
     *                                       present, false if unchecked
     *                                       and null if missing.
     *    @access public
     */
    public function getField($selector) {
        foreach ($this->forms as $form) {
            $value = $form->getValue($selector);
            if ($value !== null) {
                return $value;
            }
        }
        return null;
    }

    /**
     *    Turns HTML into text browser visible text. Images
     *    are converted to their alt text and tags are suppressed.
     *    Entities are converted to their visible representation.
     *    Comments and the contents of script, option and textarea
     *    elements are dropped, runs of whitespace collapse to one
     *    space, and surrounding whitespace and non-breaking spaces
     *    are trimmed.
     *    @param string $html        HTML to convert.
     *    @return string             Plain text.
     *    @access public
     */
    public static function normalise($html) {
        $text = preg_replace('#<!--.*?-->#si', '', $html);
        $text = preg_replace('#<(script|option|textarea)[^>]*>.*?</\1>#si', '', $text);
        // Case-insensitive like the patterns above, so <IMG ALT=...> keeps its alt text.
        $text = preg_replace(
            '#<img[^>]*alt\s*=\s*("([^"]*)"|\'([^\']*)\'|([a-zA-Z_]+))[^>]*>#i',
            ' \2\3\4 ',
            $text
        );
        $text = preg_replace('#<[^>]*>#', '', $text);
        $text = html_entity_decode($text, ENT_QUOTES);
        $text = preg_replace('#\s+#', ' ', $text);
        return self::trimWhitespaceAndNbsp($text);
    }

    /**
     *    Strips whitespace and non-breaking spaces from both ends
     *    of a string without damaging multi-byte characters.
     *    @param string $text    Text to trim.
     *    @return string         Trimmed text.
     */
    private static function trimWhitespaceAndNbsp($text) {
        $text = (string)$text;
        // In valid UTF-8 a non-breaking space is the two bytes C2 A0, and
        // A0 is also the final byte of other characters (e.g. "à" is
        // C3 A0), so a byte-wise trim of "\xA0" would corrupt the text.
        if (preg_match('//u', $text) === 1) {
            $edge = '[ \t\n\r\x00\x0B\x{A0}]+';
            $trimmed = preg_replace('/^' . $edge . '|' . $edge . '$/u', '', $text);
            if ($trimmed !== null) {
                return $trimmed;
            }
        }
        // Single-byte encodings: here \xA0 is the non-breaking space itself.
        return trim(trim($text), "\xA0");
    }
}