Scott Hurring » Code » PHP » URL Title Lookup

Download Code
download
url_title_v0.2.tgz
v0.2 beta · Dec 23, 2005

Description

This code will attempt to figure out the url's <title> tag so that links on your site will always have the most current and correct title.

The code grabs the first chunk of data from a URL, looks for <title>, and displays an <a href> tag using the URL's current title.

It is a neat way to always have your links use the most current page title of the pages you link to, although it can really slow the loading of *your* pages down if you link to slow/unavailable sites.

Usage & Examples

include('url_title_v0.1.php');

print url('hurring.com') ."\n";
// prints <a href='http://hurring.com'>Hurring.com</a>

print url('google.com') ."\n";
// prints <a href='http://google.com'>Google</a>

print url('hurring.com', 'Append me') ."\n";
// prints <a href='http://hurring.com'>Hurring.com: Append me</a>

print url('/code/php/') ."\n";
// prints <a href='http://hurring.com/code/php/'>Hurring.com: Code: PHP</a>

print url('/code/php/', "Append me") ."\n";
// prints <a href='http://hurring.com/code/php/'>Hurring.com: Code: PHP: Append me</a>

print url('groups.yahoo.com/groups/alhkjter/') ."\n";
// prints <a href='http://groups.yahoo.com/groups/alhkjter/'>groups.yahoo.com/groups/alhkjter/</a>

print url('groups.yahoo.com/groups/alhkjter/', 'Pirates') ."\n";
// prints <a href='http://groups.yahoo.com/groups/alhkjter/'>Pirates</a>

print url('garbage') ."\n";
// prints (Broken Link: garbage)

print url('garbage', 'Pirates') ."\n";
// prints (Broken Link: garbage: Pirates)


View source 'url_title.php'


<?
/**
 * URL title looker-upper.
 *
 * This code will grab a chunk of data from a URL, look at the <title>
 * and display an <a href> tag using the URL's current title.
 *
 * It is a neat way to always have your links use the most current page
 * title of the pages you link to, although it can really slow the loading
 * of *your* pages down if you link to slow/unavailable sites.
 *
 * @author Scott Hurring (scott at hurring dot com)
 * @version 0.2 (Dec 23, 2005) Merry christmas!
 * http://hurring.com/code/php/url_title/
 * @license GPL
 */

// Your site's domain. url_resolve() prepends it whenever it is given a
// URL that is located on the current server, like "/code/index.php".
$url_site_domain = 'hurring.com';

/**
 * Build an <a href> tag for $link, labelled with the linked page's <title>.
 *
 * The link is resolved with url_resolve(), opened, and the first chunk of
 * the response is searched for a <title> via url_grab_title().
 *
 * Outcomes:
 *  - URL cannot be opened:  "(Broken Link: $link)" or
 *                           "(Broken Link: $link: $text)"
 *  - no usable <title>:     an anchor labelled with $text, or with $link
 *                           when no $text was given
 *  - <title> found:         an anchor labelled with the title, followed by
 *                           ": $text" when $text was given
 *
 * Opening remote URLs with fopen() requires PHP's allow_url_fopen setting.
 *
 * @param string $link  the URL whose title you want to fetch
 * @param string $text  optional text to append to the page title; it is
 *                      caller-supplied and emitted verbatim
 * @return string       HTML fragment (anchor tag or broken-link notice)
 */
function url($link, $text = '')
{
    $text = (string) $text;

    // Ensure $link is a valid URL
    $url = url_resolve($link);

    // The URL lands inside a single-quoted attribute, so quotes and
    // ampersands must be escaped. Escaping is done byte-wise (single-byte
    // charset) and without double-encoding so existing entities survive.
    $href = htmlspecialchars($url, ENT_QUOTES, 'ISO-8859-1', false);

    // '@' is deliberate: an unreachable site is an expected outcome that
    // is reported through the "Broken Link" text, not through a warning.
    $fp = @fopen($url, 'r');
    if (!$fp) {
        return "(Broken Link: $link" . ($text !== '' ? ": {$text}" : '') . ')';
    }

    // Grab <title>*</title> from the first "chunk" of data, then release
    // the connection right away instead of leaving the handle open.
    $title = url_grab_title($fp);
    fclose($fp);

    // No <title> tag in first chunk...
    if ($title === null || trim($title) === '') {
        $title = ($text !== '' ? $text : $link);
        return "<a href='$href'>$title</a>";
    }

    // The title comes from a remote page and is therefore untrusted.
    // The remote encoding is unknown, so escape byte-wise: the HTML special
    // characters are ASCII and never occur inside UTF-8 multibyte
    // sequences, which makes this safe for UTF-8 and Latin-1 pages alike.
    $title = htmlspecialchars($title, ENT_QUOTES, 'ISO-8859-1', false);

    // User gave me some text to append to the link title
    if ($text !== '') {
        $title .= ': ' . $text;
    }

    return "<a href='$href'>$title</a>";
}

/**
 * Attempt to turn a partial URL (e.g. "server.com" or "/code/")
 * into a valid URL (e.g. "http://server.com" or "http://server.com/code/").
 *
 * Rules, in order:
 *  - A URL that already carries an http(s):// or ftp(s):// scheme is
 *    returned unchanged.
 *  - If the part before the first "/", "?" or "#" ends in one of the known
 *    TLDs (optionally followed by ":port"), it is treated as a host name.
 *  - Anything else (including every URL starting with "/") is treated as a
 *    path on this site and prefixed with the global $url_site_domain.
 *
 * Runs of slashes in the path are collapsed to one; the query string and
 * fragment are left untouched. The result always uses the http:// scheme
 * unless the input already had a scheme.
 *
 * @param string $url  partial or complete URL
 * @return string      absolute URL
 */
function url_resolve($url)
{
    global $url_site_domain;

    $url = (string) $url;

    // Already absolute: prefixing "http://" or collapsing the "//" after
    // the scheme would corrupt it, so hand it back as-is.
    if (preg_match('/^(https?|ftps?):\/\//i', $url)) {
        return $url;
    }

    // Only the leading segment can be a host name; a ".com" further down
    // the path (or a leading "/") must not make us skip the site prefix.
    $host = (string) substr($url, 0, strcspn($url, '/?#'));
    if (!preg_match('/\.(com|net|org|co|uk|edu|info|biz)(:\d+)?$/i', $host)) {
        // assume it's not "server.tld" and instead "/dir/file.ext"
        $url = $url_site_domain . '/' . $url;
    }

    // Joining domain + "/" + "/path" produces "//"; clean that up, but only
    // in the path so that values such as "?u=http://..." stay intact.
    $cut  = strcspn($url, '?#');
    $path = preg_replace('/\/{2,}/', '/', (string) substr($url, 0, $cut));

    return 'http://' . $path . substr($url, $cut);
}

/**
 * Fetch a "chunk" of data from an open stream and look for <title> in it.
 *
 * At most $chunk_size bytes are read from the current position of $fp.
 * The stream is not closed here; that is left to the caller.
 *
 * @param resource $fp          readable stream, e.g. from fopen()
 * @param int      $chunk_size  how many bytes to inspect; most sites seem
 *                              to have <title> within the first 512
 * @return string|null          the title text with whitespace runs
 *                              (including line breaks) collapsed to single
 *                              spaces and trimmed, or null when the chunk
 *                              holds no complete <title>...</title>
 */
function url_grab_title($fp, $chunk_size = 512)
{
    // A single fread() on a network stream may return fewer bytes than
    // requested, so keep reading until the chunk is full or data runs out.
    $chunk = '';
    while (strlen($chunk) < $chunk_size && !feof($fp)) {
        $piece = fread($fp, $chunk_size - strlen($chunk));
        if ($piece === false || $piece === '') {
            break;
        }
        $chunk .= $piece;
    }

    // Look for <title ...>(.*?)</title> in the text. The "s" modifier lets
    // the title span several lines; attributes on the tag are tolerated.
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $chunk, $matches)) {
        // Line breaks become spaces (not nothing) so words are not glued
        // together when a title is wrapped across lines.
        return trim(preg_replace('/\s+/', ' ', $matches[1]));
    }

    return null;
}

/**
 * Run this to test the code.
 *
 * Prints the result of url() for a set of sample links, one per line.
 * Every call performs a live lookup of the linked page. The comment after
 * each call shows the output the author expected; actual titles depend on
 * what the remote sites currently serve.
 */
function url_test()
{
    print url('hurring.com') ."\n";
    // <a href='http://hurring.com'>Hurring.com</a>

    print url('google.com') ."\n";
    // <a href='http://google.com'>Google</a>

    print url('hurring.com', 'Append me') ."\n";
    // <a href='http://hurring.com'>Hurring.com: Append me</a>

    print url('/code/php/') ."\n";
    // <a href='http://hurring.com/code/php/'>Hurring.com: Code: PHP</a>

    print url('/code/php/', "Append me") ."\n";
    // <a href='http://hurring.com/code/php/'>Hurring.com: Code: PHP: Append me</a>

    print url('groups.yahoo.com/groups/alhkjter/') ."\n";
    // <a href='http://groups.yahoo.com/groups/alhkjter/'>groups.yahoo.com/groups/alhkjter/</a>

    print url('groups.yahoo.com/groups/alhkjter/', 'Pirates') ."\n";
    // <a href='http://groups.yahoo.com/groups/alhkjter/'>Pirates</a>

    print url('garbage') ."\n";
    // (Broken Link: garbage)

    print url('garbage', 'Pirates') ."\n";
    // (Broken Link: garbage: Pirates)
}
?>