I post photos to Flickr from time to time, and then write blog articles about the photos. The blog articles get written days, weeks, sometimes months in advance of when they’re scheduled to appear on my blog … which makes it a tad difficult to add a link from a photo to all of the blog articles that mention it.

So a couple of weekends ago I knocked up a very crude script that uses the Flickr API (via phpFlickr) to work through all of the published blog posts and make sure each of my Flickr photos has links back to each blog post that mentions it. I’m posting it here in the public domain. Hopefully someone will find it a useful starting point to do something similar for their own blog.


<?php

require_once('phpflickr-3.0/phpFlickr.php');

// Flickr API credentials - replace the placeholders with your own values
$flickrApiKey = '<your Flickr API key>';
$flickrSecret = '<your Flickr API secret>';
$flickrToken  = '<your Flickr auth token>';

// authenticated phpFlickr client, with responses cached on disk under /tmp
// (3600 is presumably the cache lifetime in seconds - see phpFlickr docs)
$f = new phpFlickr($flickrApiKey, $flickrSecret);
$f->setToken($flickrToken);
$f->enableCache('fs', '/tmp', 3600);

// first step - find the first published blog post, by scraping the first
// post id that appears on the blog's front page
$url = 'http://blog.stuartherbert.com/photography/';
$rawHtml = file_get_contents($url);
if (!$rawHtml)
{
	die("Unable to download HTML for URL: " . $url . "\n");
}

// without a post id there is nowhere to start the crawl from, so stop
// here rather than carry on with an undefined $matches[1]
if (!preg_match('/<h2 id="post-([0-9]+)">/', $rawHtml, $matches))
{
	die("Unable to find a blog post id in the HTML for URL: " . $url . "\n");
}

// every blog post seen so far, keyed by the post's URL
$blogPosts = array();
// every Flickr photo seen so far, keyed by the Flickr photo id
$flickrPhotos = array();

// the crawl starts at this post and then follows 'Previous Post' links
$latestPost = $matches[1];
$nextPost = $url . '?p=' . $latestPost;

/**
 * Rebuild the description of each given photo and push it to Flickr.
 *
 * The new description is made of: the text that came before any existing
 * "Copyright " footer, a fresh copyright / links footer, and one bullet
 * link per blog post that mentions the photo.
 *
 * @param array  $photoIndex   photos to update, keyed by Flickr photo id;
 *                             each entry holds 'title', 'url', 'description'
 *                             and 'blogPosts' (post URL => array with 'title')
 * @param array  $flickrPhotos not used by this function; kept so existing
 *                             callers do not need to change
 * @param array  $blogPosts    not used by this function; kept so existing
 *                             callers do not need to change
 * @param object $f            client object providing
 *                             photos_setMeta($photoId, $title, $description)
 * @return void
 */
function updatePhotos($photoIndex, $flickrPhotos, $blogPosts, $f)
{
	foreach ($photoIndex as $photoId => $flickrPhoto)
	{
		// guard against a missing / non-string description
		$oldDescription = '';
		if (isset($flickrPhoto['description']) && is_string($flickrPhoto['description']))
		{
			$oldDescription = $flickrPhoto['description'];
		}

		// we must rewrite the description
		//
		// the greedy match keeps everything up to the LAST "Copyright ",
		// which is where a previously-generated footer starts
		if (preg_match('|(.*)Copyright |s', $oldDescription, $matches))
		{
			$description = $matches[1];
		}
		else if (trim($oldDescription) !== '')
		{
			// no footer yet: keep the existing text instead of throwing
			// it away, and separate it from the footer we are adding
			$description = rtrim($oldDescription) . "\n\n";
		}
		else
		{
			$description = '';
		}
		$description .= 'Copyright (c) Stuart Herbert. <a href="http://blog.stuartherbert.com/photography/" rel="nofollow">Blog</a> | <a href="http://twitter.com/stuherbert" rel="nofollow">Twitter</a> | <a href="http://www.facebook.com/stuartherbert" rel="nofollow">Facebook</a>' . "\n"
		     	. 'Photography: <a href="http://blog.stuartherbert.com/photography/merthyr-road" rel="nofollow">Merthyr Road</a> | <a href="http://blog.stuartherbert.com/photography/daily-desktop-wallpaper" rel="nofollow">Daily Desktop Wallpaper</a> | <a href="http://blog.stuartherbert.com/photography/project-25x9" rel="nofollow">25x9</a> | <a href="http://twitter.com/stuphotos" rel="nofollow">Twitter</a>.' . "\n\n";

		// singular or plural wording, depending on how many posts there are
		if (count($flickrPhoto['blogPosts']) == 1)
		{
			$description .= 'Want to know more about this photo? See this blog entry:' . "\n\n";
		}
		else
		{
			$description .= "Want to know more about this photo? See these blog entries:\n\n";
		}

		// one bullet per blog post that mentions this photo
		foreach ($flickrPhoto['blogPosts'] as $postUrl => $blogPost)
		{
			$description .= '* <a href="' . $postUrl . '">' . $blogPost['title'] . "</a>\n";
		}

		// description is made ... now to upload it
		echo "Photo: " . $photoId . ' :: ' . $flickrPhoto['title'] . "\n";
		echo "URL  : " . $flickrPhoto['url'] . "\n";
		echo "Old  : " . $oldDescription . "\n";
		echo "New  : " . $description . "\n";

		echo "\nPushing changes to Flickr ...";
		$result = $f->photos_setMeta($photoId, $flickrPhoto['title'], $description);

		// only an explicit false is treated as a failed call, so that we
		// do not claim success when the update did not happen
		if ($result === false)
		{
			echo " FAILED\n";
		}
		else
		{
			echo " done\n";
		}
	}
}

// walk through the blog one post at a time, following each post's
// 'Previous Post' link, until we reach a post that does not have one
while ($nextPost !== null)
{
	// the photos mentioned by the post we are about to look at; only
	// these photos are pushed to Flickr at the end of this iteration
	$photoIndex = array();

	// remember which page we are on, for use in error messages
	$currentPost = $nextPost;

	echo "Downloading $currentPost ...";
	$rawHtml = file_get_contents($currentPost);
	if (!$rawHtml)
	{
		echo " failed\n";
		die("Unable to download HTML for URL: " . $currentPost . "\n");
	}
	echo " done\n";

	// the post's permalink and title come from the first link that
	// follows the post's <h2> heading
	if (!preg_match('|<h2 id="post-([0-9]+)">.*<a href="(.*)".*>(.*)</a>|Us', $rawHtml, $matches))
	{
		die("Unable to find the blog post title in the HTML for URL: " . $currentPost . "\n");
	}
	$postUrl = $matches[2];
	$title = $matches[3];
	echo "Blog post title is: $title\n";
	echo "Blog post url   is: $postUrl\n";

	// [^"]* stops the capture at the end of this link's own href, so an
	// earlier link on the same line cannot be swallowed into the URL
	if (preg_match('|<a href="([^"]*)" rel="prev">Previous Post</a>|', $rawHtml, $matches))
	{
		$nextPost = $matches[1];
	}
	else
	{
		// no older post to visit - this is the last time around the loop
		$nextPost = null;
	}

	// only look for photos inside the body of the post
	if (!preg_match('|<div class="entry">(.*)<div style="clear:both;">|Us', $rawHtml, $matches))
	{
		die("regex failed again\n");
	}
	$entryHtml = $matches[1];

	// find every link to one of the Flickr photo pages
	preg_match_all('|(http://www\.flickr\.com/photos/stuartherbert/[0-9]+/)"|', $entryHtml, $matches);
	$blogPosts[$postUrl]['url']     = $postUrl;
	$blogPosts[$postUrl]['title']   = $title;
	$blogPosts[$postUrl]['matches'] = $matches;

	// a post can link to the same photo more than once; one lookup per
	// photo is enough
	foreach (array_unique($matches[1]) as $flickrPhoto)
	{
		// the photo id is the last path segment before the trailing '/'
		$parts = explode('/', $flickrPhoto);
		$photoId = $parts[count($parts)-2];
		$photoInfo = $f->photos_getInfo($photoId);

		// never push an update built from a failed lookup - that would
		// overwrite the photo's real title and description on Flickr
		if (!is_array($photoInfo) || !isset($photoInfo['title']))
		{
			echo "- Skipping photo " . $photoId . ": unable to fetch its details from Flickr\n";
			continue;
		}

		$flickrPhotos[$photoId]['url'] = $flickrPhoto;
		$flickrPhotos[$photoId]['title'] = $photoInfo['title'];
		$flickrPhotos[$photoId]['description'] = isset($photoInfo['description']) ? $photoInfo['description'] : '';
		// blog posts accumulate across iterations, so a photo that is
		// mentioned by several posts ends up linking to all of them
		$flickrPhotos[$photoId]['blogPosts'][$postUrl] = $blogPosts[$postUrl];

		// note the photos we need to update because we have
		// seen this post
		$photoIndex[$photoId] = $flickrPhotos[$photoId];

		echo "- Photo: " . $photoInfo["title"] . "\n";
	}

	updatePhotos($photoIndex, $flickrPhotos, $blogPosts, $f);
}

echo "\n\n";
echo "Photo scraping complete!!\n\n";

// when we get to here, every photo found has already been updated on
// flickr, one blog post at a time, by the loop above
?>

Comments are closed.