OU blog

Personal Blogs

neil

Oh dear

Visible to anyone in the world
Edited by Neil Anderson, Thursday, 2 Feb 2012, 19:14

I didn't make The Daily WTF myself, but I have done something similar...

Here's the PHP file...

[There's another file for creating a Google sitemap.] Edit: Actually there isn't; I hadn't noticed that I'd rolled three things into one. I wonder where I use the footer stuff?

<?php

//Builds every XML map for the site in one run: the styled site map, the
//footer directory map, the plain Google sitemap, and the blog indexes for
//the current year and the current month.

//load the XMLmap class; require_once halts here if the file is missing
//instead of failing later at the first "new XMLmap"
require_once('assets/php/mapClass.php');

$site_root = dirname(__FILE__);
$site_url = 'http://neilanderson.freehostia.com';

//my site map
$my_map = new XMLmap($site_root, $site_url, 'map.xml', '/assets/xml/map.xsl');
$my_map->add_omit_dir(array('/assets'));
//NOTE(review): '/blogs/degree/mst121_m263' looks like a directory, so it
//probably never matches a file here - confirm whether add_omit_dir was meant
$my_map->add_omit_file(array('/404.htm', '/blogs/degree/mst121_m263'));
$my_map->google_map = false;
$my_map->include_dir_structure = true;
$my_map->add_meta(array('description'));
$my_map->write_XML($site_root);

//the footer map
$foot_map = new XMLmap($site_root, $site_url, 'assets/xml/dir.xml', '/assets/xml/dir.xsl');
$foot_map->add_omit_dir(array('/assets', '/colophon', '/about'));
$foot_map->add_omit_file(array('/404.htm', '/blogs/degree/mst121_m263/archive.htm'));
$foot_map->google_map = false;
$foot_map->include_dir_structure = true;
$foot_map->write_XML($site_root);

//the googlemap (no stylesheet, default google_map = true)
$google_map = new XMLmap($site_root, $site_url, 'sitemap.xml');
$google_map->add_omit_dir(array('/assets'));
//NOTE(review): same directory-looking entry as in the site map above
$google_map->add_omit_file(array('/404.htm', '/blogs/degree/mst121_m263'));
$google_map->write_XML($site_root);

//other thoughts maps

//date stuff so that we update the current year and month blogs xml...
//both values come from one timestamp so they cannot straddle midnight on New Year's Eve
$now = time();
$year = date('Y', $now);    //e.g. 2012
$month = date('Y/m', $now); //e.g. 2012/02 (zero-padded month)

//year
$thoughts_map = new XMLmap($site_root, $site_url, 'thoughts/other/'.$year.'/index.xml', '/assets/xml/blog.xsl');
$thoughts_map->add_meta(array('description', 'keywords', 'title'));
//scan the same year directory the index is written into (this was hard-coded to 2009)
$thoughts_map->write_XML('thoughts/other/'.$year);

//month
$month_thoughts_map = new XMLmap($site_root, $site_url, 'thoughts/other/'.$month.'/index.xml', '/assets/xml/blog.xsl');
$month_thoughts_map->add_meta(array('description', 'keywords', 'title'));
$month_thoughts_map->write_XML('thoughts/other/'.$month);

?>

The Class file...

<?php

/**
 * Walks a directory tree and writes an XML map of the files it finds.
 *
 * Each included file becomes a <url> element holding <loc>, <lastmod> and any
 * requested meta tags scraped from the file. Optionally every directory is
 * wrapped in a <sitemap:directory> element so the tree structure is kept.
 * The result is written to the path given to the constructor.
 */
class XMLmap{

    //PROPERTIES

    //PROTECTED

    //filesystem root got by constructor; replaced by $base_path in every emitted path
    protected $root;

    //base URL got by constructor; substituted for $root in emitted paths
    protected $base_path;

    //xml output path and optional xsl stylesheet href (false = no stylesheet)
    protected $xml_path;
    protected $xsl_path;

    //EXCLUDE STUFF
    //directories and files to exclude, compared as $root . entry
    protected $omit_dirs = array();
    protected $omit_files = array();

    //FILES
    //file extensions to include (no dot, case-sensitive)
    //only htm by default; add others such as html with add_file_type()
    protected $include_types = array('htm');

    //METAS TO SCRAPE
    protected $use_metas = array();

    //THE XML STRING
    protected $xml_string = "<?xml version='1.0' encoding='UTF-8'?>";

    //PUBLIC

    //should we actually include file details
    public $include_files = true;
    //create dir structure
    public $include_dir_structure = false;
    //a real google sitemap?? (false adds the custom sitemap: namespace)
    public $google_map = true;


    //CONSTRUCTOR FUNCTION
    /**
     * @param string       $root filesystem path that is replaced by $base in emitted paths
     * @param string       $base base URL substituted for $root
     * @param string       $xml  path of the XML file to write
     * @param string|false $xsl  href of an XSL stylesheet to link, or false for none
     */
    public function __construct($root, $base, $xml, $xsl=false){
        $this->root = $root;
        $this->base_path = $base;
        $this->xml_path = $xml;
        $this->xsl_path = $xsl;
    }

    //METHODS

    //PUBLIC

    /**
     * Add directories to omit.
     *
     * @param array $dirs paths that are appended to the root before comparison, e.g. '/assets'
     */
    public function add_omit_dir($dirs){
        foreach($dirs as $d){
            $this->omit_dirs[] = $d;
        }
    }

    /**
     * Add files to omit.
     *
     * @param array $files paths that are appended to the root before comparison, e.g. '/404.htm'
     */
    public function add_omit_file($files){
        foreach($files as $f){
            $this->omit_files[] = $f;
        }
    }

    /**
     * Add file types to include.
     *
     * @param array $types file extensions without the dot, e.g. 'html'
     */
    public function add_file_type($types){
        foreach($types as $t){
            $this->include_types[] = $t;
        }
    }

    /**
     * Add meta tags to scrape from each included file.
     *
     * @param array $metas meta names as returned by get_meta_tags(), e.g. 'description'
     */
    public function add_meta($metas){
        foreach($metas as $m){
            $this->use_metas[] = $m;
        }
    }

    //PROTECTED

    /**
     * Escape text for use in XML element content or a quoted attribute value.
     *
     * Entities that are already present are left alone (double_encode = false)
     * and invalid UTF-8 is substituted rather than blanking the whole string.
     *
     * @param string $text
     * @return string
     */
    protected function xml_escape($text){
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    }

    /**
     * Append a <url> element for one file, unless its type is not included
     * or the file is on the omit list.
     *
     * @param string $file path of the file as built by parse_dir()
     */
    protected function parse_file($file){
        //file type: compare the real extension so types of any length can match
        $type = pathinfo($file, PATHINFO_EXTENSION);
        if(!in_array($type, $this->include_types, true)){
            return;
        }
        //omit file??
        foreach($this->omit_files as $f){
            if($this->root.$f === $file) return;
        }
        //the basics: swap the filesystem root for the base URL
        $file_path = str_replace($this->root, $this->base_path, $file);
        $this->xml_string .= '<url><loc>'.$this->xml_escape($file_path).'</loc>';
        //filemtime is the content modification time; filectime also changes on chmod/rename
        $this->xml_string .= '<lastmod>'.date('Y-m-d', filemtime($file)).'</lastmod>';
        //any metas
        if($this->use_metas){
            $metas = get_meta_tags($file);
            if($metas === false){
                $metas = array(); //unreadable file: emit the url without metas
            }
            foreach($this->use_metas as $m){
                if(isset($metas[$m])){
                    $this->xml_string .= '<'.$m.'>'.$this->xml_escape($metas[$m]).'</'.$m.'>';
                }
            }
        }
        $this->xml_string .= '</url>';
    }

    /**
     * Recursively append the contents of a directory, unless it is on the omit list.
     *
     * @param string $dir path of the directory to scan
     */
    protected function parse_dir($dir){
        //exclude directory
        foreach($this->omit_dirs as $d){
            if($this->root.$d === $dir) return;
        }
        //read the listing before emitting anything so an unreadable
        //directory cannot leave a half-open tag behind
        $listing = scandir($dir);
        if($listing === false){
            return; //scandir() has already raised a warning
        }
        $contents = array_diff($listing, array('.', '..'));
        if($this->include_dir_structure){
            //opening tag; the name is the last path segment with underscores shown as spaces
            $nameAt = strrchr($dir, '/');
            $name = ($nameAt === false) ? '' : str_replace('_', ' ', (string) substr($nameAt, 1));
            $dir_path = str_replace($this->root, $this->base_path, $dir);
            $this->xml_string .= "<sitemap:directory path='".$this->xml_escape($dir_path)."/'";
            if($name !== ''){//is there a name; strict test so a directory called "0" keeps it
                $this->xml_string .= " name='".$this->xml_escape($name)."'";
            }
            $this->xml_string .= ">";
        }
        //loop thru contents; both parsers append to xml_string themselves
        foreach($contents as $item){
            $path = $dir.'/'.$item;
            if(is_dir($path)){
                $this->parse_dir($path);
            }
            elseif(is_file($path) && $this->include_files){
                $this->parse_file($path);
            }
        }
        if($this->include_dir_structure){
            $this->xml_string .= '</sitemap:directory>';//closing tag
        }
    }

    /**
     * Write the accumulated XML plus the closing </urlset> tag to the xml path.
     *
     * @throws RuntimeException when the file cannot be written
     */
    protected function write_file(){
        if(file_put_contents($this->xml_path, $this->xml_string.'</urlset>') === false){
            throw new RuntimeException('Could not write XML map to '.$this->xml_path);
        }
    }

    //PUBLIC

    /**
     * The controller function: build the map for $dir, write it out and echo 'done'.
     *
     * @param string $dir directory to scan
     * @throws RuntimeException when the XML file cannot be written
     */
    public function write_XML($dir){
        //start from a clean document so the same object can be written more than once
        $this->xml_string = "<?xml version='1.0' encoding='UTF-8'?>";
        if($this->xsl_path){//include a stylesheet
            $this->xml_string .= "<?xml-stylesheet type='text/xsl' href='".$this->xml_escape($this->xsl_path)."' ?>";
        }
        $this->xml_string .= "<urlset xmlns='http://www.google.com/schemas/sitemap/0.84'";
        if(!$this->google_map){
            //custom namespace for the sitemap:directory elements
            $this->xml_string .= " xmlns:sitemap='http://neilanderson.freehostia.com'";
        }
        $this->xml_string .= ">";
        $this->parse_dir($dir); //this is the recursive bit that makes it all work
        $this->write_file();
        echo 'done';
    }

}

?>

And the XSLT...

I wrote it about two years ago. The funny thing is that it works. See it here...

Permalink 4 comments (latest comment by Anthony Dooley, Friday, 3 Feb 2012, 09:45)
Share post

This blog might contain posts that are only visible to logged-in users, or where only logged-in users can comment. If you have an account on the system, please log in for full access.

Total visits to this blog: 252527