Source Rally PHP Community Scripts .. Sign up .. Login
This class is used for organizing the relations between users, comments and news on digg.
It depends on the following two classes:
http://www.sourcerally.net/Scripts/30-httpResponse-class
http://www.sourcerally.net/Scripts/29-Crawl-bot-for-PHP
Access: Public      Tags: digg, crawl, bot, scraping, curl, DOMDocument
Add to favourites       Subscribe comments       Copy code       Bookmark
<?php
/*
Examples of use:

//Get details of a specific news
$details = digg::fetchNews($url);
print_r($details);

//Get user details
$details = digg::fetchProfile($username);
print_r($details);

//Get all friends of a user
$array = digg::fetchFriends($username);
print_r($array);

//Get all news dugg by a user
$array = digg::fetchNewsDugged($username);
print_r($array);

//Get all news submitted by a user
$array = digg::fetchNewsSubmitted($username);
print_r($array);

//Get all news listed on a URL and on its paginated subpages
$array = digg::fetchNewsByUrl("http://www.digg.com/news/popular/24hours");
print_r($array);
*/


/**
 * Scraper for digg.com pages: stories, user profiles, friend lists and
 * story listings.
 *
 * Every method is static and is meant to be called as digg::method().
 *
 * Depends on an externally defined `bot` class. The only thing this class
 * uses from it is `get($url)`, which is expected to return either a falsy
 * value (failed request) or an object exposing the page HTML in `->content`.
 */
class digg
{
    /** Root of every user-related URL built by this class. */
    const BASE_URL = 'http://www.digg.com';

    /**
     * Labels found in the <dt> elements of a profile page, mapped to the
     * property name under which fetchProfile() stores the matching value.
     */
    private static $profileFields = array(
        'Overall Ranking:'            => 'ranking',
        'News & Videos Dugg:'         => 'itemsDugg',
        'News & Videos Submitted:'    => 'itemsSubmitted',
        'News & Videos Made Popular:' => 'itemsPopular',
        'Popular Stories Ratio:'      => 'popularRatio',
        'Profile Views:'              => 'profileViews',
        'Real Name:'                  => 'realName',
        'Location:'                   => 'location',
        'AIM/Yahoo/MSN/ICQ/gTalk:'    => 'im',
        'My Website:'                 => 'url',
        'Member Since:'               => 'signedUp',
    );

    /**
     * Returns the text of the element with id "diggs-strong-1" on a page.
     *
     * @param string $url Page to fetch.
     * @return string|null Element text, or null when the page could not be
     *                     fetched or the element is missing.
     */
    public static function countComments($url)
    {
        $doc = self::fetchDocument(new bot(), $url);
        if ($doc === null) {
            return null;
        }
        return self::textOf($doc->getElementById('diggs-strong-1'));
    }

    /**
     * Converts a user name into the form used inside profile URLs:
     * spaces become dots and the result is URL-encoded.
     *
     * @param string $user
     * @return string
     */
    public static function userToUrl($user)
    {
        return urlencode(str_replace(' ', '.', $user));
    }

    /**
     * Fetches one story page and extracts its details.
     *
     * The returned object always has `url` and `comments`. When the page was
     * fetched it also has `title`, `description` and `commentsAdded` (each
     * null when the matching markup is missing) and, when the matching links
     * exist inside element "enclosure1", `user` and `category`.
     *
     * `comments` keeps its historical meaning: it starts as an empty array
     * and is replaced by the text of element "diggs-strong-1" when present.
     * NOTE(review): that id suggests a digg count rather than a comment
     * count; the property name is kept for existing callers.
     *
     * @param string $url Story page to fetch.
     * @return stdClass
     */
    public static function fetchNews($url)
    {
        $object = new stdClass;
        $object->url = $url;
        $object->comments = array();

        $doc = self::fetchDocument(new bot(), $url);
        if ($doc === null) {
            return $object;
        }

        $object->title = self::textOf($doc->getElementById('title'));
        $diggs = $doc->getElementById('diggs-strong-1');
        if ($diggs !== null) {
            $object->comments = $diggs->textContent;
        }
        $object->description = self::textOf($doc->getElementsByTagName('p')->item(0));

        $details = $doc->getElementById('enclosure1');
        if ($details !== null) {
            foreach ($details->getElementsByTagName('a') as $link) {
                // getAttribute() yields '' for anchors without href instead of failing.
                $href = $link->getAttribute('href');
                $isUserLink = strncmp($href, '/users/', 7) === 0;
                if (!isset($object->user) && $isUserLink) {
                    $object->user = substr($href, 7);
                } elseif (!isset($object->category) && !$isUserLink && strncmp($href, '/', 1) === 0) {
                    // The first local, non-user link is taken as the category;
                    // nothing after it is inspected.
                    $object->category = substr($href, 1);
                    break;
                }
            }
        }

        // The first <span> in "sub-nav" is expected to read "...(N)";
        // keep what sits between the parentheses.
        $object->commentsAdded = null;
        $subNav = $doc->getElementById('sub-nav');
        if ($subNav !== null) {
            $span = $subNav->getElementsByTagName('span')->item(0);
            if ($span !== null) {
                $parts = explode('(', $span->textContent);
                if (isset($parts[1])) {
                    $object->commentsAdded = substr($parts[1], 0, -1);
                }
            }
        }

        return $object;
    }

    /**
     * Fetches a user's profile page and maps its labelled fields onto an object.
     *
     * @param string $username
     * @return stdClass Always has `user`; every recognised label found on the
     *                  page adds the property named in self::$profileFields.
     */
    public static function fetchProfile($username)
    {
        $object = new stdClass;
        $object->user = $username;

        $doc = self::fetchDocument(new bot(), self::userPageUrl($username, '/profile'));
        if ($doc === null) {
            return $object;
        }

        foreach ($doc->getElementsByTagName('dt') as $term) {
            $label = trim($term->textContent);
            if (!isset(self::$profileFields[$label])) {
                continue;
            }
            // Walk to the next element sibling rather than assuming exactly
            // one whitespace text node sits between the label and its value.
            $value = $term->nextSibling;
            while ($value !== null && $value->nodeType !== XML_ELEMENT_NODE) {
                $value = $value->nextSibling;
            }
            if ($value !== null) {
                // Braces are required: without them PHP 7+ reads the target
                // as ($object->$mappings)[...].
                $object->{self::$profileFields[$label]} = $value->textContent;
            }
        }
        return $object;
    }

    /**
     * Lists the stories submitted by a user.
     *
     * @param string $username
     * @return array List of story objects, see extractNews().
     */
    public static function fetchNewsSubmitted($username)
    {
        return self::fetchNewsByUrl(self::userPageUrl($username, '/news/submitted'));
    }

    /**
     * Lists the stories dugg by a user.
     *
     * @param string $username
     * @return array List of story objects, see extractNews().
     */
    public static function fetchNewsDugged($username)
    {
        return self::fetchNewsByUrl(self::userPageUrl($username, '/news/dugg'));
    }

    /**
     * Collects the stories listed on a URL and on all of its numbered pages
     * ("<url>/page2", "<url>/page3", ...).
     *
     * @param string $url Listing page.
     * @return array List of story objects; empty when the first page cannot
     *               be fetched.
     */
    public static function fetchNewsByUrl($url)
    {
        $news = array();
        foreach (self::fetchAllPages($url) as $doc) {
            $news = array_merge($news, self::extractNews($doc));
        }
        return $news;
    }

    /**
     * Extracts one object per <div class="news-body"> in a parsed page.
     *
     * Each object gets `title` and `url` from the first absolute
     * (http:// or https://) link inside the div, and `diggUrl` from the href
     * of a link with class "more". A property is absent when no such link exists.
     *
     * @param DOMDocument $doc
     * @return array List of stdClass.
     */
    public static function extractNews($doc)
    {
        $news = array();
        foreach ($doc->getElementsByTagName('div') as $div) {
            if ($div->getAttribute('class') != 'news-body') {
                continue;
            }
            $object = new stdClass;
            foreach ($div->getElementsByTagName('a') as $link) {
                $href = $link->getAttribute('href');
                $isAbsolute = strncmp($href, 'http://', 7) === 0
                    || strncmp($href, 'https://', 8) === 0;
                if ($isAbsolute && empty($object->url)) {
                    $object->title = $link->textContent;
                    $object->url = $href;
                }
                if ($link->getAttribute('class') == 'more') {
                    $object->diggUrl = $href;
                }
            }
            $news[] = $object;
        }
        return $news;
    }

    /**
     * Finds the highest page number linked inside any <div class="pages">.
     *
     * @param DOMDocument $doc
     * @return int Highest numeric link label, or 0 when there is none.
     */
    public static function getNumberOfPages($doc)
    {
        $pages = 0;
        foreach ($doc->getElementsByTagName('div') as $div) {
            if ($div->getAttribute('class') != 'pages') {
                continue;
            }
            foreach ($div->getElementsByTagName('a') as $link) {
                $label = trim($link->textContent);
                if (is_numeric($label) && (int) $label > $pages) {
                    $pages = (int) $label;
                }
            }
        }
        return $pages;
    }

    /**
     * Collects the user names linked from a user's friend list, across all
     * of its numbered pages.
     *
     * @param string $username
     * @return array Map of user name => user name.
     */
    public static function fetchFriends($username)
    {
        $friends = array();
        foreach (self::fetchAllPages(self::userPageUrl($username, '/friends/list')) as $doc) {
            // Array union instead of array_merge(): names made only of digits
            // become integer keys, and array_merge() would renumber them.
            $friends = $friends + self::extractFriends($doc);
        }
        return $friends;
    }

    /**
     * Extracts user names from every link of the exact form "/users/<name>".
     * Deeper paths such as "/users/<name>/profile" are ignored.
     *
     * @param DOMDocument $doc
     * @return array Map of user name => user name.
     */
    public static function extractFriends($doc)
    {
        $friends = array();
        foreach ($doc->getElementsByTagName('a') as $link) {
            $href = $link->getAttribute('href');
            if (strncmp($href, '/users/', 7) !== 0) {
                continue;
            }
            $paths = explode('/', substr($href, 1));
            if (count($paths) == 2 && $paths[1] !== '') {
                $friends[$paths[1]] = $paths[1];
            }
        }
        return $friends;
    }

    /**
     * Builds "<BASE_URL>/users/<encoded name><suffix>".
     */
    private static function userPageUrl($username, $suffix)
    {
        return self::BASE_URL . '/users/' . self::userToUrl($username) . $suffix;
    }

    /**
     * Fetches a listing page plus every numbered page it links to.
     *
     * @return array List of DOMDocument; pages that fail to load are skipped.
     */
    private static function fetchAllPages($url)
    {
        $bot = new bot();
        $first = self::fetchDocument($bot, $url);
        if ($first === null) {
            return array();
        }
        $docs = array($first);
        $pages = self::getNumberOfPages($first);
        for ($i = 2; $i <= $pages; $i++) {
            // Always derive the page URL from the base URL; never from the
            // previous page's URL.
            $doc = self::fetchDocument($bot, $url . '/page' . $i);
            if ($doc !== null) {
                $docs[] = $doc;
            }
        }
        return $docs;
    }

    /**
     * Fetches a URL through the given bot and parses the response body.
     *
     * @return DOMDocument|null Null when the bot returns a falsy response.
     */
    private static function fetchDocument($bot, $url)
    {
        $response = $bot->get($url);
        if (!$response) {
            return null;
        }
        return self::parseHtml($response->content);
    }

    /**
     * Parses HTML into a DOMDocument without emitting libxml warnings for
     * malformed markup. Empty input yields an empty document.
     */
    private static function parseHtml($html)
    {
        $doc = new DOMDocument();
        $html = (string) $html;
        if (trim($html) === '') {
            return $doc;
        }
        $previous = libxml_use_internal_errors(true);
        $doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        return $doc;
    }

    /**
     * Returns a node's text content, or null when the node is missing.
     */
    private static function textOf($node)
    {
        return $node === null ? null : $node->textContent;
    }
}
?>
Add to favourites       Subscribe comments       Copy code       Bookmark
Sign up to add your own comment here!

Shared by:

regin

Mail user Add to friends
All user contributed content is available under the unless specified otherwise.
Remaining copyrights Regin Gaarsmand 2006-2008
About www.SourceRally.net