Mostly Ramblings...: Automatically Backing up a Blogger.com Blog

I started writing about my blog backup solution in a previous post. In that post I briefly discussed the Blogger API that I used. In this post I’d like to go over the rest.

The Solution

The solution is simple and unimpressive, and consists of only a few classes. I discussed the actual API calls in the previous post. Those are the heart of the Exporter and Authenticator classes. Here’s the rest of the Authenticator class:

/// <summary>
/// Posts the configured email and password to the ClientLogin endpoint and
/// wraps the outcome (status plus, on success, the authorization token) in an
/// <see cref="AuthenticationResult"/>.
/// </summary>
public class Authenticator
{
    /// <summary>Key that precedes the token value in the authentication response body.</summary>
    private const string AuthTokenKey = "Auth=";

    /// <summary>Characters that terminate the token value inside the response body.</summary>
    private static readonly char[] LineTerminators = { '\r', '\n' };

    private static readonly ILog logger = LogManager.GetLogger(typeof(Authenticator));

    private readonly string _email;
    private readonly string _password;

    private int _timeoutInMs = 100000;

    /// <summary>
    /// Creates an authenticator for the given account.
    /// </summary>
    /// <param name="email">The account email; validated when <see cref="Authenticate"/> is called.</param>
    /// <param name="password">The account password; validated when <see cref="Authenticate"/> is called.</param>
    public Authenticator(string email, string password)
    {
        _email = email;
        _password = password;
    }

    /// <summary>
    /// The request timeout in milliseconds
    /// </summary>
    public int TimeoutInMs
    {
        get { return _timeoutInMs; }
        set
        {
            _timeoutInMs = value;
            if (logger.IsDebugEnabled) logger.Debug("Authentication request timeout changed to: " + this._timeoutInMs);
        }
    }

    /// <summary>
    /// Fails fast when either credential is missing.
    /// </summary>
    /// <exception cref="ConfigurationErrorsException">Occurs if the email or the password is null or empty.</exception>
    private void CheckRequiredConfiguration()
    {
        if (string.IsNullOrEmpty(this._email) || string.IsNullOrEmpty(this._password))
        {
            throw new ConfigurationErrorsException("Either the username or password has not been supplied. Please check the configuration.");
        }
    }

    /// <summary>
    /// Make an authentication request
    /// </summary>
    /// <returns>an instance of <see cref="AuthenticationResult"/> containing the result of the authentication request</returns>
    /// <exception cref="ConfigurationErrorsException">Occurs if the email or the password has not been supplied.</exception>
    /// <exception cref="InvalidDataException">Occurs if no response or an unexpected response is received from the Blogger service. The underlying <see cref="WebException"/>, if any, is attached as the inner exception.</exception>
    /// <exception cref="AuthenticationException">Occurs if the status code is 'OK' but no Auth token was returned.</exception>
    public AuthenticationResult Authenticate()
    {
        this.CheckRequiredConfiguration();

        HttpWebResponse response;
        WebException requestFailure = null;
        try
        {
            // Building the request already writes the POST body to the network, so it
            // belongs inside the try: a connection failure then surfaces through the
            // same documented path as a failure while waiting for the response.
            HttpWebRequest authenticationRequest = CreateAuthenticationRequest();
            response = (HttpWebResponse)authenticationRequest.GetResponse();
        }
        catch (WebException webException)
        {
            // Error status codes arrive as a WebException that still carries the response.
            requestFailure = webException;
            response = webException.Response as HttpWebResponse;
        }

        if (null == response)
        {
            // Keep the original failure (timeout, DNS, ...) so it is not lost to the caller.
            throw new InvalidDataException("An invalid response was received from the service.", requestFailure);
        }

        var result = new AuthenticationResult();
        string responseBody;

        try
        {
            var status = response.StatusCode.ToString();
            if (logger.IsDebugEnabled) logger.Debug("Authentication response result code: " + status);
            result.Status = status;

            using (var responseStream = response.GetResponseStream())
            using (var reader = new StreamReader(responseStream))
            {
                responseBody = reader.ReadToEnd();
            }

            if (logger.IsTraceEnabled) logger.Trace("Authentication response body: " + responseBody);
        }
        finally
        {
            // Release the connection even when reading the body fails.
            response.Close();
        }

        if (result.Status == "OK")
        {
            string authTokenValue = ParseAuthenticationToken(responseBody);
            result.Token = this.BuildAuthToken(this._email, authTokenValue);
        }

        return result;
    }

    /// <summary>
    /// Wraps the email and the raw token value in an <see cref="AuthorizationToken"/>.
    /// </summary>
    private AuthorizationToken BuildAuthToken(string email, string authValue)
    {
        return new AuthorizationToken(email, authValue);
    }

    /// <summary>
    /// Extracts the value that follows "Auth=" in the response body. The value ends
    /// at the first line terminator (or the end of the body) and is trimmed, so no
    /// trailing newline or later content ends up in the token.
    /// </summary>
    /// <param name="responseBody">The body of the authentication response.</param>
    /// <returns>The token value, never null or empty.</returns>
    /// <exception cref="AuthenticationException">Occurs if the body contains no "Auth=" entry or the entry is empty; the body is attached to the exception data under "authResponse".</exception>
    private string ParseAuthenticationToken(string responseBody)
    {
        string token = null;

        // Ordinal: this is a protocol key, not linguistic text.
        int authTokenPosition = responseBody.IndexOf(AuthTokenKey, StringComparison.Ordinal);
        if (authTokenPosition >= 0)
        {
            int valueStart = authTokenPosition + AuthTokenKey.Length;
            int valueEnd = responseBody.IndexOfAny(LineTerminators, valueStart);

            token = valueEnd < 0
                ? responseBody.Substring(valueStart)
                : responseBody.Substring(valueStart, valueEnd - valueStart);
            token = token.Trim();
        }

        if (string.IsNullOrEmpty(token))
        {
            var ex =
                new AuthenticationException(
                    "Authentication request returned 'OK' but auth token was missing. Check exception data for authentication response.");
            ex.Data.Add("authResponse", responseBody);
            throw ex;
        }

        return token;
    }

    /// <summary>
    /// Builds the form-encoded POST body for the authentication request.
    /// </summary>
    /// <param name="passwordValue">The text to place in the Passwd field: the URL-encoded password for the real request, or a mask when the body is only going to be logged.</param>
    private string BuildPostBody(string passwordValue)
    {
        var postBody = new StringBuilder();
        postBody.Append("accountType=GOOGLE&");
        postBody.AppendFormat("Email={0}&", this._email.ToUrlEncoded());
        postBody.AppendFormat("Passwd={0}&", passwordValue);
        postBody.Append("service=blogger&");
        postBody.AppendFormat("source={0}", "malevy.net-Blogger.Backup-1".ToUrlEncoded());
        return postBody.ToString();
    }

    /// <summary>
    /// Creates the ClientLogin POST request and writes the credentials to its request stream.
    /// </summary>
    /// <returns>The request, ready for <see cref="HttpWebRequest.GetResponse"/>.</returns>
    private HttpWebRequest CreateAuthenticationRequest()
    {
        var uri = new Uri("https://www.google.com/accounts/ClientLogin");
        HttpWebRequest authenticationRequest = (HttpWebRequest)WebRequest.Create(uri);
        authenticationRequest.AllowAutoRedirect = false;
        authenticationRequest.Method = "POST";
        authenticationRequest.ContentType = "application/x-www-form-urlencoded";
        authenticationRequest.KeepAlive = false;
        authenticationRequest.Expect = string.Empty;
        authenticationRequest.Headers.Add("GData-Version", "2");
        authenticationRequest.Timeout = this.TimeoutInMs;

        // The password must never reach the log, so the logged body uses a mask.
        if (logger.IsTraceEnabled) logger.Trace("body of post: " + this.BuildPostBody("********"));

        byte[] encodedData = Encoding.ASCII.GetBytes(this.BuildPostBody(this._password.ToUrlEncoded()));

        authenticationRequest.ContentLength = encodedData.Length;
        using (var stream = authenticationRequest.GetRequestStream())
        {
            stream.Write(encodedData, 0, encodedData.Length);
        }

        return authenticationRequest;
    }
}

The call to Authenticate returns an instance of AuthenticationResult. AuthenticationResult is a simple container that holds a success or failure indicator (shocking – I know!). In the event that the authentication succeeded, the token that was received from the AuthenticationService is wrapped in an instance of AuthorizationToken, along with the email. Those two pieces of information are required by the Exporter:

/// <summary>
/// Requests the posts feed of a Blogger blog and hands the response stream to an
/// <see cref="IArchiveWriter"/> for persistence.
/// </summary>
public class Exporter
{
    private readonly string _blogId;
    private readonly IArchiveWriter _fileWriter;
    private int _timeoutInMs = 100000;

    private static readonly ILog logger = LogManager.GetLogger(typeof(Exporter));

    /// <summary>
    /// Creates an exporter for a single blog.
    /// </summary>
    /// <param name="blogId">The id of the blog to export; validated when <see cref="Export"/> is called.</param>
    /// <param name="archiveWriter">The writer that receives the feed stream; validated when <see cref="Export"/> is called.</param>
    public Exporter(string blogId, IArchiveWriter archiveWriter)
    {
        _blogId = blogId;
        _fileWriter = archiveWriter;
    }

    /// <summary>
    /// The request timeout in milliseconds
    /// </summary>
    public int TimeoutInMs
    {
        get { return _timeoutInMs; }
        set
        {
            _timeoutInMs = value;
            if (logger.IsDebugEnabled) logger.Debug("Export request timeout changed to: " + this._timeoutInMs);
        }
    }


    /// <summary>
    /// Begins the export process
    /// </summary>
    /// <param name="authToken">The authorization token that was returned from the <see cref="Authenticator"/></param>
    /// <exception cref="ArgumentNullException">Occurs if <paramref name="authToken"/> is null.</exception>
    /// <exception cref="ArgumentException">Occurs if the exporter was configured without a blog id or without a writer.</exception>
    /// <exception cref="WebException">Occurs if the request fails or the service answers with any status other than 'OK'.</exception>
    public void Export(AuthorizationToken authToken)
    {
        if (authToken == null) throw new ArgumentNullException("authToken");

        if (string.IsNullOrEmpty(this._blogId))
            throw new ArgumentException("Exporter must be configured with a Blogger blog ID.");

        if (null == this._fileWriter) throw new ArgumentException("Exporter must be configured with a writer");

        var request = this.CreateExportRequest(authToken);

        // Dispose the response so the connection is released even if archiving fails.
        using (var response = (HttpWebResponse)request.GetResponse())
        {
            // Automatic redirects are disabled on the request, so a redirect (or any other
            // non-OK status that does not throw) would otherwise be archived as if it were
            // the blog content and reported as a successful backup.
            if (response.StatusCode != HttpStatusCode.OK)
            {
                throw new WebException(
                    string.Format("The export request for blog {0} returned status '{1}'; nothing was archived.", this._blogId, response.StatusCode),
                    WebExceptionStatus.ProtocolError);
            }

            using (var responseStream = response.GetResponseStream())
            {
                this._fileWriter.ArchiveBlogStream(responseStream, this._blogId);
            }
        }

        if (logger.IsInfoEnabled) logger.Info(string.Format("Blogger blog {0} was successfully backed up.", this._blogId));

    }

    /// <summary>
    /// Creates the GET request for the blog's posts feed and attaches the authorization header.
    /// </summary>
    /// <param name="token">The token whose header is added to the request.</param>
    private HttpWebRequest CreateExportRequest(AuthorizationToken token)
    {
        // I originally wanted to get all the feeds using the archive functionality but I'm having problems getting archive
        // to work when authenticating using ClientLogin. I'm going to punt to this method and hope that I get a response
        // from the blogger api newsgroup.
        // NOTE(review): this URL is plain http, so the authorization header travels unencrypted - confirm whether https can be used.
        // NOTE(review): no paging or result-count parameter is sent - confirm the feed really returns every post.
        const string archiveUriFormat = "http://www.blogger.com/feeds/{0}/posts/full?updated=1990-01-01T00:00:00&orderby=updated";
        var uri = new Uri(string.Format(archiveUriFormat, this._blogId));
        HttpWebRequest exportRequest = (HttpWebRequest)WebRequest.Create(uri);

        exportRequest.AllowAutoRedirect = false;
        exportRequest.Method = "GET";
        exportRequest.KeepAlive = false;
        exportRequest.Expect = string.Empty;
        exportRequest.Headers.Add("GData-Version", "2");
        exportRequest.Timeout = this.TimeoutInMs;

        exportRequest.Headers.Add(token.ToAuthorizationHeader());

        return exportRequest;
    }

}

If the Export request succeeds, the response is passed to the IArchiveWriter. In my case, this will be an instance of ArchiveFileWriter. ArchiveFileWriter persists the stream to a file (another shocking discovery!). I’ll skip the code for this one.

The driver that pulls all of this together is an instance of the Agent class:

/// <summary>
/// Drives one backup run: authenticates, then exports the blog with the
/// token that authentication returned.
/// </summary>
public class Agent
{
    private readonly Authenticator _authenticator;
    private readonly Exporter _exporter;

    private static readonly ILog logger = LogManager.GetLogger(typeof(Agent));

    /// <summary>
    /// Creates an agent from its two collaborators.
    /// </summary>
    /// <param name="authenticator">Performs the authentication request.</param>
    /// <param name="exporter">Retrieves and archives the blog content.</param>
    /// <exception cref="ArgumentNullException">Occurs if either argument is null.</exception>
    public Agent(Authenticator authenticator, Exporter exporter)
    {
        if (authenticator == null) throw new ArgumentNullException("authenticator");
        if (exporter == null) throw new ArgumentNullException("exporter");

        _authenticator = authenticator;
        _exporter = exporter;
    }

    /// <summary>
    /// Runs a single backup. Failures are logged rather than thrown: an
    /// authentication status other than 'OK' ends the run, and any exception
    /// raised while authenticating or exporting is logged as an error.
    /// </summary>
    public void Execute()
    {
        try
        {
            // Authentication can throw too (missing configuration, no response, missing
            // token), so it is covered by the same handler as the export.
            var authenticationResult = this._authenticator.Authenticate();

            if (null == authenticationResult || "OK" != authenticationResult.Status)
            {
                logger.Error("Unable to authenticate with the Blogger service.");
                return;
            }

            this._exporter.Export(authenticationResult.Token);
        }
        catch (Exception e)
        {
            logger.Error("BloggerBackup caught an unhandled exception.", e);
        }
    }
}

When the Execute() method is called, Agent uses the instance of Authenticator to make the authentication request. If that succeeds, the authorization token is passed to the exporter to retrieve the actual blog contents.

Now, the observant reader (you of course!) may have noticed that Agent accepts an instance of Authenticator and an instance of Exporter in its constructor. In my solution, all of the wiring is done using Spring.Net. Here, I believe, is a good example of using an IOC container to support something other than testing. Although I will admit that taking this approach (i.e. Dependency Injection) does make testing much easier.

I decided to use Spring’s XML configuration. I know that there are many people that are very anti-XML for configuration but I don’t believe that the XML gets out of hand for this implementation. Here’s a portion of the XML that is used to wire all of the components together:

<!-- The backup driver. Its constructor receives the objects with the ids "authenticator" and "exporter". -->
<object id="Backup-Agent" type="BloggerBackup.Agent.Agent, BloggerBackup.Agent">
<constructor-arg name="authenticator" ref="authenticator" />
<constructor-arg name="exporter" ref="exporter" />
</object>

<!-- The Authenticator. The ${email} and ${password} placeholders stand in for values kept outside this file. -->
<object id="authenticator" type="BloggerBackup.Agent.Authenticator, BloggerBackup.Agent" >
<constructor-arg name="email" value="${email}" />
<constructor-arg name="password" value="${password}" />
</object>

<!-- The Exporter. It receives the blog id from a placeholder and the object with the id "fileWriter" as its archive writer. -->
<object id="exporter" type="BloggerBackup.Agent.Exporter, BloggerBackup.Agent" >
<constructor-arg name="blogId" value="${id-of-blog-to-backup}" />
<constructor-arg name="archiveWriter" ref="fileWriter" />
</object>

<!-- The archive writer. The archive root folder is supplied through the ${archive-root-folder} placeholder. -->
<object id="fileWriter" type="BloggerBackup.Agent.ArchiveWriter.ArchiveFileWriter, BloggerBackup.Agent" >
<constructor-arg name="archiveRoot" value="${archive-root-folder}" />
</object>

When I ask for an instance of Agent, Spring.Net is nice enough to fill in the dependencies. Of course, that’s what IOC containers do.

When Configuration is not Configuration

I’ve always felt that there are two types of configuration metadata. The first is the type of metadata that represents a portion of the application. The Spring.Net config file is an example of this. I consider this file to be source code. It is describing the way that my application should be put together. In this case, Spring.Net is a factory that is creating objects for me.

The second type of configuration is “operational information.” For example, the timeout, email, and password values. I expect a support engineer to adjust these values as necessary but I don’t expect him or her to change Spring’s config file.

Why mention this? Well, if you look back at the Spring.Net config file, I’m injecting operational values into the objects. A good example is the instance of ArchiveFileWriter that is given the name of the folder to store the archive files:

<!-- ${archive-root-folder} is a placeholder; the actual folder name is an operational value kept in the app.config file. -->
<object id="fileWriter" type="BloggerBackup.Agent.ArchiveWriter.ArchiveFileWriter, BloggerBackup.Agent" >
<constructor-arg name="archiveRoot" value="${archive-root-folder}" />
</object>

The funny syntax (${archive-root-folder}) is a place holder that allows me to put the actual value in the app.config file. I’m using Spring.Net’s PropertyPlaceholderConfigurer to retrieve operational values from the <appSettings> section of the app.config file. This way, the support engineer can change those values without having to edit the Spring.Net config file.

Pretty cool eh?

Making things automatic

There’s only one more piece to describe. I’m using an embedded scheduler called Quartz.Net. Now, I’ll be honest. I found using the native Quartz.Net API to be really frustrating. Luckily, Spring.Net has a really nice wrapper around Quartz.Net. In the Spring.Net config file, I define an instance of the SchedulerFactoryObject:

<!-- The scheduler factory. Its "triggers" property is given the trigger object with the id "Backup-Agent-Schedule". -->
<object id="scheduler-factory" type="Spring.Scheduling.Quartz.SchedulerFactoryObject, Spring.Scheduling.Quartz">
<property name="triggers">
<ref object="Backup-Agent-Schedule"/>
</property>
</object>

I provide it with the schedule on which I want my backup to run. I’m using an instance of CronTriggerObject:

<!-- The cron trigger. It points at the "backupRunner" job detail and takes its cron expression from a placeholder. -->
<object id="Backup-Agent-Schedule" type="Spring.Scheduling.Quartz.CronTriggerObject, Spring.Scheduling.Quartz">
<property name="JobDetail" ref="backupRunner" />
<property name="CronExpressionString" value="${backup-agent-schedule-asCronExpression}" />
</object>

The CronTriggerObject is supplied with the Cron expression which is being injected from the app.config file. It’s also being given a reference to the JobDetail object. Now, if you look at the same Quartz.Net examples that I saw, you’re shown examples of jobs that derive from JobDetail. I’m not crazy about this approach because it lets Quartz.Net leak into portions of the application where it probably doesn’t really need to be. The Spring.Net wrappers have a good solution to this with their MethodInvokingJobDetailFactoryObject. You configure it to receive an object and the name of a method on the object and the method will be called at the appropriate time:

<!-- The job detail. It is configured with the "Backup-Agent" object as the target and "Execute" as the method to call. -->
<object id="backupRunner" type="Spring.Scheduling.Quartz.MethodInvokingJobDetailFactoryObject, Spring.Scheduling.Quartz">
<property name="TargetObject" ref="Backup-Agent" />
<property name="TargetMethod" value="Execute" />
</object>

The Host

The only thing left is to create the Spring.Net Application Context. When the context is created, all the objects will be instantiated, the schedule will start and everything will start working together. Just like a practiced orchestra. For this, I have a class called ContextHost:

/// <summary>
/// Owns the Spring.Net application context: creates it from the XML
/// configuration file and disposes it again.
/// </summary>
public class ContextHost : IDisposable
{
    private static readonly ILog Log = LogManager.GetLogger(typeof (ContextHost));

    private IApplicationContext _springContext = null;

    /// <summary>
    /// Creates the application context from "spring-config.xml". If a context
    /// from an earlier call is still held, it is closed first so that it is
    /// not left undisposed.
    /// </summary>
    public void Create()
    {
        // A second Create() used to overwrite the field and orphan the previous context.
        if (null != this._springContext) this.Close();

        if (Log.IsDebugEnabled) Log.Debug("Starting scheduler");
        this._springContext = new XmlApplicationContext(false, "file://~/spring-config.xml");
        if (Log.IsInfoEnabled) Log.Info("Scheduler started");
    }

    /// <summary>
    /// Disposes the application context, if one is held. Safe to call repeatedly.
    /// </summary>
    public void Close()
    {
        if (Log.IsDebugEnabled) Log.Debug("Stopping scheduler");

        if (null != this._springContext)
        {
            // Clear the field before disposing so a failing Dispose cannot leave a
            // half-disposed context behind for the next Close() call.
            IApplicationContext context = this._springContext;
            this._springContext = null;
            context.Dispose();
        }

        if (Log.IsInfoEnabled) Log.Info("Scheduler stopped");
    }

    /// <summary>
    /// Closes the context if one is still held.
    /// </summary>
    public void Dispose()
    {
        if (null != this._springContext) this.Close();
        GC.SuppressFinalize(this);
    }
}

ContextHost is strictly responsible for creating and destroying the ApplicationContext.

OK. I lied. Since I have this solution residing in a Windows Service, I need my service class. It looks like this:

/// <summary>
/// The Windows Service class. It creates the <see cref="ContextHost"/> when the
/// service starts and closes it when the service stops.
/// </summary>
public partial class Host : ServiceBase
{
    private static readonly ILog Log = LogManager.GetLogger(typeof (Host));

    private ContextHost _contextHost = null;

    public Host()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Creates the context host. A failure is logged and whatever was created is
    /// released; the exception is not rethrown.
    /// </summary>
    /// <param name="args">Start arguments; not used.</param>
    protected override void OnStart(string[] args)
    {
        try
        {
            _contextHost = new ContextHost();
            _contextHost.Create();
        }
        catch (Exception e)
        {
            // Log before cleaning up so the root cause is recorded even if cleanup fails.
            Log.Error("Caught unhandled exception", e);

            // NOTE(review): the service still reports a successful start here - confirm
            // whether the exception should be rethrown so the start is reported as failed.
            this.ReleaseContextHost();
        }
    }

    /// <summary>
    /// Closes the context host, if one exists.
    /// </summary>
    protected override void OnStop()
    {
        this.ReleaseContextHost();
    }

    /// <summary>
    /// Closes and forgets the current context host. Does nothing when there is
    /// none, which is the case when its construction failed.
    /// </summary>
    private void ReleaseContextHost()
    {
        ContextHost host = _contextHost;
        _contextHost = null;

        if (null != host) host.Close();
    }
}

When my service starts, ContextHost is used to create the context. When my service stops, the application context is destroyed. Nothing could be simpler.

Mostly Ramblings...

Saturday, January 2, 2010

Automatically Backing up a Blogger.com Blog – Part 2

The Solution

When Configuration is not Configuration

Making things automatic

The Host

No comments:

Post a Comment