Hakyll code for this site

I am most greatful to Danny Su for putting the Hakyll 4 code for his blog on github and allowing others to use it. As you will see, this site borrows heavily from him.

This file is literate haskell; you can just copy and past the whole thing.

> {-# LANGUAGE OverloadedStrings #-}
> import Hakyll
> import Text.Pandoc (writerReferenceLinks)
> 
> import Text.Jasmine
> import qualified Data.ByteString.Lazy.Char8 as C
> 
> import System.FilePath
> 
> import Control.Monad (forM_, zipWithM_, liftM)
> import Data.Monoid (mappend, mconcat)
> 
> import Data.List (isPrefixOf, tails, findIndex, intercalate, sortBy)
> 
> import Data.Time.Clock (UTCTime)
> import Data.Time.Format (parseTimeM,defaultTimeLocale)
> 
> import Text.Blaze.Html.Renderer.String (renderHtml)
> import Text.Blaze.Internal (preEscapedString)
> import Text.Blaze.Html ((!), toHtml, toValue)
> import qualified Text.Blaze.Html5 as H
> import qualified Text.Blaze.Html5.Attributes as A
> 
> pandocWriteOptions = defaultHakyllWriterOptions
>     { writerReferenceLinks = True
>     }
> 
> main :: IO ()
> main = hakyll $ do
> 
>     match "templates/*" $ compile templateCompiler
> 
>     forM_ ["favicon.ico"
>           ,"robots.txt" 
>           ,"files/**" 
>           ,"img/**" 
>            ] $ \p ->
>       match p $ do
>         route idRoute
>         compile copyFileCompiler

Css is minified using the compressCssCompiler that comes as part of Hakyll

>     match "css/**" $ do
>         route idRoute
>         compile compressCssCompiler

We can automatically minify JS using hjsmin.

>     match "js/**" $ do
>         route idRoute
>         compile minifyJSCompiler

I don’t use tags yet, but one day I might.

>     tags <- buildTags "posts/*" (fromCapture "label/*")

For post that are already html I don’t want to run them through pandoc so I just use getResourceBody.

>     match "posts/*.html" $ do
>       route blogRoute
>       compile $ do
>         let allCtx = field "recent" (\_ -> recentPostList) `mappend` defaultContext
>         getResourceBody
>                     >>= saveSnapshot "teaser"
>                     >>= loadAndApplyTemplate "templates/post.html" (allCtx)
>                     >>= saveSnapshot "content"
>                     >>= loadAndApplyTemplate "templates/default.html" defaultContext
>                     >>= slashUrlsCompiler

Posts that are not already html need to be run through pandoc first.

>     match "posts/*" $ do
>       route blogRoute
>       compile $ do
>         let allCtx = field "recent" (\_ -> recentPostList) `mappend` defaultContext
>         pandocCompilerWith defaultHakyllReaderOptions pandocWriteOptions
>                     >>= saveSnapshot "teaser"
>                     >>= loadAndApplyTemplate "templates/post.html" (allCtx)
>                     >>= saveSnapshot "content"
>                     >>= loadAndApplyTemplate "templates/default.html" defaultContext
>                     >>= slashUrlsCompiler

Magic pagination; I’m not 100% sure how this works.

>     paginate 5 $ \index maxIndex itemsForPage -> do
>         let id = fromFilePath $ "blog/" ++ (if index==1 then "" else show index) ++ "/index.html"
>         create [id] $ do
>             route idRoute
>             compile $ do
>                 let allCtx =
>                         field "recent" (\_ -> recentPostList) `mappend`
>                         field "title" (\_ -> return "Blog") `mappend`
>                         defaultContext
>                     loadTeaser id = loadSnapshot id "teaser"
>                                         >>= loadAndApplyTemplate "templates/teaser.html" (teaserCtx tags)
>                                         >>= slashUrlsCompiler
>                 items <- sequence $ map loadTeaser itemsForPage
>                 let itembodies = map itemBody items
>                     postsCtx =
>                         constField "posts" (concat itembodies) `mappend`
>                         field "navlinkolder" (\_ -> return $ indexNavLink index 1 maxIndex) `mappend`
>                         field "navlinknewer" (\_ -> return $ indexNavLink index (-1) maxIndex) `mappend`
>                         field "recent" (\_ -> recentPostList) `mappend`
>                         defaultContext
> 
>                 makeItem ""
>                     >>= loadAndApplyTemplate "templates/blogpage.html" postsCtx
>                     >>= loadAndApplyTemplate "templates/default.html" allCtx
>                     >>= slashUrlsCompiler

The RSS feed loads the “teaser” snapshot because the “content” snapshot includes things like the page sidebar which shouldn’t be part of the feed.

>     create ["blog/rss.xml"] $ do
>         route idRoute
>         compile $ do
>             posts <- fmap (take 10) . recentFirst =<<
>                 loadAllSnapshots "posts/*" "teaser"
>             renderRss feedConfiguration feedContext posts
>                     
>     match "pages/*.html" $ do
>        route pagesRoute
>        compile $ do
>          let allCtx = field "recent" (\_ -> recentPostList) `mappend` defaultContext
>          getResourceBody
>            >>= saveSnapshot "teaser"
>            >>= loadAndApplyTemplate "templates/page.html" (allCtx)
>            >>= saveSnapshot "content"
>            >>= loadAndApplyTemplate "templates/default.html" defaultContext                 
>            >>= slashUrlsCompiler
>            
>     match "pages/*" $ do
>        route pagesRoute
>        compile $ do
>          let allCtx = field "recent" (\_ -> recentPostList) `mappend` defaultContext
>          pandocCompilerWith defaultHakyllReaderOptions pandocWriteOptions
>            >>= saveSnapshot "teaser"
>            >>= loadAndApplyTemplate "templates/page.html" (allCtx)
>            >>= saveSnapshot "content"
>            >>= loadAndApplyTemplate "templates/default.html" defaultContext                 
>            >>= slashUrlsCompiler           
> 
>     match "raw/*" $ do
>       route rawRoute
>       compile getResourceBody
> 
>     create ["index.html"] $ do       
>       route idRoute
>       compile $ do
>         let loadTeaser id = loadSnapshot id "teaser"
>                                 >>= loadAndApplyTemplate "templates/teaser.html" (teaserCtx tags)
>                                 >>= slashUrlsCompiler
>         recent <- recentPosts
>         items <- sequence $ map loadTeaser $ map itemIdentifier recent
>         let itembodies = take 3 $ reverse $ map itemBody items
>             postsCtx =  constField "posts" (concat itembodies) `mappend`
>                         defaultContext
>         makeItem ""
>          >>= loadAndApplyTemplate "templates/index.html" postsCtx
>          >>= loadAndApplyTemplate "templates/default.html" (field "title" (\_ -> return "Home") `mappend` defaultContext)
>          >>= slashUrlsCompiler
>            
>     create ["contact.html"] $ do 
>       route (constRoute "contact/index.html")
>       compile $ do
>         let ctx = field "title" (\_ -> return "Contact Me") `mappend` defaultContext
>         makeItem ""     
>           >>= loadAndApplyTemplate "templates/contact.html" ctx
>           >>= loadAndApplyTemplate "templates/default.html" ctx
>           >>= slashUrlsCompiler
>           
>     create ["services.html"] $ do 
>       route (constRoute "services/index.html")
>       compile $ do
>         let ctx = field "title" (\_ -> return "Services") `mappend` defaultContext
>         makeItem ""     
>           >>= loadAndApplyTemplate "templates/services.html" ctx
>           >>= loadAndApplyTemplate "templates/default.html" ctx
>           >>= slashUrlsCompiler
> 
>     match "site.lhs" $ do 
>       route (constRoute "site/index.html")
>       compile $ do
>         let ctx = field "title" (\_ -> return "Hakyll code for this site") `mappend` defaultContext
>         pandocCompilerWith defaultHakyllReaderOptions pandocWriteOptions
>           >>= loadAndApplyTemplate "templates/page.html" ctx
>           >>= loadAndApplyTemplate "templates/default.html" ctx
>           >>= slashUrlsCompiler
> 
>     create ["blog.xml"] $ do
>       route (constRoute "blog.xml")
>       compile $ do
>         let loadTeaser id = loadSnapshot id "teaser"
>                                 >>= loadAndApplyTemplate "templates/page.xml" defaultContext
>                                 >>= sitemapUrlsCompiler
>         recent <- recentPosts
>         items <- sequence $ map loadTeaser $ map itemIdentifier recent
>         let itembodies = reverse $ map itemBody items
>             postsCtx =  constField "pages" (concat itembodies) `mappend`
>                           defaultContext
>         makeItem ""
>          >>= loadAndApplyTemplate "templates/sitemap.xml" postsCtx
>          >>= sitemapUrlsCompiler
> 
>     create ["pages.xml"] $ do
>       route (constRoute "pages.xml")
>       compile $ do
>         let loadTeaser id = loadSnapshot id "teaser"
>                                 >>= loadAndApplyTemplate "templates/page.xml" defaultContext
>                                 >>= sitemapUrlsCompiler
>         all <- getMatches "pages/**"
>         let ids = [Item i "" | i <- all]
>         items <- sequence $ map loadTeaser $ map itemIdentifier ids
>         let itembodies = reverse $ map itemBody items
>             postsCtx =  constField "pages" (concat itembodies) `mappend`
>                           defaultContext
>         makeItem ""
>          >>= loadAndApplyTemplate "templates/sitemap.xml" postsCtx
>          >>= sitemapUrlsCompiler      

Now we get to lots of utility functions.

> postCtx :: Tags -> Context String
> postCtx tags = mconcat
>     [ dateField "date" "%B %e, %Y"
>     , tagsField "tags" tags
>     , defaultContext
>     ]                 

Minified javascript using hjsmin.

> minifyJSCompiler = do
>   s<-getResourceString 
>   return $ itemSetBody (minifyJS s) s
>   
> minifyJS = C.unpack . minify . C.pack . itemBody

Cleans up the post urls and places them in the “blog” rather than “posts” folder.

I’m not sure why I prefer this.

> blogRoute :: Routes
> blogRoute  =
>      gsubRoute "posts/" (const "blog/") 
>      `composeRoutes` cleanDate 
>      `composeRoutes` gsubRoute ".markdown" (const "/index.html")
>      `composeRoutes` gsubRoute ".html" (const "/index.html")
>      `composeRoutes` gsubRoute ".lhs" (const "/index.html")
>      
> pagesRoute :: Routes
> pagesRoute = gsubRoute "pages/" (const "") 
>                 `composeRoutes` gsubRoute ".html" (const "/index.html")
>                 `composeRoutes` gsubRoute ".markdown" (const "/index.html")
>                 `composeRoutes` gsubRoute ".lhs" (const "/index.html")
> 
> rawRoute :: Routes
> rawRoute = gsubRoute "raw/" (const "") 
>                 `composeRoutes` gsubRoute ".html" (const "/index.html")
>                 `composeRoutes` gsubRoute ".markdown" (const "/index.html")
>                 `composeRoutes` gsubRoute ".lhs" (const "/index.html")
> 
> cleanDate :: Routes
> cleanDate = customRoute removeDatePrefix
> 
> removeDatePrefix :: Identifier -> FilePath
> removeDatePrefix ident = replaceFileName file (drop 11 $ takeFileName file)
>   where file = toFilePath ident 

Get a list of recent posts and apply a template.

> recentPostList :: Compiler String
> recentPostList = do
>     posts <- fmap (take 5) . recentFirst =<< recentPosts
>     itemTpl <- loadBody "templates/indexpostitem.html"
>     list <- applyTemplateList itemTpl defaultContext posts
>     return $ slashUrls list

The homepage has a different template.

> recentPostsHomepage :: Compiler String
> recentPostsHomepage = do
>     posts <- fmap (take 5) . recentFirst =<< recentPosts
>     itemTpl <- loadBody "templates/homeposts.html"
>     list <- applyTemplateList itemTpl defaultContext posts
>     return $ slashUrls list
> 
> recentPosts :: Compiler [Item String]
> recentPosts = do
>     identifiers <- getMatches "posts/*"
>     return [Item identifier "" | identifier <- identifiers]

I don’t want any “/index.html” links hanging round; these functions sort that out.

> slashUrlsCompiler :: Item String -> Compiler (Item String)
> slashUrlsCompiler item = do
>     route <- getRoute $ itemIdentifier item
>     return $ case route of
>         Nothing -> item
>         Just r -> fmap slashUrls item
> 
> slashUrls :: String -> String 
> slashUrls = fileLinks . withUrls convert
>   where
>     convert = replaceAll "/index.html" (const "/")
>     fileLinks = replaceAll "/files/" (const "/files/")

URLs in the sitemap are in tags rather than tags which (I think this is the problem) means that the slashURLs compiler doesn’t work because of how it uses withUrls

> sitemapUrlsCompiler :: Item String -> Compiler (Item String)
> sitemapUrlsCompiler item = do
>   route <- getRoute $ itemIdentifier item
>   return $ case route of
>     Nothing -> item
>     Just r -> fmap (replaceAll "/index.html" (const "/")) item

Extracts the teaser from a blog post and filters some HTML tags from it.

> teaserBody :: Item String -> Compiler String
> teaserBody item = do
>     let body = itemBody item
>     return $ extractTeaser . maxLengthTeaser . compactTeaser $ body
>   where
>     extractTeaser :: String -> String
>     extractTeaser [] = []
>     extractTeaser xs@(x : xr)
>         | "<!-- more -->" `isPrefixOf` xs = []
>         | otherwise = x : extractTeaser xr
> 
>     maxLengthTeaser :: String -> String
>     maxLengthTeaser s = if findIndex (isPrefixOf "<!-- more -->") (tails s) == Nothing
>                             then unwords (take 60 (words s))
>                             else s
> 
>     compactTeaser :: String -> String
>     compactTeaser =
>         (replaceAll "<iframe [^>]*>" (const "")) .
>         (replaceAll "<img [^>]*>" (const "")) .
>         (replaceAll "<p>" (const "")) .
>         (replaceAll "</p>" (const "")) .
>         (replaceAll "<blockquote>" (const "")) .
>         (replaceAll "</blockquote>" (const "")) .
>         (replaceAll "<strong>" (const "")) .
>         (replaceAll "</strong>" (const "")) .
>         (replaceAll "<ol>" (const "")) .
>         (replaceAll "</ol>" (const "")) .
>         (replaceAll "<ul>" (const "")) .
>         (replaceAll "</ul>" (const "")) .
>         (replaceAll "<li>" (const "")) .
>         (replaceAll "</li>" (const "")) .
>         (replaceAll "<h[0-9][^>]*>" (const "")) .
>         (replaceAll "</h[0-9]>" (const "")) .
>         (replaceAll "<pre.*" (const "")) .
>         (replaceAll "<a [^>]*>" (const "")) .
>         (replaceAll "</a>" (const "")) .
>         (replaceAll "<div [^>]*>" (const "")) .
>         (replaceAll "</div>" (const ""))

The mysterious paginate function

> paginate:: Int -> (Int -> Int -> [Identifier] -> Rules ()) -> Rules ()
> paginate itemsPerPage rules = do
>     identifiers <- getMatches "posts/*"
> 
>     let sorted = reverse $ sortBy byDate identifiers
>         chunks = chunk itemsPerPage sorted
>         maxIndex = length chunks
>         pageNumbers = take maxIndex [1..]
>         process i is = rules i maxIndex is
>     zipWithM_ process pageNumbers chunks
>         where
>             byDate id1 id2 =
>                 let fn1 = takeFileName $ toFilePath id1
>                     fn2 = takeFileName $ toFilePath id2
>                     parseTime' fn = parseTimeM True defaultTimeLocale "%Y-%m-%d" $ intercalate "-" $ take 3 $ splitAll "-" fn
>                 in compare ((parseTime' fn1) :: Maybe UTCTime) ((parseTime' fn2) :: Maybe UTCTime)
>                    
> chunk :: Int -> [a] -> [[a]]
> chunk n [] = []
> chunk n xs = ys : chunk n zs
>     where (ys,zs) = splitAt n xs

A context for teasers.

> teaserCtx :: Tags -> Context String
> teaserCtx tags =
>     field "teaser" teaserBody `mappend`
>     (postCtx tags)

Create the “Older Entries” and “Newer Entries” links for blog pages

> indexNavLink :: Int -> Int -> Int -> String
> indexNavLink n d maxn = renderHtml ref
>   where ref = if (refPage == "") then ""
>               else H.a ! A.href (toValue $ toUrl $ refPage) $
>                    (preEscapedString lab)
>         lab = if (d > 0) then "Older Entries &raquo;" else "&laquo; Newer Entries"
>         refPage = if (n + d < 1 || n + d > maxn) then ""
>                   else case (n + d) of
>                     1 -> "/blog/"
>                     _ -> "/blog/" ++ (show $ n + d) ++ "/"

A context for the RSS feed.

> feedContext :: Context String
> feedContext = mconcat
>     [ rssBodyField "description"
>     , rssTitleField "title"
>     , wpUrlField "url"
>     , dateField "date" "%B %e, %Y"
>     ]

Configuration for the RSS feed.

> feedConfiguration :: FeedConfiguration
> feedConfiguration = FeedConfiguration
>     { feedTitle = "E-Analytica Blog"
>     , feedDescription = "Website Measurement and Optimisation"
>     , feedAuthorName = "Richard Fergie"
>     , feedAuthorEmail = "fergie@eanalytica.com"
>     , feedRoot = "http://www.eanalytica.com"
>     }

Removes iframes from the feed and makes urls correct (not index.html) and absolute

> rssBodyField :: String -> Context String
> rssBodyField key = field key $
>     return .
>     (replaceAll "<iframe [^>]*>" (const "")) .
>     (withUrls wordpress) .
>     (withUrls absolute) .
>     itemBody
>   where
>     wordpress x = replaceAll "/index.html" (const "/") x
>     absolute x 
>       | (head x) == '/' = (feedRoot feedConfiguration) ++ x
>       | (take 8 x) == "/files/" = (feedRoot feedConfiguration) ++ (drop 1 x)                    
>       | otherwise = x
> 
> empty :: Compiler String
> empty = return ""
> 
> rssTitleField :: String -> Context a
> rssTitleField key = field key $ \i -> do
>     value <- getMetadataField (itemIdentifier i) "title"
>     let value' = liftM (replaceAll "&" (const "&amp;")) value
>     maybe empty return value'
> 
> 
> toWordPressUrl :: FilePath -> String
> toWordPressUrl url =
>     replaceAll "/index.html" (const "/") (toUrl url)
> 
> wpUrlField :: String -> Context a
> wpUrlField key = field key $
>     fmap (maybe "" toWordPressUrl) . getRoute . itemIdentifier