HSH is a cool Haskell library that lets you leverage your shell-scripting prowess in Haskell programs. In this simple broadcatcher, I use curl for HTTP GET requests and other standard Unix tools for tracking history (so we don’t fetch the same file twice). The feed parsing and filtering are done in Haskell using the Text.Feed and Text.Regex libraries.
Note: if you decide to use this in real life, be sure to respect your feed’s time to live (TTL) when scheduling the script in your crontab.
-
#!/usr/bin/env runhaskell
-
-
import Char
-
import Data.List
-
import HSH
-
import Maybe
-
import Text.Feed.Import
-
import Text.Feed.Query
-
import Text.Regex.Posix
-
-
-- CONFIGURATION --
-
-- | Directory the downloads are saved into; 'fetchFiles' changes into it
-- before running curl.
dlDir :: String
dlDir = "/path/to/download/dir/"
-
-- | Log file used by 'checkHistory': incoming lines are diffed against its
-- sorted contents and the lines that survive are appended to it.
historyFile :: String
historyFile = "/path/to/download/history.log"
-
-
-- | Patterns handed to @match_any@ when building 'filters'.
-- NOTE(review): presumably an item passes when it matches at least one of
-- these; @match_any@ is not visible in this file, so confirm.
any_patterns :: [String]
any_patterns = ["some.*thing", "something.*else", "etc"]
-
-- | Patterns handed to @match_all@ when building 'filters'.
-- NOTE(review): presumably an item must match every one of these;
-- @match_all@ is not visible in this file, so confirm.
all_patterns :: [String]
all_patterns = ["every.*thing"]
-
-- | Patterns handed to @match_none@ when building 'filters'.
-- NOTE(review): presumably an item is rejected when it matches any of
-- these; @match_none@ is not visible in this file, so confirm.
none_patterns :: [String]
none_patterns = ["some.*boring.*thing"]
-
-
-- | URL of the feed; 'fetchFeed' passes it to curl as a quoted argument.
feed_url :: String
feed_url = "http://my/feed.rss"
-
-
-- curl CLI flags (see man curl)
-
-- | Extra command-line flags appended to the base curl command in 'curl'.
-- Empty by default, i.e. no additional flags.
curl_opts :: String
curl_opts = ""
-
-
-- END CONFIGURATION --
-
-
-- | Base curl command line: silent mode (@-s@) followed by the
-- user-configured 'curl_opts'. Shared by 'fetchFeed' and 'fetchFiles'.
curl :: String
curl = concat ["curl -s ", curl_opts]
-
-- | Shell command that runs curl on 'feed_url'.
--
-- A space is inserted between the curl command line and the quoted URL:
-- 'curl' ends with 'curl_opts', so without the separator any non-empty
-- option string (e.g. @-L@) would be glued onto the URL and the shell
-- would hand curl a single malformed argument.
fetchFeed :: String
fetchFeed = curl ++ " \"" ++ feed_url ++ "\""
-
-- | Shell command that reads URLs from stdin and downloads each one into
-- 'dlDir'.
--
-- @xargs -r@ skips running curl when there is no input. @-n 1@ runs one
-- curl per URL: curl's @-O@ applies to a single URL only, so passing
-- several URLs to one @curl -O@ would save the first and write the rest
-- to stdout.
fetchFiles :: String
fetchFiles = "(cd " ++ dlDir ++ " && xargs -r -n 1 " ++ curl ++ " -O)"
-
-
-
-- | The item filters, one per configured pattern list: @match_any@ over
-- 'any_patterns', @match_all@ over 'all_patterns' and @match_none@ over
-- 'none_patterns'.
filters =
  [ match_any any_patterns
  , match_all all_patterns
  , match_none none_patterns
  ]
-
-
-- filter using a list of predicates
-
-
-- Pipeline stage that 'main' and 'test' place after the feed source (and,
-- in 'main', before 'checkHistory').
-- NOTE(review): the right-hand side of this definition is missing in this
-- copy of the file (nothing follows the `=`); restore it from the original
-- before use.
filterSubscriptions lines =
-
-- Accessors for the first and second component of a pair.
where title (x, _) = x
-
link (_,x) = x
-
-- Binds the results of getItemTitle and getItemLink for one feed item.
-- NOTE(review): the do block ends on a bind with no final expression;
-- presumably a `return (title, link)` line was lost -- confirm.
titleAndLink item = do title <- getItemTitle item
-
link <- getItemLink item
-
-
-- | Shell command that passes through only the input lines not yet
-- recorded in 'historyFile', and records them.
--
-- Steps: sort stdin, diff it against the sorted history, keep the lines
-- diff marks with @> @ (present in the input only), and append them to the
-- history with @tee -a@ while still emitting them on stdout. bash is
-- invoked explicitly because @<( ... )@ process substitution is a bash
-- feature.
--
-- The literal uses a plain ASCII @-@ (diff's name for stdin) and plain
-- single quotes around the sed script; typographic dashes and quotes are
-- not understood by the shell and make the command fail.
checkHistory :: String
checkHistory = "bash -c \"sort | diff <(sort " ++ historyFile ++ ") - | sed -n 's/^> //p' | tee -a " ++ historyFile ++ "\""
-
-
-- | Debug entry point: runs 'filterSubscriptions' over the feed copy that
-- 'main' saves to @/tmp/feed.xml@, without touching the history or
-- downloading anything.
test :: IO ()
test = runIO ("cat /tmp/feed.xml" -|- filterSubscriptions)
-
-- | Runs the whole pipeline: fetch the feed, keep a copy in
-- @/tmp/feed.xml@ (read back by 'test'), filter it, drop what the history
-- already contains, and download the rest.
main :: IO ()
main =
  runIO $
    fetchFeed
      -|- "tee /tmp/feed.xml"
      -|- filterSubscriptions
      -|- checkHistory
      -|- fetchFiles