#!/usr/bin/python
#
# based on a script originally at http://pastebin.com/f167029a
# note that this uses a locally installed wget
# usage: 
#   m4a_from_iplayer.py http://www.bbc.co.uk/iplayer/page/item/b00936dz.shtml

import os
import re
import subprocess
import sys
import urllib

# Compiled once at import time; used to spot the line of a show page that
# carries the opening <title> tag.
pattern = re.compile(r"<title>")

def mp4url_from_showurl(url):
    """Return the mediaselector MP4 URL for an iPlayer show page.

    The page at ``url`` is fetched and scanned for the first line that
    starts with ``    pid       : '``; the programme id that follows is
    substituted into the mediaselector URL template.

    Args:
        url: Address of the iPlayer show page to fetch.

    Returns:
        The mediaselector URL as a string, or None when no line of the
        page carries a pid.
    """
    pid_prefix = "    pid       : '"
    # Length the original hard-coded slice assumed for a pid; kept as the
    # fallback when the closing quote cannot be found.
    fallback_pid_length = 8

    response = urllib.urlopen(url)
    try:
        lines = response.readlines()
    finally:
        # Release the connection even if reading fails part-way through.
        response.close()

    for line in lines:
        if not line.startswith(pid_prefix):
            continue
        remainder = line[len(pid_prefix):]
        pid, closing_quote, _ = remainder.partition("'")
        if not closing_quote:
            pid = remainder[:fallback_pid_length]
        return "http://www.bbc.co.uk/mediaselector/3/auth/iplayer_streaming_http_mp4/%s" % (pid)

    return None  # not found!

def title_from_showurl(url):
    """Build an output filename from the <title> of an iPlayer show page.

    The page at ``url`` is fetched and the first line containing
    ``<title>`` is turned into a filename: the opening tag (and the
    ``BBC iPlayer - `` prefix, when present) is stripped, and the closing
    tag plus trailing whitespace is replaced by ``.m4a``.

    Args:
        url: Address of the iPlayer show page to fetch.

    Returns:
        The filename as a string, or None when the page has no line
        containing ``<title>``.
    """
    response = urllib.urlopen(url)
    try:
        lines = response.readlines()
    finally:
        # Release the connection even if reading fails part-way through.
        response.close()

    for line in lines:
        if pattern.search(line):
            # The site prefix is optional so that a differently worded
            # title does not leave a literal "<title>" in the filename.
            filename = re.sub(r"\s*<title>(?:BBC iPlayer - )?", "", line)
            filename = re.sub(r"</title>\s*", ".m4a", filename)
            # A "/" in the programme title would otherwise be read as a
            # directory separator when the name is used as a path.
            return filename.replace("/", "-")

    return None  # no <title> line found

# Script entry: resolve the show page given on the command line to an MP4
# URL and an output filename, then download it with wget.
if len(sys.argv) < 2:
    print("usage: m4a_from_iplayer.py <iPlayer show page URL>")
    sys.exit(1)

page = sys.argv[1]

if not page.startswith("http://www.bbc.co.uk/iplayer/"):
    print("Not an iPlayer URL")
    # sys.exit must be called; a bare `sys.exit` is a no-op expression and
    # would let the script carry on with the bad URL.
    sys.exit(1)

mp4url = mp4url_from_showurl(page)
filename = title_from_showurl(page)

if mp4url is None:
    print("No URL found")
    sys.exit(1)

if filename is None:
    # No <title> line on the page: name the file after the last path
    # component of the media URL instead of handing wget a None.
    filename = mp4url.rsplit("/", 1)[-1] + ".m4a"

print(mp4url)
# Flush so the URL appears before wget's own output.
sys.stdout.flush()

# TODO urllib.urlretrieve(url, filename, report hook)
# Run wget as a child process (rather than exec'ing over this one) so
# control returns here and the outcome can be reported.
status = subprocess.call(['/usr/local/bin/wget', '-O', filename, mp4url])
if status != 0:
    print("wget failed with exit status %d" % status)
    sys.exit(1)

print("Done")
