#!/usr/bin/python
#
# based on a script originally at http://pastebin.com/f167029a
# note that this uses a locally installed wget
# usage:
#   m4a_from_iplayer.py http://www.bbc.co.uk/iplayer/page/item/b00936dz.shtml
"""Download the MP4 audio stream behind a BBC iPlayer show page.

The show page is fetched once, the ``pid`` value and the page title are
scraped from it, and a locally installed ``wget`` is used to download the
stream into ``<title>.m4a``.
"""

import os
import re
import subprocess
import sys
import urllib

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# Absolute path of the wget binary used for the actual download.
WGET = '/usr/local/bin/wget'

# Marks the line of the show page that carries the page title.
pattern = re.compile("<title>")

# Matches a line of the form `<whitespace>pid : '<value>'`, tolerating any
# amount of whitespace, and captures the quoted value.
_PID_RE = re.compile(r"\s*pid\s*:\s*'([^']+)'")

_TITLE_PREFIX = "BBC iPlayer - "
_TITLE_END = "</title>"
_MP4_URL_TEMPLATE = "http://www.bbc.co.uk/mediaselector/3/auth/iplayer_streaming_http_mp4/%s"


def _fetch_lines(url):
    """Return the body of *url* as a list of text lines (no line endings)."""
    response = urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()
    if not isinstance(data, str):
        # Python 3 hands back bytes; decode leniently so that a stray byte
        # cannot abort the scrape.
        data = data.decode("utf-8", "replace")
    return data.splitlines()


def _mp4url_from_lines(lines):
    """Return the stream URL built from the first ``pid : '...'`` line, or None."""
    for line in lines:
        match = _PID_RE.match(line)
        if match:
            return _MP4_URL_TEMPLATE % (match.group(1))
    return None  # not found!


def _title_from_lines(lines):
    """Return ``<title>.m4a`` built from the first non-empty title line, or None."""
    for line in lines:
        match = pattern.search(line)
        if not match:
            continue
        # Keep only the text between <title> and </title> (or end of line).
        title = line[match.end():].split(_TITLE_END, 1)[0].strip()
        if title.startswith(_TITLE_PREFIX):
            title = title[len(_TITLE_PREFIX):].strip()
        if not title:
            continue
        # The title comes from a web page and ends up as a file name, so it
        # must not be able to point into another directory.
        return title.replace("/", "-") + ".m4a"
    return None


def mp4url_from_showurl(url):
    """Fetch the show page at *url* and return its MP4 stream URL.

    Returns None when the page contains no ``pid : '...'`` line.
    """
    return _mp4url_from_lines(_fetch_lines(url))


def title_from_showurl(url):
    """Fetch the show page at *url* and return the output file name.

    The name is the page title without its "BBC iPlayer - " prefix, with
    ".m4a" appended.  Returns None when the page has no usable title line.
    """
    return _title_from_lines(_fetch_lines(url))


def main(argv):
    """Run the downloader for ``argv[1]``; return a process exit status."""
    # NOTE: every print() below takes exactly one argument so that the script
    # behaves the same under Python 2 and Python 3.
    if len(argv) < 2:
        print("usage: m4a_from_iplayer.py http://www.bbc.co.uk/iplayer/page/item/b00936dz.shtml")
        return 2

    page = argv[1]
    if not page.startswith("http://www.bbc.co.uk/iplayer/"):
        print("Not an iPlayer URL")
        return 1

    # Fetch the page once and scrape both values from the same copy.
    lines = _fetch_lines(page)
    mp4url = _mp4url_from_lines(lines)
    if mp4url is None:
        print("No URL found")
        return 1

    filename = _title_from_lines(lines)
    if filename is None:
        # No usable title: fall back to the last path component of the URL.
        filename = mp4url.rsplit("/", 1)[-1] + ".m4a"

    print(mp4url)
    # TODO urllib.urlretrieve(url, filename, report hook)
    # Run wget as a child process (rather than exec'ing it) so that control
    # returns here and the outcome can be reported.
    try:
        status = subprocess.call([WGET, '-O', filename, mp4url])
    except OSError as err:
        print("Could not run %s: %s" % (WGET, err))
        return 1
    if status != 0:
        print("wget failed with exit status %d" % status)
        return status
    print("Done")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))