Information wants to be free...

XSPF Coverage and Duplication Check

Two new XSPF playlist use cases have come to mind: checking for duplicate file references across playlists, and checking for coverage. By coverage, I mean checking whether all files within a directory structure are actually referenced by the playlist(s).

Both scripts are based on the XSPF integrity check script I made earlier, and the same parser is used.

Script for duplication check:

#!/usr/bin/python

import xml.dom.minidom
import re
import os.path

xspf_files = dict()

def xspf_parse(playlist_filename, handler):
    """Call handler for every track location found in an XSPF playlist.

    playlist_filename is handed to xml.dom.minidom.parse() and is passed on
    unchanged as the first argument of handler.

    handler is called as handler(playlist_filename, track_filename), once
    for each non-empty <location> element found below
    playlist/trackList/track.  track_filename is the location text with
    percent-escapes decoded as UTF-8 and a leading "file://" removed.

    Errors raised by minidom while parsing, and UnicodeDecodeError for
    escape sequences that do not form valid UTF-8, are not caught here.
    """
    # Percent-decoding has to happen on bytes so that multi-byte UTF-8
    # sequences are reassembled; the helper lives in a different module
    # on Python 3 and Python 2.
    try:
        from urllib.parse import unquote_to_bytes
    except ImportError:
        from urllib import unquote as unquote_to_bytes

    scheme = "file://"
    xml_data = xml.dom.minidom.parse(playlist_filename)
    for playlist in xml_data.getElementsByTagName("playlist"):
        for tracklist in playlist.getElementsByTagName("trackList"):
            for track in tracklist.getElementsByTagName("track"):
                for location in track.getElementsByTagName("location"):
                    text_types = (location.TEXT_NODE,
                                  location.CDATA_SECTION_NODE)
                    raw = "".join(node.data
                                  for node in location.childNodes
                                  if node.nodeType in text_types)
                    if not raw:
                        # An empty <location/> has no child nodes at all;
                        # there is nothing to report for it.
                        continue
                    decoded = unquote_to_bytes(raw.encode("utf-8"))
                    track_filename = decoded.decode("utf-8")
                    # Only drop the URI scheme at the very start; the same
                    # characters further into the path belong to the name.
                    if track_filename.startswith(scheme):
                        track_filename = track_filename[len(scheme):]
                    handler(playlist_filename, track_filename)

def file_check(playlist_filename, track_filename):
    if track_filename in xspf_files:
        print track_filename, "-->", xspf_files[track_filename], "&", playlist_filename
    else:
        xspf_files[track_filename] = playlist_filename

if __name__ == "__main__":
    import sys

    # Everything after the program name is treated as a playlist to scan.
    playlists = sys.argv[1:]
    if not playlists:
        print("Usage: %s <xspf file> ... <xspf file>" % (sys.argv[0]))
        sys.exit(1)

    # file_check prints a line for every track that was already seen.
    for playlist in playlists:
        xspf_parse(playlist, file_check)

    sys.exit(0)
          


Script for coverage check:

#!/usr/bin/python

import xml.dom.minidom
import re
import os

xspf_files = set()
fs_files = set()

def xspf_parse(playlist_filename, handler):
    """Call handler for every track location found in an XSPF playlist.

    playlist_filename is handed to xml.dom.minidom.parse() and is passed on
    unchanged as the first argument of handler.

    handler is called as handler(playlist_filename, track_filename), once
    for each non-empty <location> element found below
    playlist/trackList/track.  track_filename is the location text with
    percent-escapes decoded as UTF-8 and a leading "file://" removed.

    Errors raised by minidom while parsing, and UnicodeDecodeError for
    escape sequences that do not form valid UTF-8, are not caught here.
    """
    # Percent-decoding has to happen on bytes so that multi-byte UTF-8
    # sequences are reassembled; the helper lives in a different module
    # on Python 3 and Python 2.
    try:
        from urllib.parse import unquote_to_bytes
    except ImportError:
        from urllib import unquote as unquote_to_bytes

    scheme = "file://"
    xml_data = xml.dom.minidom.parse(playlist_filename)
    for playlist in xml_data.getElementsByTagName("playlist"):
        for tracklist in playlist.getElementsByTagName("trackList"):
            for track in tracklist.getElementsByTagName("track"):
                for location in track.getElementsByTagName("location"):
                    text_types = (location.TEXT_NODE,
                                  location.CDATA_SECTION_NODE)
                    raw = "".join(node.data
                                  for node in location.childNodes
                                  if node.nodeType in text_types)
                    if not raw:
                        # An empty <location/> has no child nodes at all;
                        # there is nothing to report for it.
                        continue
                    decoded = unquote_to_bytes(raw.encode("utf-8"))
                    track_filename = decoded.decode("utf-8")
                    # Only drop the URI scheme at the very start; the same
                    # characters further into the path belong to the name.
                    if track_filename.startswith(scheme):
                        track_filename = track_filename[len(scheme):]
                    handler(playlist_filename, track_filename)

def add_xspf_file(playlist_filename, track_filename):
    """Record track_filename in the module-level xspf_files set.

    playlist_filename is accepted so the function matches the handler
    signature used by xspf_parse; it is not used here.
    """
    xspf_files.update([track_filename])

if __name__ == "__main__":
    import sys

    # A directory plus at least one playlist is required.
    if len(sys.argv) < 3:
        print("Usage: %s <directory> <xspf file> ... <xspf file>" % (sys.argv[0]))
        sys.exit(1)

    # Collect every file below the directory given as first argument.
    # NOTE: these paths are later compared as plain strings with the
    # locations taken from the playlists, so the directory has to be
    # spelled the way the playlists spell it.
    for root, dirs, files in os.walk(sys.argv[1]):
        for filename in files:
            path = os.path.join(root, filename)
            # Byte-string paths are turned into text so they can be compared
            # with the playlist entries; paths that are already text are
            # left alone.
            if isinstance(path, bytes):
                path = path.decode("iso-8859-1")
            fs_files.add(path)

    # Collect every location referenced by the playlists.
    for filename in sys.argv[2:]:
        xspf_parse(filename, add_xspf_file)

    fs_covered = float(len(fs_files.intersection(xspf_files)))
    fs_total = float(len(fs_files))
    if fs_total:
        coverage = (fs_covered / fs_total) * 100
    else:
        # No files on disk: nothing can be missing, and dividing by the
        # total would raise ZeroDivisionError.
        coverage = 100.0
    print("Coverage: %.2f%%" % coverage)
    print("Missing Files:")
    # Sorted so that repeated runs list the files in the same order.
    for filename in sorted(fs_files.difference(xspf_files)):
        print(filename)

    sys.exit(0)
          


Topic: Scripts and Code, by Kjetil @ 06/03-2015, Article Link