From 5141a2a85ac80245900659cb65cf967de476bb63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaussoin=20Timoth=C3=A9e?=
Date: Wed, 16 Apr 2014 14:11:43 +0200
Subject: [PATCH] - Add the base files

---
 README.md         |  30 ++++++++++--
 atomtopubsub.py   |  94 ++++++++++++++++++++++++++++++++++++++
 config_default.py |  29 ++++++++++++
 publishx.py       | 114 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 264 insertions(+), 3 deletions(-)
 create mode 100755 atomtopubsub.py
 create mode 100755 config_default.py
 create mode 100644 publishx.py

diff --git a/README.md b/README.md
index 81e7725..972ca7f 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,28 @@
-atomtopubsub
-============
+== About ==
 
-A little client that par Atom feeds and send them on XMPP Pubsub Nodes
+AtomToPubsub is a simple Python program that parses Atom feeds and pushes
+the entries to an XMPP Pubsub node (http://xmpp.org/extensions/xep-0060.html)
+
+== Installation ==
+
+AtomToPubsub is built using Python 2.6 and uses the following libraries:
+- feedparser
+- time
+- pickle
+- sleekxmpp (version > 1.0, you can download and install it from http://sleekxmpp.com/)
+- sys
+
+== Configuration ==
+
+Rename config_default.py to config.py and set your Atom feeds and your
+XMPP account configuration.
+
+The XMPP account must be authorized to create Pubsub nodes on the server(s).
+
+== Features ==
+
+- The "key" of each feed of the configuration file will be the name of
+the Pubsub node
+- AtomToPubsub will try to fill the title and the description of the
+Pubsub node from the title and the subtitle of the Atom feed
+- A cache file is created for performance reasons
diff --git a/atomtopubsub.py b/atomtopubsub.py
new file mode 100755
index 0000000..3cbcef5
--- /dev/null
+++ b/atomtopubsub.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+"""Push the Atom feeds listed in config.py to XMPP Pubsub nodes."""
+
+import feedparser
+import time
+import pickle
+
+import publishx
+import config
+
+import logging
+
+# Print the messages logged by SleekXMPP (INFO and above) on the console.
+log = logging.getLogger('sleekxmpp')
+log.setLevel(logging.INFO)
+ch = logging.StreamHandler()
+ch.setLevel(logging.INFO)
+formatter = logging.Formatter('%(message)s')
+ch.setFormatter(formatter)
+log.addHandler(ch)
+
+from socket import error as SocketError
+from termcolor import colored, cprint
+
+# File in which the `parsed` dict is pickled between two runs.
+CACHE_FILE = 'cache.pkl'
+
+# Last "updated" date recorded for each feed, indexed by feed key.
+parsed = {}
+connected = False
+xmpp = publishx.publishx(config)
+
+
+def parse():
+    """Fetch every configured feed once and publish its entries.
+
+    The configuration is reloaded on each call, so the feeds can be
+    edited while the program runs.  The refresh period is spread over
+    the feeds: the function sleeps refresh_time / len(feeds) minutes
+    after each of them.
+    """
+    reload(config)
+
+    if not config.feeds:
+        # Nothing to poll: wait a whole period, otherwise the caller's
+        # endless loop would call us again immediately and spin.
+        time.sleep(float(config.refresh_time) * 60)
+        return
+
+    # Minutes to wait after each feed.
+    pause = float(config.refresh_time) / len(config.feeds)
+
+    for key, feed in config.feeds.iteritems():
+        print colored('>> parsing %s' % key , 'magenta')
+        f = feedparser.parse(feed['url'])
+
+        if f.bozo == 1:
+            print 'XML Error'
+            if hasattr(f.bozo_exception, 'getMessage'):
+                print f.bozo_exception.getMessage()
+            if hasattr(f.bozo_exception, 'getLineNumber'):
+                print 'at line %s' % f.bozo_exception.getLineNumber()
+
+        # The Pubsub node is created the first time a feed is seen.
+        last_seen = parsed.get(key)
+        if last_seen is None:
+            xmpp.create(feed['server'], key, f.feed)
+
+        # Every entry is published; the dates only decide whether it is
+        # reported as new or as an update.
+        for entry in f.entries:
+            stamp = getattr(entry, 'updated_parsed', None)
+            if last_seen is None or stamp is None or last_seen < stamp:
+                print colored('++ new entry %s' % entry.title, 'green')
+            else:
+                print colored('++ update entry %s' % entry.title, 'yellow')
+            xmpp.publish(feed['server'], key, entry)
+
+        # And we update the last updated date for the feed.  The date of
+        # the feed element is used when the result itself has none.
+        stamp = getattr(f, 'updated_parsed', None) \
+            or getattr(f.feed, 'updated_parsed', None)
+        if stamp is not None:
+            parsed[key] = stamp
+        else:
+            print colored('-- Parse failed for %s' % key, 'red')
+
+        save()
+
+        print colored('Parsing next feed in %.2f minutes' % pause, 'cyan')
+        time.sleep(pause * 60)
+
+
+def load():
+    """Return the cached dates, or an empty dict when there is no cache.
+
+    A missing or unreadable cache file is replaced by a fresh one.
+    """
+    try:
+        # Only load a cache written by save(): unpickling a file from
+        # an untrusted source can run arbitrary code.
+        with open(CACHE_FILE, 'rb') as pkl_file:
+            return pickle.load(pkl_file)
+    except (IOError, EOFError, pickle.UnpicklingError):
+        print 'Creating the cache'
+        save()
+        return {}
+
+
+def save():
+    """Pickle `parsed` into the cache file.
+
+    The empty dict returned is kept for backward compatibility.
+    """
+    with open(CACHE_FILE, 'wb') as output:
+        pickle.dump(parsed, output)
+    return {}
+
+
+parsed = load()
+connected = xmpp.connect()
+
+if connected:
+    # process() is expected to return at once and handle the XMPP stream
+    # in the background, leaving this thread free to poll the feeds.
+    xmpp.process()
+
+    while True:
+        try:
+            parse()
+        except KeyboardInterrupt:
+            xmpp.disconnect(wait=True)
+            print "Exiting..."
+            break
+else:
+    print 'Unable to connect'
diff --git a/config_default.py b/config_default.py
new file mode 100755
index 0000000..ba8d7c1
--- /dev/null
+++ b/config_default.py
@@ -0,0 +1,29 @@
+# The feeds, /!\ Put Atom feeds only
+feeds = {
+    'YIFY' : {
+        'url' : 'http://localhost/feedcleaner/?url=http://yify-torrents.com/rss',
+        'server' : 'pubsub.movim.eu'
+    },
+
+    'LEquipe' : {
+        'url' : 'http://localhost/feedcleaner/?url=http://www.lequipe.fr/rss/actu_rss.xml',
+        'server' : 'sport.mov.im'
+    },
+    'SportingNews' : {
+        'url' : 'http://localhost/feedcleaner/?url=http://www.sportingnews.com/rss',
+        'server' : 'sport.mov.im'
+    },
+
+    'OuestFrance' : {
+        'url' : 'http://localhost/feedcleaner/?url=http://www.ouest-france.fr/rss.xml',
+        'server' : 'news.mov.im'
+    }
+    }
+
+# XMPP
+jid = 'user@server.tld'
+resource = 'atomtopubsub'
+secret = 'password'
+
+# Refresh intervals in minutes
+refresh_time = 15
diff --git a/publishx.py b/publishx.py
new file mode 100644
index 0000000..c53ec99
--- /dev/null
+++ b/publishx.py
@@ -0,0 +1,114 @@
+import sys
+import logging
+import getpass
+from optparse import OptionParser
+from termcolor import colored, cprint
+
+#from sleekxmpp.xmlstream.stanzabase import ET
+
+import sleekxmpp
+from sleekxmpp.xmlstream import ET, tostring
+import sleekxmpp.plugins.xep_0060.stanza.pubsub as pubsub
+
+# Python versions 
before 3.0 do not use UTF-8 encoding
+# by default. To ensure that Unicode is handled properly
+# throughout SleekXMPP, we will set the default encoding
+# ourselves to UTF-8.
+if sys.version_info < (3, 0):
+    reload(sys)
+    sys.setdefaultencoding('utf8')
+else:
+    raw_input = input
+
+NS_ATOM = 'http://www.w3.org/2005/Atom'
+NS_JABBER_DATA = 'jabber:x:data'
+
+class publishx(sleekxmpp.ClientXMPP):
+    """XMPP client that creates Pubsub nodes and publishes Atom entries."""
+
+    def __init__(self, config):
+        """Log in as config.jid/config.resource using config.secret."""
+        fulljid = config.jid + "/" + config.resource
+        sleekxmpp.ClientXMPP.__init__(self, fulljid, config.secret)
+
+        self.add_event_handler("session_start", self.start)
+        self.register_plugin('xep_0060')
+
+    def start(self, event):
+        """Announce presence and fetch the roster once the session starts."""
+        self.send_presence(pshow='chat', pstatus= 'AtomToPubsub')
+        self.get_roster()
+
+    def _send(self, iq):
+        """Send `iq` and print the reply; a failure is printed, not raised."""
+        try:
+            print iq.send(timeout=5)
+        except Exception as e:
+            # A bare except would also swallow KeyboardInterrupt, so that
+            # Ctrl-C during a request could not stop the program.
+            print 'Iq Error: %r' % (e,)
+
+    def create(self, server, node, feed):
+        """Create and configure the Pubsub node `node` on `server`.
+
+        The node title and description are taken from the title and
+        subtitle of `feed` when it has them, and left empty otherwise.
+        """
+        title = getattr(feed, 'title', '')
+        description = getattr(feed, 'subtitle', '')
+        print colored('>> create %s' % title, 'blue')
+
+        iq = self.Iq(stype="set", sto = server)
+        iq['pubsub']['create']['node'] = node
+        form = iq['pubsub']['configure']['form']
+        form['type'] = 'submit'
+        form.addField('pubsub#persist_items', ftype = 'boolean', value = 1)
+        form.addField('pubsub#title', ftype = 'text-single', value = title)
+        form.addField('pubsub#type', ftype = 'text-single', value = NS_ATOM)
+        form.addField('pubsub#description', ftype = 'text-single',
+                      value = description)
+        self._send(iq)
+
+    def publish(self, server, node, entry):
+        """Publish `entry` on `node` of `server` as an Atom <entry> item.
+
+        `entry` must provide id, title and updated; its content and links
+        are copied when present.
+        """
+        iq = self.Iq(stype="set", sto = server)
+        iq['pubsub']['publish']['node'] = node
+
+        item = pubsub.Item()
+        item['id'] = entry.id
+
+        ent = ET.Element("entry")
+        ent.set('xmlns', NS_ATOM)
+
+        title = ET.SubElement(ent, "title")
+        title.text = entry.title
+
+        updated = ET.SubElement(ent, "updated")
+        updated.text = entry.updated
+
+        # Not every entry carries a content block.
+        contents = getattr(entry, 'content', None)
+        if contents and hasattr(contents[0], 'type'):
+            content = ET.SubElement(ent, "content")
+            content.set('type', contents[0].type)
+            content.text = contents[0].value
+
+        for l in getattr(entry, 'links', []):
+            link = ET.SubElement(ent, "link")
+            # type and rel are optional on a link: set only what is there.
+            for attr in ('href', 'type', 'rel'):
+                if l.get(attr):
+                    link.set(attr, l[attr])
+
+        item['payload'] = ent
+        iq['pubsub']['publish'].append(item)
+        self._send(iq)
+
+
+# Placeholder; nothing in this patch calls it.
+def published():
+    print 'published'