- Add the base files

This commit is contained in:
Jaussoin Timothée 2014-04-16 14:11:43 +02:00
parent 2b3dca494f
commit 5141a2a85a
4 changed files with 264 additions and 3 deletions

View File

@ -1,4 +1,28 @@
atomtopubsub
============
== About ==
A little client that parses Atom feeds and sends them to XMPP Pubsub nodes
AtomToPubsub is a simple Python program that parses Atom feeds and pushes
the entries to an XMPP Pubsub node (http://xmpp.org/extensions/xep-0060.html)
== Installation ==
AtomToPubsub is built using Python 2.6 and uses the following libraries:
- feedparser
- time
- pickle
- sleekxmpp (version > 1.0, you can download and install it from here http://sleekxmpp.com/)
- sys
- termcolor
== Configuration ==
Rename config_default.py to config.py and set your Atom feeds and your
XMPP account configuration.
The XMPP account must be authorized to create Pubsub nodes on the server(s).
== Features ==
- The "key" of each feed of the configuration file will be the name of
the Pubsub node
- AtomToPubsub will try to fill the title and the description of the
Pubsub node from the title and the subtitle of the Atom feed
- A cache file is created for performance reasons

94
atomtopubsub.py Executable file
View File

@ -0,0 +1,94 @@
#!/usr/bin/env python
import feedparser
import time
import pickle
import publishx
import config
import logging
# Send the records of the 'sleekxmpp' logger to a stream handler at INFO
# level, formatted as the bare message text only.
log = logging.getLogger('sleekxmpp')
log.setLevel(logging.INFO)
# Handler that actually emits the records; it filters at INFO as well.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# '%(message)s' drops the timestamp, level and logger name from the output.
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
log.addHandler(ch)
from socket import error as SocketError
from termcolor import colored, cprint
# Cache mapping each feed key to the last update time recorded by parse();
# it is replaced by the result of load() before the main loop starts.
parsed = {}
# Holds the result of xmpp.connect() once the connection has been attempted.
connected = False
# XMPP client used to create the Pubsub nodes and to publish the entries.
xmpp = publishx.publishx(config)
# Feed the Pubsub nodes from the configured Atom feeds
def parse():
    """Run one pass over every configured feed and push its entries to XMPP.

    The configuration module is reloaded first, then for each feed:

    * the feed is fetched and parsed, and XML errors are reported;
    * the Pubsub node is created when the feed key is not in the cache;
    * every entry is published on the node;
    * the feed's update time is stored in the cache, which is then saved;
    * the function sleeps for this feed's share of the refresh interval.

    NOTE(review): the source lost its indentation; the nesting below is a
    reconstruction (publish is assumed to run for every entry, and the
    save/sleep steps are assumed to run once per feed) -- confirm.
    """
    reload(config)

    for node, source in config.feeds.iteritems():
        print(colored('>> parsing %s' % node, 'magenta'))
        result = feedparser.parse(source['url'])

        # Report malformed XML, with as much detail as the exception offers.
        if result.bozo == 1:
            print('XML Error')
            error = result.bozo_exception
            if hasattr(error, 'getMessage'):
                print(error.getMessage())
            if hasattr(error, 'getLineNumber'):
                print('at line %s' % error.getLineNumber())

        # A feed key missing from the cache means its node may not exist yet.
        if node not in parsed:
            xmpp.create(source['server'], node, result.feed)

        # Publish every entry; only the log line differs between an entry
        # newer than the cached date and one that is not.
        for entry in result.entries:
            is_new = node not in parsed or parsed[node] < entry.updated_parsed
            if is_new:
                template, colour = '++ new entry %s', 'green'
            else:
                template, colour = '++ update entry %s', 'yellow'
            print(colored(template % entry.title, colour))
            xmpp.publish(source['server'], node, entry)

        # Remember the last updated date of the feed
        if result is None or not hasattr(result, 'updated_parsed'):
            print(colored('-- Parse failed for %s' % node, 'red'))
        else:
            parsed[node] = result.updated_parsed

        save()

        # Spread the feeds evenly over the refresh interval
        print(colored('Parsing next feed in %.2f minutes' % (float(config.refresh_time)/len(config.feeds)), 'cyan'))
        time.sleep((float(config.refresh_time) * 60)/len(config.feeds))
def load():
    """Return the feed cache stored in ``cache.pkl``.

    Returns:
        The object unpickled from ``cache.pkl``. When the file cannot be
        opened, or is truncated or corrupt, the cache is recreated through
        save() and the value save() returns is handed back instead.
    """
    try:
        # The context manager closes the file even when unpickling fails.
        # NOTE: pickle executes whatever the file describes; cache.pkl must
        # only ever be a file written by this program.
        with open('cache.pkl', 'rb') as pkl_file:
            return pickle.load(pkl_file)
    except (IOError, EOFError, pickle.UnpicklingError):
        # Missing, unreadable, empty or damaged cache: start a fresh one
        # rather than crashing at startup.
        print('Creating the cache')
        return save()
def save():
    """Write the module-level ``parsed`` cache to ``cache.pkl``.

    Returns:
        Always a new empty dict.
    """
    # The context manager guarantees the file is closed (and flushed) even
    # when pickling raises.
    with open('cache.pkl', 'wb') as output:
        pickle.dump(parsed, output)
    return {}
# Restore the cache, connect to the XMPP server, then poll the feeds until
# the user interrupts the program.
parsed = load()
connected = xmpp.connect()
if connected:
    # Start the XMPP stream processing only once the connection succeeded;
    # starting it on a failed connection would leave it running with
    # nothing to do.
    xmpp.process()
    while True:
        try:
            parse()
        except KeyboardInterrupt:
            # Ctrl-C: close the stream cleanly before leaving the loop.
            xmpp.disconnect(wait=True)
            print("Exiting...")
            break
else:
    # Make the failure visible instead of exiting silently.
    print('Unable to connect.')

29
config_default.py Executable file
View File

@ -0,0 +1,29 @@
# The feeds, /!\ Put Atom feeds only
# Each key is the name of the Pubsub node; 'url' is the address of the Atom
# feed to parse and 'server' is the Pubsub service that hosts the node.
feeds = {
    'YIFY': {
        'url': 'http://localhost/feedcleaner/?url=http://yify-torrents.com/rss',
        'server': 'pubsub.movim.eu',
    },
    'LEquipe': {
        'url': 'http://localhost/feedcleaner/?url=http://www.lequipe.fr/rss/actu_rss.xml',
        'server': 'sport.mov.im',
    },
    'SportingNews': {
        'url': 'http://localhost/feedcleaner/?url=http://www.sportingnews.com/rss',
        'server': 'sport.mov.im',
    },
    'OuestFrance': {
        'url': 'http://localhost/feedcleaner/?url=http://www.ouest-france.fr/rss.xml',
        'server': 'news.mov.im',
    },
}

# XMPP account: the client logs in as "<jid>/<resource>" with this password
jid = 'user@server.tld'
resource = 'atomtopubsub'
secret = 'password'

# Refresh interval in minutes; it is shared out between all the feeds
refresh_time = 15

114
publishx.py Normal file
View File

@ -0,0 +1,114 @@
import sys
import logging
import getpass
from optparse import OptionParser
from termcolor import colored, cprint
#from sleekxmpp.xmlstream.stanzabase import ET
import sleekxmpp
from sleekxmpp.xmlstream import ET, tostring
import sleekxmpp.plugins.xep_0060.stanza.pubsub as pubsub
# Unicode handling differs between the major Python versions: before 3.0
# the default encoding is not UTF-8, so it is forced to UTF-8 here to keep
# Unicode working throughout SleekXMPP.
if sys.version_info >= (3, 0):
    # Python 3 only has input(); expose it under its Python 2 name.
    raw_input = input
else:
    reload(sys)
    sys.setdefaultencoding('utf8')
# Atom XML namespace: set as the pubsub#type of the nodes that are created
# and as the xmlns of every published <entry/> payload.
NS_ATOM = 'http://www.w3.org/2005/Atom'
# The 'jabber:x:data' namespace; not referenced by the code visible here.
NS_JABBER_DATA = 'jabber:x:data'
class publishx(sleekxmpp.ClientXMPP):
    """XMPP client that creates Pubsub nodes and publishes Atom entries."""

    def __init__(self, config):
        """Prepare the client from a configuration object.

        Args:
            config: object exposing ``jid``, ``resource`` and ``secret``;
                the client logs in as ``<jid>/<resource>``.
        """
        fulljid = config.jid + "/" + config.resource

        sleekxmpp.ClientXMPP.__init__(self, fulljid, config.secret)
        self.add_event_handler("session_start", self.start)
        self.register_plugin('xep_0060')

    def start(self, event):
        """Handle ``session_start``: send presence and request the roster."""
        self.send_presence(pshow='chat', pstatus='AtomToPubsub')
        self.get_roster()

    def create(self, server, node, feed):
        """Ask ``server`` to create and configure the Pubsub node ``node``.

        Args:
            server: address of the Pubsub service the request is sent to.
            node: name of the node to create.
            feed: parsed feed metadata; its ``title`` and ``subtitle``, when
                present, become the node title and description.
        """
        title = getattr(feed, 'title', '')
        description = getattr(feed, 'subtitle', '')

        print(colored('>> create %s' % title, 'blue'))

        iq = self.Iq(stype="set", sto=server)
        iq['pubsub']['create']['node'] = node

        form = iq['pubsub']['configure']['form']
        form['type'] = 'submit'
        form.addField('pubsub#persist_items',
                      ftype='boolean',
                      value=1)
        form.addField('pubsub#title',
                      ftype='text-single',
                      value=title)
        form.addField('pubsub#type',
                      ftype='text-single',
                      value=NS_ATOM)
        form.addField('pubsub#description',
                      ftype='text-single',
                      value=description)

        try:
            print(iq.send(timeout=5))
        except Exception:
            # Best effort: a failed request must not stop the feed loop, but
            # KeyboardInterrupt/SystemExit must still reach the caller.
            print('Iq Error')

    def publish(self, server, node, entry):
        """Publish one feed entry as an Atom ``<entry/>`` on ``node``.

        Args:
            server: address of the Pubsub service the request is sent to.
            node: name of the node to publish on.
            entry: parsed feed entry; ``id``, ``title`` and ``updated`` are
                required, ``content`` and ``links`` are used when present.
        """
        iq = self.Iq(stype="set", sto=server)
        iq['pubsub']['publish']['node'] = node

        item = pubsub.Item()
        item['id'] = entry.id

        ent = ET.Element("entry")
        ent.set('xmlns', NS_ATOM)

        title = ET.SubElement(ent, "title")
        title.text = entry.title

        updated = ET.SubElement(ent, "updated")
        updated.text = entry.updated

        # Entries without any content are published without <content/>
        # instead of raising.
        contents = getattr(entry, 'content', None)
        if contents and hasattr(contents[0], 'type'):
            content = ET.SubElement(ent, "content")
            content.set('type', contents[0].type)
            content.text = contents[0].value

        for l in getattr(entry, 'links', ()):
            link = ET.SubElement(ent, "link")
            # Copy only the attributes the feed provides for this link.
            for attribute in ('href', 'type', 'rel'):
                value = l.get(attribute)
                if value is not None:
                    link.set(attribute, value)

        item['payload'] = ent
        iq['pubsub']['publish'].append(item)

        try:
            print(iq.send(timeout=5))
        except Exception:
            # Best effort: a failed request must not stop the feed loop, but
            # KeyboardInterrupt/SystemExit must still reach the caller.
            print('Iq Error')
def published():
    """Print the word ``published``.

    NOTE(review): takes no ``self``; the source lost its indentation, so it
    is unclear whether this was meant as a method or as a module-level
    function -- confirm. Nothing visible here calls it.
    """
    print('published')