- Add the base files
This commit is contained in:
parent
2b3dca494f
commit
5141a2a85a
30
README.md
30
README.md
@ -1,4 +1,28 @@
|
|||||||
atomtopubsub
|
== About ==
|
||||||
============
|
|
||||||
|
|
||||||
A little client that parses Atom feeds and sends them to XMPP Pubsub nodes
|
AtomToPubsub is a simple Python program that parses Atom feeds and pushes
|
||||||
|
the entries to an XMPP Pubsub node (http://xmpp.org/extensions/xep-0060.html)
|
||||||
|
|
||||||
|
== Installation ==
|
||||||
|
|
||||||
|
AtomToPubsub is built with Python 2.6 and uses the following libraries:
|
||||||
|
- feedparser
|
||||||
|
- time
|
||||||
|
- pickle
|
||||||
|
- sleekxmpp (version > 1.0, you can download and install it from here http://sleekxmpp.com/)
|
||||||
|
- sys
|
||||||
|
|
||||||
|
== Configuration ==
|
||||||
|
|
||||||
|
Rename config_default.py to config.py and set your Atom feeds and your
|
||||||
|
XMPP account configuration.
|
||||||
|
|
||||||
|
The XMPP account must be authorized to create Pubsub nodes on the server(s).
|
||||||
|
|
||||||
|
== Features ==
|
||||||
|
|
||||||
|
- The "key" of each feed of the configuration file will be the name of
|
||||||
|
the Pubsub node
|
||||||
|
- AtomToPubsub will try to fill the title and the description of the
|
||||||
|
Pubsub node from the title and the subtitle of the Atom feed
|
||||||
|
- A cache file is created for performance reasons
|
||||||
|
94
atomtopubsub.py
Executable file
94
atomtopubsub.py
Executable file
@ -0,0 +1,94 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import feedparser
|
||||||
|
import time
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
import publishx
|
||||||
|
import config
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Console output for SleekXMPP: a stream handler (stderr by default) that
# prints only the message text of records at INFO level and above.
formatter = logging.Formatter('%(message)s')

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)

# Attach the handler to the 'sleekxmpp' logger and let INFO records through.
log = logging.getLogger('sleekxmpp')
log.setLevel(logging.INFO)
log.addHandler(ch)
|
||||||
|
|
||||||
|
from socket import error as SocketError
|
||||||
|
from termcolor import colored, cprint
|
||||||
|
|
||||||
|
# Shared module state.
# xmpp:      the Pubsub publishing client, built from the configuration module.
# connected: result of the connection attempt; False until connect() is called.
# parsed:    last known update time of each feed, keyed by feed name.
xmpp = publishx.publishx(config)
connected = False
parsed = {}
|
||||||
|
|
||||||
|
# We feed the pubsub nodes
def parse():
    """Fetch every configured feed once and publish its entries.

    The ``config`` module is reloaded on each call, so changes to the feed
    list or to the refresh time are picked up without restarting.  For each
    feed: the Pubsub node is created when the feed has no entry in the
    module-level ``parsed`` cache, every entry of the feed is published, the
    cache is saved, and the function sleeps for an equal share of
    ``config.refresh_time`` (expressed in minutes) before the next feed.

    When no feed is configured the function sleeps for one full refresh
    period, so that a caller invoking it in an endless loop does not spin.
    """
    reload(config)

    feed_count = len(config.feeds)
    if feed_count == 0:
        # Without this pause the caller's loop would call parse() again
        # immediately and burn CPU until a feed is configured.
        print(colored('No feed configured, checking again in %.2f minutes'
                      % float(config.refresh_time), 'cyan'))
        time.sleep(float(config.refresh_time) * 60)
        return

    # The refresh period is spread evenly over the feeds; the configuration
    # does not change during one pass, so the delay is computed only once.
    pause_minutes = float(config.refresh_time) / feed_count
    pause_seconds = (float(config.refresh_time) * 60) / feed_count

    # We parse all the feeds
    for key, feed in config.feeds.iteritems():
        print(colored('>> parsing %s' % key, 'magenta'))
        f = feedparser.parse(feed['url'])

        if f.bozo == 1:
            print('XML Error')
            if hasattr(f.bozo_exception, 'getMessage'):
                print(f.bozo_exception.getMessage())
            if hasattr(f.bozo_exception, 'getLineNumber'):
                print('at line %s' % f.bozo_exception.getLineNumber())

        # A feed that is absent from the cache has never been recorded:
        # create its node first.
        first_seen = key not in parsed
        if first_seen:
            xmpp.create(feed['server'], key, f.feed)

        # We check if we have some new entries.  The distinction only changes
        # the label that is printed: every entry is published either way.
        for entry in f.entries:
            if first_seen or parsed[key] < entry.updated_parsed:
                print(colored('++ new entry %s' % entry.title, 'green'))
            else:
                print(colored('++ update entry %s' % entry.title, 'yellow'))
            xmpp.publish(feed['server'], key, entry)

        # And we update the last updated date for the feed
        if hasattr(f, 'updated_parsed'):
            parsed[key] = f.updated_parsed
        else:
            print(colored('-- Parse failed for %s' % key, 'red'))

        save()

        # We distribute the parsing
        print(colored('Parsing next feed in %.2f minutes' % pause_minutes, 'cyan'))
        time.sleep(pause_seconds)
|
||||||
|
|
||||||
|
def load():
    """Return the feed state stored in ``cache.pkl``.

    The file is read from the current working directory.  When it cannot be
    opened, or when it is empty or truncated (for instance after an
    interrupted write), a new cache is written through ``save()`` and the
    value returned by ``save()`` is returned instead.
    """
    try:
        # The context manager closes the file even when unpickling fails.
        with open('cache.pkl', 'rb') as pkl_file:
            return pickle.load(pkl_file)
    except (IOError, EOFError):
        print('Creating the cache')
        return save()
|
||||||
|
|
||||||
|
def save():
    """Write the module-level ``parsed`` dict to ``cache.pkl``.

    The file is created in the current working directory and overwritten on
    every call.  Always returns an empty dict, which ``load()`` hands back to
    its caller when it has to create the cache.
    """
    # The context manager closes the file even when pickling fails.
    with open('cache.pkl', 'wb') as output:
        pickle.dump(parsed, output)
    return {}
|
||||||
|
|
||||||
|
# Start-up: restore the cached feed state, connect, then poll the feeds
# until the user interrupts the program.
parsed = load()
connected = xmpp.connect()

if connected:
    # Stanza processing is only started once the connection attempt has
    # succeeded; starting it on a failed connection leaves the program
    # running without ever polling a feed.
    xmpp.process()

    while True:
        try:
            parse()
        except KeyboardInterrupt:
            xmpp.disconnect(wait=True)
            print("Exiting...")
            break
else:
    print(colored('Unable to connect to the XMPP server', 'red'))
|
29
config_default.py
Executable file
29
config_default.py
Executable file
@ -0,0 +1,29 @@
|
|||||||
|
# Feeds to relay.  Warning: only Atom feeds are supported.
# Each key is used as the name of the Pubsub node; 'url' is the address the
# feed is fetched from and 'server' is the Pubsub service the node lives on.
feeds = {
    'YIFY': {
        'url': 'http://localhost/feedcleaner/?url=http://yify-torrents.com/rss',
        'server': 'pubsub.movim.eu',
    },

    'LEquipe': {
        'url': 'http://localhost/feedcleaner/?url=http://www.lequipe.fr/rss/actu_rss.xml',
        'server': 'sport.mov.im',
    },
    'SportingNews': {
        'url': 'http://localhost/feedcleaner/?url=http://www.sportingnews.com/rss',
        'server': 'sport.mov.im',
    },

    'OuestFrance': {
        'url': 'http://localhost/feedcleaner/?url=http://www.ouest-france.fr/rss.xml',
        'server': 'news.mov.im',
    },
}

# XMPP account used for publishing; the client logs in as "jid/resource".
jid = 'user@server.tld'
resource = 'atomtopubsub'
secret = 'password'

# Time needed to go through all the feeds once, in minutes.
refresh_time = 15
|
114
publishx.py
Normal file
114
publishx.py
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import getpass
|
||||||
|
from optparse import OptionParser
|
||||||
|
from termcolor import colored, cprint
|
||||||
|
|
||||||
|
#from sleekxmpp.xmlstream.stanzabase import ET
|
||||||
|
|
||||||
|
import sleekxmpp
|
||||||
|
from sleekxmpp.xmlstream import ET, tostring
|
||||||
|
import sleekxmpp.plugins.xep_0060.stanza.pubsub as pubsub
|
||||||
|
|
||||||
|
# Text-handling compatibility between Python 2 and Python 3.
if sys.version_info >= (3, 0):
    # Python 3 renamed raw_input() to input(); keep the old name available.
    raw_input = input
else:
    # Python 2 does not use UTF-8 as its default encoding.  To make sure
    # Unicode is handled properly throughout SleekXMPP, the default encoding
    # is switched to UTF-8.  sys.setdefaultencoding() is removed from the
    # module at interpreter start-up, hence the reload() to get it back.
    reload(sys)
    sys.setdefaultencoding('utf8')

# XML namespaces used when building the stanzas.
NS_JABBER_DATA = 'jabber:x:data'
NS_ATOM = 'http://www.w3.org/2005/Atom'
|
||||||
|
|
||||||
|
class publishx(sleekxmpp.ClientXMPP):
    """XMPP client that creates Pubsub nodes and publishes Atom entries.

    The client logs in with the account described by a configuration object
    and registers the XEP-0060 (Publish-Subscribe) plugin.
    """

    def __init__(self, config):
        """Build the client from *config*.

        *config* must expose the ``jid``, ``resource`` and ``secret``
        attributes; the client logs in as ``jid/resource``.
        """
        fulljid = config.jid + "/" + config.resource

        sleekxmpp.ClientXMPP.__init__(self, fulljid, config.secret)

        self.add_event_handler("session_start", self.start)
        self.register_plugin('xep_0060')

    def start(self, event):
        """Handle ``session_start``: send the presence and fetch the roster."""
        self.send_presence(pshow='chat', pstatus='AtomToPubsub')
        self.get_roster()

    def _send_and_report(self, iq):
        """Send *iq* with a 5 second timeout and print the reply.

        Failures are reported as ``Iq Error`` and otherwise ignored, so one
        failing request does not stop the caller.  Only ``Exception`` is
        caught: KeyboardInterrupt and SystemExit must reach the caller so
        that the program can still be stopped while a request is pending.
        """
        try:
            print(iq.send(timeout=5))
        except Exception:
            print('Iq Error')

    def create(self, server, node, feed):
        """Ask *server* to create the Pubsub node *node*.

        The node is configured to persist its items and to carry Atom
        payloads.  Its title and description are taken from the ``title``
        and ``subtitle`` attributes of *feed* when present, and are left
        empty otherwise.
        """
        title = description = ''

        if hasattr(feed, 'title'):
            title = feed.title
        if hasattr(feed, 'subtitle'):
            description = feed.subtitle
        print(colored('>> create %s' % title, 'blue'))

        iq = self.Iq(stype="set", sto=server)
        iq['pubsub']['create']['node'] = node

        form = iq['pubsub']['configure']['form']
        form['type'] = 'submit'
        # (field name, field type, value), added in this order.
        fields = (
            ('pubsub#persist_items', 'boolean', 1),
            ('pubsub#title', 'text-single', title),
            ('pubsub#type', 'text-single', NS_ATOM),
            ('pubsub#description', 'text-single', description),
        )
        for var, ftype, value in fields:
            form.addField(var, ftype=ftype, value=value)

        self._send_and_report(iq)

    def publish(self, server, node, entry):
        """Publish *entry* as an Atom ``<entry>`` item on *node* of *server*.

        *entry* is read through its ``id``, ``title`` and ``updated``
        attributes, plus the optional ``content`` and ``links`` ones (the
        caller in atomtopubsub.py passes feedparser entries).  The item id
        is the entry id.  An entry without content is published without a
        ``<content>`` element.
        """
        iq = self.Iq(stype="set", sto=server)
        iq['pubsub']['publish']['node'] = node

        item = pubsub.Item()
        item['id'] = entry.id

        ent = ET.Element("entry")
        ent.set('xmlns', NS_ATOM)

        title = ET.SubElement(ent, "title")
        title.text = entry.title

        updated = ET.SubElement(ent, "updated")
        updated.text = entry.updated

        # Content is optional: some entries only carry a summary, in which
        # case there is no 'content' attribute (or an empty list) to read.
        contents = getattr(entry, 'content', None)
        if contents and hasattr(contents[0], 'type'):
            content = ET.SubElement(ent, "content")
            content.set('type', contents[0].type)
            content.text = contents[0].value

        if hasattr(entry, 'links'):
            for l in entry.links:
                link = ET.SubElement(ent, "link")
                # Copy only the attributes the link actually has, instead of
                # failing on a link that lacks one of them.
                for attribute in ('href', 'type', 'rel'):
                    if attribute in l:
                        link.set(attribute, l[attribute])

        item['payload'] = ent

        iq['pubsub']['publish'].append(item)

        self._send_and_report(iq)
|
||||||
|
# NOTE(review): indentation was lost in this view.  The function takes no
# `self`, so it is kept as a module-level function; if it was meant to be a
# method of publishx it needs a `self` parameter - confirm.
def published():
    """Print the text ``published``."""
    print('published')
|
Loading…
x
Reference in New Issue
Block a user