- Add the base files
This commit is contained in:
parent
2b3dca494f
commit
5141a2a85a
30
README.md
30
README.md
@ -1,4 +1,28 @@
|
||||
atomtopubsub
|
||||
============
|
||||
== About ==
|
||||
|
||||
A little client that parses Atom feeds and sends them to XMPP Pubsub nodes
|
||||
AtomToPubsub is a simple Python program that parses Atom feeds and pushes
|
||||
the entries to an XMPP Pubsub node (http://xmpp.org/extensions/xep-0060.html)
|
||||
|
||||
== Installation ==
|
||||
|
||||
AtomToPubsub is built using Python 2.6 and uses the following libraries:
|
||||
- feedparser
|
||||
- time
|
||||
- pickle
|
||||
- sleekxmpp (version > 1.0, you can download and install it from here http://sleekxmpp.com/)
|
||||
- sys
|
||||
|
||||
== Configuration ==
|
||||
|
||||
Rename config_default.py to config.py and set your Atom feeds and your
|
||||
XMPP account configuration.
|
||||
|
||||
The XMPP account must be authorized to create Pubsub nodes on the server(s).
|
||||
|
||||
== Features ==
|
||||
|
||||
- The "key" of each feed of the configuration file will be the name of
|
||||
the Pubsub node
|
||||
- AtomToPubsub will try to fill the title and the description of the
|
||||
Pubsub node from the title and the subtitle of the Atom feed
|
||||
- A cache file is created for performance reasons
|
||||
|
94
atomtopubsub.py
Executable file
94
atomtopubsub.py
Executable file
@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import feedparser
|
||||
import time
|
||||
import pickle
|
||||
|
||||
import publishx
|
||||
import config
|
||||
|
||||
import logging
|
||||
|
||||
# Console output for SleekXMPP's logger: bare message text, INFO and above.
formatter = logging.Formatter('%(message)s')

ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.INFO)

log = logging.getLogger('sleekxmpp')
log.addHandler(ch)
log.setLevel(logging.INFO)
|
||||
|
||||
from socket import error as SocketError
|
||||
from termcolor import colored, cprint
|
||||
|
||||
# Last recorded update timestamp per feed key; parse() writes to it and it is
# replaced by the result of load() further down in this script.
parsed = {}
|
||||
# Placeholder; overwritten with the result of xmpp.connect() further down.
connected = False
|
||||
# XMPP client built from the settings held in the config module.
xmpp = publishx.publishx(config)
|
||||
|
||||
# We feed the pubsub nodes
def parse():
    """Fetch every configured feed once and push its entries to XMPP.

    The ``config`` module is reloaded first, so edits to the feed list are
    picked up on the next pass.  For each feed this:

    * reports XML problems flagged by feedparser (``bozo``),
    * creates the Pubsub node the first time the feed key is seen,
    * publishes every entry of the feed,
    * records the feed's ``updated_parsed`` value in the module-level
      ``parsed`` cache and persists the cache with ``save()``,
    * sleeps ``refresh_time / len(feeds)`` minutes, so that one full pass
      over all feeds lasts about ``config.refresh_time`` minutes.

    NOTE(review): the indentation of this function was lost in the copy
    it was reconstructed from.  The nesting below (publish for new *and*
    updated entries; save and sleep once per feed) is the reading that is
    consistent with the printed messages -- confirm against the repository.
    """
    reload(config)

    # We parse all the feeds
    for key, feed in config.feeds.items():
        print(colored('>> parsing %s' % key, 'magenta'))
        f = feedparser.parse(feed['url'])

        if f.bozo:
            print('XML Error')
            # Not every exception type offers the SAX-style accessors.
            error = getattr(f, 'bozo_exception', None)
            if hasattr(error, 'getMessage'):
                print(error.getMessage())
            if hasattr(error, 'getLineNumber'):
                print('at line %s' % error.getLineNumber())

        first_seen = key not in parsed
        if first_seen:
            xmpp.create(feed['server'], key, f.feed)

        # We check if we have some new entries
        last_update = parsed.get(key)
        for entry in f.entries:
            # Entries are not guaranteed to carry a title or a parsed date;
            # a missing attribute must not abort the whole pass.
            title = getattr(entry, 'title', '')
            stamp = getattr(entry, 'updated_parsed', None)

            if first_seen or (stamp is not None and last_update < stamp):
                print(colored('++ new entry %s' % title, 'green'))
            else:
                print(colored('++ update entry %s' % title, 'yellow'))

            xmpp.publish(feed['server'], key, entry)

        # And we update the last updated date for the feed
        if hasattr(f, 'updated_parsed'):
            parsed[key] = f.updated_parsed
        else:
            print(colored('-- Parse failed for %s' % key, 'red'))

        save()

        # We distribute the parsing
        feed_count = len(config.feeds)
        print(colored('Parsing next feed in %.2f minutes'
                      % (float(config.refresh_time) / feed_count), 'cyan'))
        time.sleep((float(config.refresh_time) * 60) / feed_count)
|
||||
|
||||
def load():
    """Return the feed timestamp cache stored in ``cache.pkl``.

    Returns:
        The object unpickled from ``cache.pkl``.  When the file cannot be
        opened, or does not contain a complete pickle (for instance after
        an interrupted write), a fresh cache file is written through
        ``save()`` and that call's return value is returned instead.
    """
    try:
        # The context manager closes the file even when unpickling fails.
        with open('cache.pkl', 'rb') as pkl_file:
            # NOTE: pickle is only safe on trusted data; this file is
            # expected to have been written by save().
            return pickle.load(pkl_file)
    except (IOError, EOFError, pickle.UnpicklingError):
        print('Creating the cache')
        return save()
|
||||
|
||||
def save():
    """Write the module-level ``parsed`` cache to ``cache.pkl``.

    Returns:
        An empty dict.  ``load()`` hands this value back as the initial
        cache when it had to create the file.
    """
    # The context manager closes the file even if pickling raises.
    with open('cache.pkl', 'wb') as output:
        pickle.dump(parsed, output)
    return {}
|
||||
|
||||
# Restore the timestamps remembered from the previous run, then go online.
parsed = load()
connected = xmpp.connect()
xmpp.process()

# Keep polling the feeds until interrupted from the keyboard.  Nothing in
# the loop reassigns `connected`, so the loop is skipped entirely when the
# connection attempt failed.
while connected:
    try:
        parse()
    except KeyboardInterrupt:
        xmpp.disconnect(wait=True)
        print("Exiting...")
        break
|
29
config_default.py
Executable file
29
config_default.py
Executable file
@ -0,0 +1,29 @@
|
||||
# Feeds to relay.  Each key is used as the name of the Pubsub node; 'url' is
# the feed address and 'server' the Pubsub service receiving the entries.
# Warning: only Atom feeds are supported.
feeds = {
    'YIFY': {
        'url': 'http://localhost/feedcleaner/?url=http://yify-torrents.com/rss',
        'server': 'pubsub.movim.eu',
    },
    'LEquipe': {
        'url': 'http://localhost/feedcleaner/?url=http://www.lequipe.fr/rss/actu_rss.xml',
        'server': 'sport.mov.im',
    },
    'SportingNews': {
        'url': 'http://localhost/feedcleaner/?url=http://www.sportingnews.com/rss',
        'server': 'sport.mov.im',
    },
    'OuestFrance': {
        'url': 'http://localhost/feedcleaner/?url=http://www.ouest-france.fr/rss.xml',
        'server': 'news.mov.im',
    },
}

# XMPP account used to log in (bare JID, resource and password).
jid = 'user@server.tld'
resource = 'atomtopubsub'
secret = 'password'

# Refresh interval, in minutes, for one pass over all the feeds.
refresh_time = 15
|
114
publishx.py
Normal file
114
publishx.py
Normal file
@ -0,0 +1,114 @@
|
||||
import sys
|
||||
import logging
|
||||
import getpass
|
||||
from optparse import OptionParser
|
||||
from termcolor import colored, cprint
|
||||
|
||||
#from sleekxmpp.xmlstream.stanzabase import ET
|
||||
|
||||
import sleekxmpp
|
||||
from sleekxmpp.xmlstream import ET, tostring
|
||||
import sleekxmpp.plugins.xep_0060.stanza.pubsub as pubsub
|
||||
|
||||
# Interpreters older than Python 3.0 do not default to UTF-8, so the default
# encoding is forced to UTF-8 there to keep Unicode handling consistent
# throughout SleekXMPP.  On Python 3 only the raw_input alias is defined.
if sys.version_info >= (3, 0):
    raw_input = input
else:
    reload(sys)
    sys.setdefaultencoding('utf8')
|
||||
|
||||
# Atom XML namespace: used below as the 'pubsub#type' of created nodes and
# as the xmlns of published <entry> payloads.
NS_ATOM = 'http://www.w3.org/2005/Atom'
|
||||
# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably the XMPP data-forms namespace -- confirm before removing.
NS_JABBER_DATA = 'jabber:x:data'
|
||||
|
||||
class publishx(sleekxmpp.ClientXMPP):
    """XMPP client that creates Pubsub nodes and publishes Atom entries."""

    def __init__(self, config):
        """Set up the client for the account described by *config*.

        Args:
            config: object exposing the ``jid``, ``resource`` and ``secret``
                attributes; the full JID is ``jid + "/" + resource``.
        """
        fulljid = config.jid + "/" + config.resource

        sleekxmpp.ClientXMPP.__init__(self, fulljid, config.secret)

        self.add_event_handler("session_start", self.start)
        self.register_plugin('xep_0060')

    def start(self, event):
        """Handle ``session_start``: send presence and request the roster."""
        self.send_presence(pshow='chat', pstatus='AtomToPubsub')
        self.get_roster()

    def _send_and_report(self, iq):
        """Send *iq* with a 5 second timeout and print the reply.

        Sending is best-effort: any ordinary failure is reported as
        'Iq Error' and swallowed.  KeyboardInterrupt and SystemExit are
        deliberately NOT caught, so Ctrl-C still reaches the main loop.
        """
        try:
            print(iq.send(timeout=5))
        except Exception:
            print('Iq Error')

    def create(self, server, node, feed):
        """Ask *server* to create the Pubsub node *node*.

        Args:
            server: address of the Pubsub service the request is sent to.
            node: name of the node to create.
            feed: feed metadata; its ``title`` and ``subtitle`` attributes,
                when present, become the node title and description.
        """
        title = getattr(feed, 'title', '')
        description = getattr(feed, 'subtitle', '')
        print(colored('>> create %s' % title, 'blue'))

        iq = self.Iq(stype="set", sto=server)
        iq['pubsub']['create']['node'] = node

        form = iq['pubsub']['configure']['form']
        form['type'] = 'submit'
        # (field name, field type, value) of the node configuration form.
        for var, ftype, value in (
                ('pubsub#persist_items', 'boolean', 1),
                ('pubsub#title', 'text-single', title),
                ('pubsub#type', 'text-single', NS_ATOM),
                ('pubsub#description', 'text-single', description)):
            form.addField(var, ftype=ftype, value=value)

        self._send_and_report(iq)

    def publish(self, server, node, entry):
        """Publish *entry* as an Atom ``<entry>`` item on *node*.

        Args:
            server: address of the Pubsub service the request is sent to.
            node: name of the node to publish on.
            entry: parsed feed entry; ``id``, ``title`` and ``updated`` are
                read unconditionally, ``content`` and ``links`` only when
                present.
        """
        iq = self.Iq(stype="set", sto=server)
        iq['pubsub']['publish']['node'] = node

        item = pubsub.Item()
        item['id'] = entry.id

        ent = ET.Element("entry")
        ent.set('xmlns', NS_ATOM)

        title = ET.SubElement(ent, "title")
        title.text = entry.title

        updated = ET.SubElement(ent, "updated")
        updated.text = entry.updated

        # Entries may come without any content block at all; only the first
        # block is forwarded, as before.
        contents = getattr(entry, 'content', None)
        if contents and hasattr(contents[0], 'type'):
            content = ET.SubElement(ent, "content")
            content.set('type', contents[0].type)
            content.text = contents[0].value

        for l in getattr(entry, 'links', ()):
            link = ET.SubElement(ent, "link")
            # Copy only the attributes the link actually provides, instead
            # of failing on a missing key.
            for attr in ('href', 'type', 'rel'):
                value = l.get(attr)
                if value is not None:
                    link.set(attr, value)

        item['payload'] = ent
        iq['pubsub']['publish'].append(item)

        self._send_and_report(iq)
|
||||
def published():
    """Print the literal text ``published``.

    NOTE(review): takes no ``self``; whether this was meant to be a method
    of the class above cannot be told from this copy -- confirm.
    """
    print('published')
|
Loading…
x
Reference in New Issue
Block a user