#!/usr/bin/env python
# Hello, this script is written in Python - http://www.python.org
#
# newsarchiver 1.1p - Newsgroup archiver
#
# Purpose:
#   This script will download all available messages from the desired Usenet
#   group and save them as plain text files. Useful for bulk group archiving.
#
# Usage:
#
#   Syntax : python newsarchiver.py <groupname> [firstArticle]
#
#     where groupname    is the group name (eg. comp.lang.python)
#           firstArticle is the article number to fetch from (optional)
#
#   Example 1 : python newsarchiver.py comp.lang.python
#               (fetch all articles available from comp.lang.python)
#
#   Example 2 : python newsarchiver.py comp.lang.python 108224
#               (fetch all articles available from comp.lang.python
#                starting with article number 108224)
#
#   No password is asked: the script connects to the news server
#   without a login.
#
#   Server name and destination directory are hardcoded.
#   Tweak the lines below the 'import' statement to suit your needs.
#   Variable names should be self-explanatory.
#
#   Then run this script with a group name (and, optionally, a first
#   article number).
#   This script will then connect to the news server and start fetching
#   messages.
#
#   You can stop this script at any time (break with CTRL+C)
#   and re-run it later to continue to fetch messages.
#   This script will not fetch messages it has already fetched.
#
#   All messages will be saved as individual files in the form:
#     groupname_messageNumber
#     (with dots replaced by underscores, and the message number
#      zero-padded to at least 5 digits)
#     ( eg : comp_lang_python_104208 )
#
#   Keep in mind that 'messageNumber' is server-dependent.
#   (If you change news server, the messageNumber will be different : you will
#   have to erase all files and fetch them all to have a coherent fileset)
#   The messageNumber matches the Xref reference number in each message.
#
#   The group must exist on the server.
#
# Changes:
#   1.0p : - first version
#   1.1p : - added group name and first article number as command-line parameters.
# - added help screen # # Author's comments: # Oh my, I wouldn't beleive this would be so easy to program... thanks to Python ! # # Credits: # I created this script for 2 purposes: # - train Python programming (this is one of my first Python scripts) # - archive comp.lang.python and other interesting newsgroups. # # This author of this script is Sebastien SAUVAGE # http://sebsauvage.net # Other quick & dirty Python stuff is likely to be available at http://sebsauvage.net/python/ # # Legal: # This script is public domain. Feel free to re-use and tweak the code. # # Originally from http://sebsauvage.net/python/newsarchiver.py # Tweaked 2008-12-08 by Ian Goldberg to remove # the requirement of a username/password for the NNTP server and to # pad the article numbers with 0s in the filename so that they sort # properly # import os.path,nntplib,string,getpass,sys destination = './' # Must have a trailing slash newsserver = 'news.math.uwaterloo.ca' if len( sys.argv ) < 2: print '>>> newsArchiver 1.1p\n' print ' *** IMPORTANT ***' print ' See comments in code for more information before running this script !' print ' (News server address is hardcoded :' print ' you need to tailor it before using this script.)' print ' News server',newsserver,"will be used" print ' Destination path is',destination,'\n' print ' Syntax : python newsarchiver.py [firstArticle]\n' print ' Example 1 : python newsarchiver.py comp.lang.python' print ' (fetch all article available from comp.lang.python)\n' print ' Example 2 : python newsarchiver.py comp.lang.python 108224' print ' (fetch all article available from comp.lang.python' print ' starting with article number 108224)\n' sys.exit() groupName = sys.argv[1] firstArticle = 0 if len( sys.argv ) > 2: try: firstArticle = int(sys.argv[2]) except: print 'Error : firstArticle parameters must be numeric.' sys.exit() print '>>> Connecting to news server',newsserver,'...' 
try: ns = nntplib.NNTP(newsserver,119) except: print '>>> Could not connect to news server.' else: print '>>> News server welcomes us:' print ns.getwelcome() print '>>> Accessing group', groupName try: group = ns.group(groupName) except: print '>>> Could not open group',groupName else: count = group[1] # nb of articles available on server first = group[2] # ID of first available article last = group[3] # ID of last available article print '>>> Article count :',count print '>>> First :',first print '>>> Last :',last if (firstArticle > int(first)) and (firstArticle <= int(last)): first = str(firstArticle) print '>>> Fetching from article',first for articleNumber in range(int(first),int(last)+1): fileName = destination+string.replace(groupName+'.'+("%05d" % articleNumber),'.','_') if not os.path.isfile( fileName ): print '>>> Fetching article',articleNumber,'out of',last,'from',groupName try: article = ns.article(str(articleNumber)) except: print '>>> Could not fetch article',articleNumber else: f=open(fileName, 'w+') for line in article[3]: f.write(line+'\n') f.close() print '>>> Closing connection with news server...' ns.quit()