Initial Commit
This commit is contained in:
159
archive.py
Executable file
159
archive.py
Executable file
@@ -0,0 +1,159 @@
|
||||
import os
|
||||
import glob
|
||||
import functools
|
||||
from environment import *
|
||||
from utility import *
|
||||
from video import *
|
||||
|
||||
# This file is executed in a cron job.
|
||||
# To view or edit the cron schedule, type "sudo crontab -e" in a shell (do NOT use "crontab -r", which deletes the crontab). Use Ctrl-S to save after editing.
|
||||
# This cron job should run every 30 minutes. Shorter intervals burden the system.
|
||||
# The output from the print statements is written to the syslog (/var/log/syslog). View it with: sudo nano /var/log/syslog
|
||||
# Overall system performance can be monitored using htop.
|
||||
|
||||
def comparator(item1, item2):
    """Compare two dotted names by the integer after their last '.'.

    The signature is the cmp-style (a, b) -> negative/zero/positive form
    accepted by functools.cmp_to_key.

    Parameters:
        item1, item2: strings such as 'videodb.txt.3'.

    Returns:
        -1 if item1's numeric suffix is smaller, 1 if it is larger, and 0 if
        the suffixes are equal or either item has no parsable integer suffix.
    """
    try:
        index1 = int(item1.rsplit('.', 1)[-1])
        index2 = int(item2.rsplit('.', 1)[-1])
    except (AttributeError, TypeError, ValueError):
        # Non-string item or non-numeric suffix: treat the pair as equal.
        # Only these parse failures are caught, so KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        return 0
    if index1 < index2:
        return -1
    if index1 > index2:
        return 1
    return 0
|
||||
|
||||
def createArchive(pathOutputFile,tokens,files):
    """Merge keyword-matching lines from *files* into the archive at *pathOutputFile*.

    The existing archive is loaded with Video.load, every line of every input
    file is tested (case-insensitively) against *tokens*, and each matching
    line whose description has not been seen yet is parsed with
    Video.fromString, given a description rebuilt by createDescription, and
    added to the archive. The result is written back with Video.write.

    Parameters:
        pathOutputFile: path of the archive to load and rewrite.
        tokens: substrings to look for; matching ignores case.
        files: paths of the input files to scan (opened as UTF-8 text).

    Returns:
        None. Failures are reported with print() rather than raised: an error
        while reading one input file abandons the rest of that file and moves
        on to the next; any other error inside the scan/write phase is
        reported as a failure to create the output file.
    """
    for token in tokens:
        print('Filtering for "{token}"'.format(token=token))

    videos = Video.load(pathOutputFile)

    # Descriptions already accounted for, used to skip duplicates.
    # NOTE(review): entries loaded from the archive are keyed by the
    # description rebuilt through createDescription, while entries found in
    # the input files are keyed by the raw video.getDescription() value.
    # That asymmetry is preserved from the original code - confirm it is
    # intended.
    unique = set()
    for video in videos.values():
        unique.add(createDescription(video.description, video.getTimestamp()))

    try:
        print('found {count} archive files.'.format(count=len(files)))
        print('processing {pathOutputFile}'.format(pathOutputFile=pathOutputFile))

        # Lower-case the tokens once instead of once per input line.
        lowerTokens = [token.lower() for token in tokens]

        for file in files:
            try:
                # The with-statement closes the stream; no explicit close().
                with open(file, "r", encoding='utf-8') as inputStream:
                    for line in inputStream:
                        lowerLine = line.lower()
                        # One matching token is enough; a second match on the
                        # same line could only hit the duplicate check below.
                        if not any(token in lowerLine for token in lowerTokens):
                            continue
                        video = Video.fromString(line)
                        heading = video.getDescription()
                        if heading in unique:
                            continue
                        unique.add(heading)
                        video.description = createDescription(video.description, video.getTimestamp())
                        videos[video.description] = video
            except Exception as exception:
                print('Exception reading {file} {exception}'.format(file=file,exception=exception))
                continue

        print('writing {pathOutputFile}'.format(pathOutputFile=pathOutputFile))
        Video.write(pathOutputFile, videos)
    except Exception as exception:
        print('Exception creating output file {file} {exception}'.format(file=pathOutputFile,exception=exception))
    return
|
||||
|
||||
# clean the archive files by removing files older than 'expiryDays'
|
||||
def cleanArchive(files, expiryDays):
    """Delete the files in *files* that were last modified more than *expiryDays* days ago.

    Parameters:
        files: paths of the candidate files.
        expiryDays: age threshold; a file is removed when the whole-day
            component returned by DateTime.deltaTime exceeds this value.

    Returns:
        None. A file whose modification time cannot be read, or that cannot
        be removed (already gone, permission denied, ...), is reported with
        print() and skipped, so a single bad file no longer aborts the run.
    """
    expiredList = []
    for pathFileName in files:
        try:
            modifiedSeconds = os.path.getmtime(pathFileName)
        except OSError as exception:
            print('Exception reading modification time of {file} {exception}'.format(file=pathFileName,exception=exception))
            continue
        modificationDate = datetime.fromtimestamp(modifiedSeconds, timezone.utc)
        now = DateTime.now()
        # Only the day component of the delta is needed for the expiry test.
        days, _hours, _minutes, _seconds = DateTime.deltaTime(modificationDate, now)
        if days > expiryDays:
            expiredList.append(pathFileName)

    print('Expiring {count} files.'.format(count=len(expiredList)))

    for file in expiredList:
        try:
            os.remove(file)
        except OSError as exception:
            print('Exception removing {file} {exception}'.format(file=file,exception=exception))
    return
|
||||
|
||||
def createDescription(strDescription, timeStamp):
    """Build the archive description string for one video.

    The result has the form '<text>- <duration> (<month/day>)', where:
      * <text> is StringHelper.betweenString(strDescription, None, '-'),
      * <duration> is StringHelper.betweenString applied with ' ' and ' ' to
        StringHelper.betweenString(strDescription, '-', None),
      * <month/day> is timeStamp.toStringMonthDay().

    Parameters:
        strDescription: the original description text.
        timeStamp: object providing toStringMonthDay().

    Returns:
        The rebuilt description string.
    """
    headline = StringHelper.betweenString(strDescription, None, '-')
    afterDash = StringHelper.betweenString(strDescription, '-', None)
    duration = StringHelper.betweenString(afterDash, ' ', ' ')

    prefix = headline + '- ' + duration
    suffix = ' (' + timeStamp.toStringMonthDay() + ')'
    return prefix + suffix
|
||||
|
||||
def getFiles(archiveFileLike):
    """Return the paths matching *archiveFileLike* and *archiveFileLike* + '.*'.

    Parameters:
        archiveFileLike: a glob pattern (typically a plain file path).

    Returns:
        A list holding the matches for the pattern itself first, followed by
        the matches for the pattern with a '.*' suffix appended.
    """
    matches = []
    for pattern in (archiveFileLike, archiveFileLike + '.*'):
        matches.extend(glob.glob(pattern))
    return matches
|
||||
|
||||
# This program runs through all of the videodb.txt and videodb.txt.* files looking for keywords with which to
|
||||
# build each of the individually named mini-archives.
|
||||
# 1) Search for all videodb.txt.* files
|
||||
# 2) Expire files older than specified number of days
|
||||
# 3) Load the archive (for each of the types enumerated below)
|
||||
# 4) Run through the file collection for the given archive and append to the archive as tags are found
|
||||
# 5) Sort the archive
|
||||
# 6) Truncate existing archive if it exists
|
||||
# 7) Write the new archive
|
||||
|
||||
# Script body: expire old videodb files, then rebuild each keyword archive.
path=PATH_VIDEO_DATABASE
archiveFile=path+'/videodb'
archiveFileLike=archiveFile+'.txt'

#For debugging
# path='/home/pi/Projects/Python/NewsFeed/Archive'
# archiveFile=path+'/videodb'
# archiveFileLike=archiveFile+'.txt'

files = getFiles(archiveFileLike)
print('There are {count} archive files to process before cleaning'.format(count=len(files)))
cleanArchive(files, 30)
# Re-scan so the list no longer contains the files that were just expired.
files = getFiles(archiveFileLike)
print('There are {count} archive files to process after cleaning'.format(count=len(files)))

print('archive.py running...')

# One entry per mini-archive: (output file name, keywords selecting its lines).
# Keywords are matched case-insensitively as substrings; " War " keeps its
# surrounding spaces so it only matches the standalone word.
archiveSpecs = [
    (ARCHIVEDB_FILENAME, ["Keane","Jesse","Israel","Hamas"," War ","Iran","Hezbollah","Gaza","Ukraine"]),
    (HANNITYARCHIVEDB_FILENAME, ["Hannity"]),
    (LEVINARCHIVEDB_FILENAME, ["Levin"]),
    (HAWLEYARCHIVEDB_FILENAME, ["Hawley"]),
    (MILITARYARCHIVEDB_FILENAME, ["Keane","Kellogg","Russia","Ukraine","Israel","Korea","Iran","Venezuela","Cuba","China"]),
]

for archiveFileName, tokens in archiveSpecs:
    pathOutputFile=PathHelper.makePathFileName(archiveFileName,path)
    print('pathOutputFile={pathOutputFile}'.format(pathOutputFile=pathOutputFile))
    createArchive(pathOutputFile,tokens,files)

print('archive.py done.')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user