Files
Newsfeed/archive.py
2026-01-29 18:09:56 -05:00

160 lines
6.2 KiB
Python
Executable File

import os
import glob
import functools
from environment import *
from utility import *
from video import *
# This file is executed as a cron job.
# To view the cron schedule run `sudo crontab -l` in a shell; to edit it run `sudo crontab -e`.
# (Do NOT use `crontab -r`: that deletes the crontab.) Use Ctrl-S to save after editing.
# This cron job should run every 30 minutes. Shorter intervals burden the system.
# The output from the print statements is written to the syslog, /var/log/syslog (view with: sudo nano /var/log/syslog).
# Overall system performance can be monitored using htop.
def comparator(item1, item2):
    """Compare two names by the integer that follows their last '.'.

    The text after the final '.' of each argument is parsed with int()
    (for example 'videodb.txt.3' yields 3) and the two integers are compared.

    Returns -1 when item1's suffix is smaller, 1 when it is larger, and 0
    when the suffixes are equal or when either argument has no usable
    integer suffix. The -1/0/1 result is suitable for functools.cmp_to_key.
    """
    try:
        index1 = int(item1.rsplit('.', 1)[-1])
        index2 = int(item2.rsplit('.', 1)[-1])
    except (AttributeError, TypeError, ValueError):
        # Not a string, or the suffix is not an integer: treat the pair as equal.
        # Only these errors are caught so KeyboardInterrupt/SystemExit still propagate.
        return 0
    return (index1 > index2) - (index1 < index2)
def createArchive(pathOutputFile,tokens,files):
    """Build or extend a keyword-filtered mini-archive from the archive files.

    pathOutputFile -- path of the mini-archive; it is loaded with Video.load
                      first and rewritten with Video.write at the end.
    tokens         -- keywords; a line is selected when it contains any of
                      them as a case-insensitive substring.
    files          -- paths of the archive files to scan line by line.

    Errors are reported with print() rather than raised: a failure while
    reading an input file abandons the rest of that file and moves on to the
    next one, and a failure in the overall scan/write is reported once.
    An exception from Video.load itself is not caught here.
    Always returns None.
    """
    for token in tokens:
        print('Filtering for "{token}"'.format(token=token))
    videos = Video.load(pathOutputFile)
    # Seed the de-duplication set from the entries already in the archive.
    # NOTE(review): these keys are createDescription()-formatted, while lines read
    # from the input files are checked by their raw getDescription() value below.
    # Confirm the two forms are meant to be comparable.
    unique = set()
    for video in videos.values():
        unique.add(createDescription(video.description, video.getTimestamp()))
    try:
        # Lower-case the tokens once instead of once per line per token.
        lowerTokens = [token.lower() for token in tokens]
        print('found {count} archive files.'.format(count=len(files)))
        print('processing {pathOutputFile}'.format(pathOutputFile=pathOutputFile))
        for file in files:
            try:
                # The with-statement closes the stream; no explicit close() is needed.
                with open(file, "r", encoding='utf-8') as inputStream:
                    for line in inputStream:
                        lowerLine = line.lower()
                        if not any(token in lowerLine for token in lowerTokens):
                            continue
                        video = Video.fromString(line)
                        heading = video.getDescription()
                        if heading in unique:
                            continue
                        unique.add(heading)
                        video.description = createDescription(video.description, video.getTimestamp())
                        videos[video.description] = video
            except Exception as exception:
                print('Exception reading {file} {exception}'.format(file=file,exception=exception))
                continue
        print('writing {pathOutputFile}'.format(pathOutputFile=pathOutputFile))
        Video.write(pathOutputFile, videos)
    except Exception as exception:
        print('Exception creating output file {file} {exception}'.format(file=pathOutputFile,exception=exception))
    return
# clean the archive files by removing files older than 'expiryDays'
def cleanArchive(files, expiryDays):
    """Delete the files in `files` that are older than `expiryDays` days.

    files      -- paths of the files to examine.
    expiryDays -- threshold; a file is expired when the `days` component
                  returned by DateTime.deltaTime(modification time, now) is
                  strictly greater than this value.

    A file that cannot be inspected or removed (for example because it
    disappeared after the directory was listed) is reported with print() and
    skipped, so one bad file does not abort the whole run.
    Always returns None.
    """
    expiredList = []
    for pathFileName in files:
        try:
            modification_date = os.path.getmtime(pathFileName)
        except OSError as exception:
            print('Exception checking age of {file} {exception}'.format(file=pathFileName,exception=exception))
            continue
        modification_date = datetime.fromtimestamp(modification_date, timezone.utc)
        now = DateTime.now()
        days, hours, minutes, seconds = DateTime.deltaTime(modification_date, now)
        if days > expiryDays:
            expiredList.append(pathFileName)
    print('Expiring {count} files.'.format(count=len(expiredList)))
    for file in expiredList:
        try:
            os.remove(file)
        except OSError as exception:
            print('Exception removing {file} {exception}'.format(file=file,exception=exception))
    return
def createDescription(strDescription, timeStamp):
    """Rebuild a description as '<text>- <duration> (<month/day>)'.

    strDescription -- the original description string.
    timeStamp      -- object providing toStringMonthDay().

    The pieces are extracted with StringHelper.betweenString: the part of
    strDescription up to '-', the part after '-', and from that remainder the
    part between two spaces (used as the duration).
    NOTE(review): presumably betweenString returns the substring between the
    two delimiters, with None meaning start/end of string; confirm in utility.
    """
    headText = StringHelper.betweenString(strDescription, None, '-')
    remainder = StringHelper.betweenString(strDescription, '-', None)
    duration = StringHelper.betweenString(remainder, ' ', ' ')
    prefix = headText + '- ' + duration
    return prefix + ' (' + timeStamp.toStringMonthDay() + ')'
def getFiles(archiveFileLike):
    """Return the paths matching `archiveFileLike` and `archiveFileLike` + '.*'.

    Both patterns are expanded with glob.glob; matches of the bare pattern
    come first, followed by matches that carry an extra '.<suffix>'.
    """
    exactMatches = glob.glob(archiveFileLike)
    suffixedMatches = glob.glob(archiveFileLike + '.*')
    return [*exactMatches, *suffixedMatches]
# This program runs through videodb.txt and all of the videodb.txt.* files looking for keywords
# with which to build each of the individually named mini-archives.
# 1) Search for videodb.txt and all videodb.txt.* files
# 2) Expire files older than the specified number of days
# 3) Load the archive (for each of the types enumerated below)
# 4) Run through the file collection for the given archive and append to the archive as tags are found
# 5) Sort the archive
# 6) Truncate the existing archive if it exists
# 7) Write the new archive
path=PATH_VIDEO_DATABASE
archiveFile=path+'/videodb'
archiveFileLike=archiveFile+'.txt'
#For debugging
# path='/home/pi/Projects/Python/NewsFeed/Archive'
# archiveFile=path+'/videodb'
# archiveFileLike=archiveFile+'.txt'

# Collect the archive files, expire those older than 30 days, then list again so
# that only the surviving files are scanned.
files = getFiles(archiveFileLike)
print('There are {count} archive files to process before cleaning'.format(count=len(files)))
cleanArchive(files, 30)
files = getFiles(archiveFileLike)
print('There are {count} archive files to process after cleaning'.format(count=len(files)))
print('archive.py running...')

# One entry per mini-archive: (output file name, keywords that select a line).
# Matching is a case-insensitive substring test, so " War " keeps its surrounding
# spaces (presumably to avoid matching inside longer words; confirm intent).
archiveDefinitions = [
    (ARCHIVEDB_FILENAME, ["Keane","Jesse","Israel","Hamas"," War ","Iran","Hezbollah","Gaza","Ukraine"]),
    (HANNITYARCHIVEDB_FILENAME, ["Hannity"]),
    (LEVINARCHIVEDB_FILENAME, ["Levin"]),
    (HAWLEYARCHIVEDB_FILENAME, ["Hawley"]),
    (MILITARYARCHIVEDB_FILENAME, ["Keane","Kellogg","Russia","Ukraine","Israel","Korea","Iran","Venezuela","Cuba","China"]),
]
for archiveFileName, tokens in archiveDefinitions:
    pathOutputFile=PathHelper.makePathFileName(archiveFileName,path)
    print('pathOutputFile={pathOutputFile}'.format(pathOutputFile=pathOutputFile))
    createArchive(pathOutputFile,tokens,files)
print('archive.py done.')