Initial Commit

2026-01-29 18:09:56 -05:00
parent 150d57c782
commit 38f58fa144
10 changed files with 1790 additions and 0 deletions
--- a/archive.py
+++ b/archive.py
@@ -0,0 +1,159 @@
+import os
+import glob
+import functools
+from environment import *
+from utility import *
+from video import *
+
+# This file is executed in a cron job.
+# To view the cron schedule type sudo crontab -l in a shell; to edit it type sudo crontab -e
+# (do NOT use -r, which deletes the crontab).  Use Ctrl-S to save after editing.
+# This cron job should run every 30 minutes.  Shorter intervals burden the system.
+# The output from the print statements is written to the syslog (/var/log/syslog); view it with sudo nano /var/log/syslog
+# Overall system performance can be monitored using htop
+
+def comparator(item1, item2):
+    """Compare two strings by the integer that follows their last '.'.
+
+    Returns -1, 0 or 1 (the convention functools.cmp_to_key expects), so
+    'videodb.txt.2' orders before 'videodb.txt.10'.
+
+    If either suffix is not an integer, or an argument is not a string,
+    the two items are reported as equal (0) instead of raising.
+    """
+    try:
+        # rpartition yields the text after the last '.', or the whole
+        # string when there is no '.' at all.
+        index1 = int(item1.rpartition('.')[2])
+        index2 = int(item2.rpartition('.')[2])
+    except (AttributeError, TypeError, ValueError):
+        # Deliberately narrower than a bare except so KeyboardInterrupt and
+        # SystemExit still propagate.
+        return 0
+    return (index1 > index2) - (index1 < index2)
+
+def createArchive(pathOutputFile,tokens,files):
+    """Merge every line that mentions one of `tokens` into the archive file.
+
+    The existing archive is loaded with Video.load, each file in `files` is
+    scanned line by line for a case-insensitive substring match against
+    `tokens`, matching lines are parsed with Video.fromString and added to
+    the collection, and the result is written back with Video.write.
+
+    Parameters:
+      pathOutputFile -- path of the archive to load and then rewrite
+      tokens         -- keyword strings; a line is selected when it contains any of them
+      files          -- paths of the input files to scan
+
+    Returns nothing.  A file that cannot be read or parsed is reported and
+    the remaining files are still processed; a failure while writing is
+    reported.  Errors raised by Video.load itself propagate to the caller.
+    """
+    for token in tokens:
+        print('Filtering for "{token}"'.format(token=token))
+
+    videos = Video.load(pathOutputFile)
+
+    # Descriptions already in the archive, in createDescription() form.
+    unique = {createDescription(video.description, video.getTimestamp())
+              for video in videos.values()}
+
+    # Lower-case the tokens once rather than once per token per line.
+    lowerTokens = [token.lower() for token in tokens]
+
+    try:
+        print('found {count} archive files.'.format(count=len(files)))
+        print('processing {pathOutputFile}'.format(pathOutputFile=pathOutputFile))
+        for file in files:
+            try:
+                # The with-statement closes the stream; no explicit close needed.
+                with open(file, "r", encoding='utf-8') as inputStream:
+                    for line in inputStream:
+                        lowerLine = line.lower()
+                        if not any(token in lowerLine for token in lowerTokens):
+                            continue
+                        # Parse once per matching line, however many tokens hit.
+                        video = Video.fromString(line)
+                        heading = video.getDescription()
+                        # NOTE(review): `heading` is whatever getDescription()
+                        # returns, while the entries seeded from the archive are
+                        # createDescription() output -- confirm these two forms
+                        # are meant to be compared.
+                        if heading in unique:
+                            continue
+                        unique.add(heading)
+                        video.description = createDescription(video.description, video.getTimestamp())
+                        videos[video.description] = video
+            except Exception as exception:
+                # Best effort: report the failure and carry on with the next file.
+                print('Exception reading {file} {exception}'.format(file=file,exception=exception))
+        print('writing {pathOutputFile}'.format(pathOutputFile=pathOutputFile))
+        Video.write(pathOutputFile, videos)
+    except Exception as exception:
+        print('Exception creating output file {file} {exception}'.format(file=pathOutputFile,exception=exception))
+    return
+
+def cleanArchive(files, expiryDays):
+    """Clean the archive files by removing those older than `expiryDays`.
+
+    Parameters:
+      files      -- paths of the files to examine
+      expiryDays -- a file is removed when the days component returned by
+                    DateTime.deltaTime(modification time, now) exceeds this value
+
+    Each file's modification time is read with os.path.getmtime and
+    converted to a UTC-aware datetime.  The number of expired files is
+    printed before any deletion.  A file that cannot be removed is reported
+    and skipped so the remaining files are still processed.
+    """
+    # A single reference time for the whole pass instead of one per file.
+    now = DateTime.now()
+    expiredList = []
+    for pathFileName in files:
+        modification_date = datetime.fromtimestamp(os.path.getmtime(pathFileName), timezone.utc)
+        days, _hours, _minutes, _seconds = DateTime.deltaTime(modification_date, now)
+        if days > expiryDays:
+            expiredList.append(pathFileName)
+    print('Expiring {count} files.'.format(count=len(expiredList)))
+    for file in expiredList:
+        try:
+            os.remove(file)
+        except OSError as exception:
+            # e.g. the file disappeared after it was listed; keep going.
+            print('Unable to remove {file} {exception}'.format(file=file,exception=exception))
+    return
+
+def createDescription(strDescription, timeStamp):
+    """Build a description of the form '<text>- <duration> (<month day>)'.
+
+    Parameters:
+      strDescription -- source description; it is split around '-' using
+                        StringHelper.betweenString
+      timeStamp      -- object providing toStringMonthDay()
+
+    Returns the newly assembled string.
+    """
+    # NOTE(review): presumably betweenString(s, None, '-') yields the text
+    # before the '-' and betweenString(s, '-', None) the text after it --
+    # confirm against StringHelper.
+    head = StringHelper.betweenString(strDescription, None, '-')
+    tail = StringHelper.betweenString(strDescription, '-', None)
+    # The piece of the tail enclosed by two spaces.
+    duration = StringHelper.betweenString(tail, ' ', ' ')
+    pieces = (head, '- ', duration, ' (', timeStamp.toStringMonthDay(), ')')
+    return ''.join(pieces)
+
+def getFiles(archiveFileLike):
+    """Return the paths matching `archiveFileLike`, then those matching it with a '.*' suffix.
+
+    Both patterns are expanded with glob.glob and the two result lists are
+    concatenated in that order into a new list.
+    """
+    matches = []
+    for pattern in (archiveFileLike, archiveFileLike + '.*'):
+        matches.extend(glob.glob(pattern))
+    return matches
+
+# This program runs through the videodb.txt and videodb.txt.* files looking for keywords with which to
+# build each of the individually named mini-archives.
+# 1) Search for videodb.txt and all videodb.txt.* files
+# 2) Expire files older than the specified number of days
+# 3) Load the archive (for each of the types enumerated below)
+# 4) Run through the file collection for the given archive and append to the archive as tags are found
+# 5) Sort the archive
+# 6) Truncate existing archive if it exists
+# 7) Write the new archive
+
+path=PATH_VIDEO_DATABASE
+archiveFile=path+'/videodb'
+archiveFileLike=archiveFile+'.txt'
+
+#For debugging
+# path='/home/pi/Projects/Python/NewsFeed/Archive'
+# archiveFile=path+'/videodb'
+# archiveFileLike=archiveFile+'.txt'
+
+# Input files older than this many days are deleted before archiving.
+EXPIRY_DAYS=30
+
+files = getFiles(archiveFileLike)
+print('There are {count} archive files to process before cleaning'.format(count=len(files)))
+cleanArchive(files, EXPIRY_DAYS)
+# Re-scan so that files removed by the cleaning step are not processed.
+files = getFiles(archiveFileLike)
+print('There are {count} archive files to process after cleaning'.format(count=len(files)))
+
+print('archive.py running...')
+
+# One entry per mini-archive: (output file name, keywords that select its lines).
+# createArchive matches keywords case-insensitively as substrings, so " War "
+# only matches when the word has a space on both sides.
+ARCHIVES=[
+  (ARCHIVEDB_FILENAME,["Keane","Jesse","Israel","Hamas"," War ","Iran","Hezbollah","Gaza","Ukraine"]),
+  (HANNITYARCHIVEDB_FILENAME,["Hannity"]),
+  (LEVINARCHIVEDB_FILENAME,["Levin"]),
+  (HAWLEYARCHIVEDB_FILENAME,["Hawley"]),
+  (MILITARYARCHIVEDB_FILENAME,["Keane","Kellogg","Russia","Ukraine","Israel","Korea","Iran","Venezuela","Cuba","China"]),
+]
+
+for archiveFileName,tokens in ARCHIVES:
+  pathOutputFile=PathHelper.makePathFileName(archiveFileName,path)
+  print('pathOutputFile={pathOutputFile}'.format(pathOutputFile=pathOutputFile))
+  createArchive(pathOutputFile,tokens,files)
+
+print('archive.py done.')
+
+