Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| af5a989c21 | |||
| b860c2d0ef | |||
| 09e3980d2c | |||
| 1cdedee244 | |||
| b55a299a3c | |||
| 2fc96a3cc4 | |||
| 7bb844d9ee | |||
| 0ec1eaef39 | |||
| 6e521c382c | |||
| cb5a1bfbbe | |||
| 98c37f2204 | |||
| 4cb76dfb58 | |||
| e660e385e5 | |||
| 13b18b01dd | |||
| 91383d8687 |
@@ -6,8 +6,8 @@ from utility import *
|
||||
from video import *
|
||||
|
||||
# This file is executed in a cron job.
|
||||
# To view the cron schedule type sudo crontab -r in a shell. Use Ctrl-S to save after editing
|
||||
# This cron job should run every 30 minutes. Shorter intervals burden the system
|
||||
# To view the cron schedule type sudo crontab -l in a shell. sudo crontab -e for editing. Use Ctrl-S to save after editing
|
||||
# This cron job should run every 10 minutes. Shorter intervals burden the system
|
||||
# The output from the print statements is written to the syslog (/var/log/syslog). View it with: sudo nano /var/log/syslog
|
||||
# Overall system performance can be monitored using htop
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ CACHE_EXPIRY_MINS=10
|
||||
|
||||
LOG_HTTP_RESPONSES = False
|
||||
|
||||
FEED_REJECT_IF_OLDER_THAN_DAYS = 7
|
||||
FEED_REJECT_IF_OLDER_THAN_DAYS = 60
|
||||
|
||||
class PathHelper:
|
||||
pathChar="/"
|
||||
|
||||
659
newsfeed.py
659
newsfeed.py
@@ -30,94 +30,125 @@ class NewsFeed:
|
||||
return False
|
||||
|
||||
def getItemsInAmericasNewsRoomFeed(self,url):
    """Return the videos listed at ``url``, using the on-disk cache when fresh.

    The cache file named by VIDEODB_AMERICAS_NEWSROOM_FILENAME (under
    ``self.pathDb``) is returned as-is when ``isFeedCacheAvailable`` accepts
    it and it can be read. Otherwise ``url`` is fetched, every ``article``
    section is parsed into a video, duplicates (same description) are
    dropped, and the result is sorted by feed time in ascending order,
    written back to the cache and returned.

    Returns None when the HTTP status is not 200.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            videos = self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos

        sections = Sections()
        videos = {}
        httpNetRequest = HttpNetRequest()
        response = httpNetRequest.getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(response.text)

        searchIndex = 0
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(response.text, "article", searchIndex)
            # The description doubles as the de-duplication key.
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=False)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # The response stays open until parsing is done; close it on every exit path.
        if response is not None:
            response.close()
|
||||
|
||||
def getItemsInOutnumberedFeed(self,url):
    """Return the videos listed at ``url``, using the on-disk cache when fresh.

    The cache file named by VIDEODB_OUTNUMBERED_FILENAME (under
    ``self.pathDb``) is returned as-is when ``isFeedCacheAvailable`` accepts
    it and it can be read. Otherwise ``url`` is fetched, every ``article``
    section is parsed into a video, duplicates (same description) are
    dropped, and the result is sorted by feed time in descending order,
    written back to the cache and returned.

    Returns None when the HTTP status is not 200.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            videos = self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos

        sections = Sections()
        videos = {}
        httpNetRequest = HttpNetRequest()
        response = httpNetRequest.getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(response.text)

        searchIndex = 0
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(response.text, "article", searchIndex)
            # The description doubles as the de-duplication key.
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # The response stays open until parsing is done; close it on every exit path.
        if response is not None:
            response.close()
|
||||
|
||||
def getItemsInFeed(self,url):
    """Return the main-feed videos at ``url``, using the on-disk cache when fresh.

    The cache file named by VIDEODB_FILENAME (under ``self.pathDb``) is
    returned as-is when ``isFeedCacheAvailable`` accepts it and it can be
    read. Otherwise ``url`` is fetched, every ``article`` section is parsed
    into a video, duplicates (same description) are dropped, the list is
    passed through ``filterFeedMaxDays`` with FEED_REJECT_IF_OLDER_THAN_DAYS,
    sorted by feed time in descending order, written to the cache and
    returned.

    Returns None when the HTTP status is not 200. Timing figures for the
    parse and feed-time steps are logged on every exit path.
    """
    response = None
    parse_total = 0  # timing: seconds spent in getItemsInSection
    time_total = 0   # timing: seconds spent computing feed times
    count = 0        # timing: number of distinct videos kept
    try:
        self.writeLog("getItemsInFeed[ENTER]")
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            self.writeLog(f"[getItemsInFeed] Loading videos from cache {cachePathFileName}")
            videos = self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos

        sections = Sections()
        videos = {}
        httpNetRequest = HttpNetRequest()
        self.writeLog(f"[getItemsInFeed] Loading videos from site '{url}'")
        start_time = time.perf_counter()
        response = httpNetRequest.getHttpNetRequest(url)
        self.writeLog(f"[getItemsInFeed] Request from {url} completed in {time.perf_counter() - start_time:.4f} seconds.")
        if response.status_code != 200:
            return None
        if LOG_HTTP_RESPONSES:
            self.writeLog(f"[getItemsInFeed] Request {url}")
            self.writeLog(f"[getItemsInFeed] Returned {response.text}" )

        searchIndex = 0
        while searchIndex != -1:
            t0 = time.perf_counter()
            video, searchIndex = sections.getItemsInSection(response.text, "article", searchIndex)
            parse_total += time.perf_counter() - t0
            # The description doubles as the de-duplication key.
            if video is not None and video.description not in videos:
                t2 = time.perf_counter()
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
                time_total += time.perf_counter() - t2
                videos[video.description] = video
                count += 1

        t0 = time.perf_counter()
        videoList = self.filterFeedMaxDays(list(videos.values()), FEED_REJECT_IF_OLDER_THAN_DAYS)
        t1 = time.perf_counter()
        self.writeLog(f"[TIMING] filterFeedMaxDays took {t1 - t0:.4f} seconds")
        videoList = sorted(videoList, key=lambda x: x.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        self.writeLog(f"items: {count}")
        self.writeLog(f"parse_total: {parse_total:.2f}s")
        self.writeLog(f"time_total: {time_total:.2f}s")
        self.writeLog("getItemsInFeed[LEAVE]")
        # The response stays open until parsing is done; close it on every exit path.
        if response is not None:
            response.close()
|
||||
|
||||
def filterFeedMaxDays(self, videos, days):
|
||||
now = datetime.now()
|
||||
@@ -125,91 +156,100 @@ class NewsFeed:
|
||||
for video in videos:
|
||||
delta = now - video.getFeedTime()
|
||||
if delta.days <= days:
|
||||
message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||
self.writeLog(message)
|
||||
filteredList.insert(0,video)
|
||||
# message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||
# self.writeLog(message)
|
||||
filteredList.append(video)
|
||||
else:
|
||||
message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||
self.writeLog(message)
|
||||
pass
|
||||
# message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||
# self.writeLog(message)
|
||||
return filteredList
|
||||
|
||||
def getUSItemsInFeed(self,url):
    """Return the videos referenced from ``url``, using the on-disk cache when fresh.

    The cache file named by VIDEODB_US_FILENAME (under ``self.pathDb``) is
    returned as-is when ``isFeedCacheAvailable`` accepts it and it can be
    read. Otherwise ``url`` is fetched and each ``article`` section is
    scanned for a video id; the page
    ``https://video.foxnews.com/v/<videoId>`` is then fetched and parsed
    with ``getVideoContentInSection``. Duplicates (same description) are
    dropped and the result is sorted by feed time in descending order,
    written to the cache and returned.

    Returns None when the listing request does not return HTTP 200; a
    per-video request that fails is skipped.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_US_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            videos = self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos

        sections = Sections()
        videos = {}
        httpNetRequest = HttpNetRequest()
        response = httpNetRequest.getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(response.text)

        searchIndex = 0
        while searchIndex != -1:
            videoId, searchIndex = sections.getVideoIdInSection(response.text, "article", searchIndex)
            if videoId is None:
                continue
            # Keep the per-video address in its own local so the listing url is not overwritten.
            videoUrl = 'https://video.foxnews.com/v/' + videoId
            httpNetRequest = HttpNetRequest()
            innerResponse = httpNetRequest.getHttpNetRequest(videoUrl)
            try:
                if innerResponse.status_code != 200:
                    continue
                # Read the body before the response is closed.
                video = sections.getVideoContentInSection(innerResponse.text)
            finally:
                innerResponse.close()
            # The description doubles as the de-duplication key.
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # The listing response stays open until parsing is done; close it on every exit path.
        if response is not None:
            response.close()
|
||||
|
||||
def getExclusiveItemsInFeed(self,url):
    """Return the videos referenced from ``url``, using the on-disk cache when fresh.

    The cache file named by VIDEODB_EXCLUSIVE_FILENAME (under
    ``self.pathDb``) is returned as-is when ``isFeedCacheAvailable`` accepts
    it and it can be read. Otherwise ``url`` is fetched and each ``article``
    section is scanned for a video id; the page
    ``https://video.foxnews.com/v/<videoId>`` is then fetched and parsed
    with ``getVideoContentInSection``. Duplicates (same description) are
    dropped and the result is sorted by feed time in descending order,
    written to the cache and returned.

    Returns None when the listing request does not return HTTP 200; a
    per-video request that fails is skipped.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            videos = self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos

        sections = Sections()
        videos = {}
        httpNetRequest = HttpNetRequest()
        response = httpNetRequest.getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(response.text)

        searchIndex = 0
        while searchIndex != -1:
            videoId, searchIndex = sections.getVideoIdInSection(response.text, "article", searchIndex)
            if videoId is None:
                continue
            # Keep the per-video address in its own local so the listing url is not overwritten.
            videoUrl = 'https://video.foxnews.com/v/' + videoId
            httpNetRequest = HttpNetRequest()
            innerResponse = httpNetRequest.getHttpNetRequest(videoUrl)
            try:
                if innerResponse.status_code != 200:
                    continue
                # Read the body before the response is closed.
                video = sections.getVideoContentInSection(innerResponse.text)
            finally:
                innerResponse.close()
            # The description doubles as the de-duplication key.
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # The listing response stays open until parsing is done; close it on every exit path.
        if response is not None:
            response.close()
|
||||
|
||||
def getItemsInArchiveFeed(self,url,archiveDbFileName):
|
||||
cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb)
|
||||
@@ -221,11 +261,11 @@ class NewsFeed:
|
||||
def readFeedCache(self,pathFileName):
|
||||
try:
|
||||
videos=[]
|
||||
# 'with' will automatically close the stream
|
||||
with open(pathFileName,"r",encoding='utf-8') as inputStream:
|
||||
for line in inputStream:
|
||||
video=Video.fromString(line)
|
||||
videos.append(video)
|
||||
inputStream.close()
|
||||
return(videos)
|
||||
except:
|
||||
self.writeLog(traceback.format_exc())
|
||||
@@ -236,35 +276,36 @@ class NewsFeed:
|
||||
with open(pathFileName,"w",encoding='utf-8') as outputStream:
|
||||
for video in videos:
|
||||
outputStream.write(video.toString()+"\n")
|
||||
outputStream.close()
|
||||
# 'with' will automatically close the stream
|
||||
return(videos)
|
||||
except:
|
||||
self.writeLog(traceback.format_exc())
|
||||
return(videos)
|
||||
|
||||
def isFeedCacheAvailable(self, pathFileName, expireMinutes):
    """Tell whether the cache file at ``pathFileName`` exists and is still fresh.

    The file's age is measured from its modification time. A file is
    stale when it is more than ``expireMinutes`` minutes old (or two or
    more hours old, whichever comes first); a stale file is handed to
    ``self.archiveFile`` before False is returned.

    Returns True for a fresh file, False when the file is missing, stale,
    or when any error occurs while inspecting it (the traceback is logged).
    """
    try:
        self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
        if not os.path.isfile(pathFileName):
            return False

        fileDateTime = datetime.fromtimestamp(os.path.getmtime(pathFileName))
        totalSeconds = int((datetime.now() - fileDateTime).total_seconds())
        hours, remainder = divmod(totalSeconds, 3600)
        minutes = remainder // 60
        self.writeLog('file is = "{age}" hours old'.format(age=hours))
        self.writeLog('file is = "{age}" minutes old'.format(age=minutes))

        # Compare the TOTAL elapsed minutes with the expiry. Using the
        # minutes-within-the-hour remainder would treat a file that is
        # 1h05m old as only 5 minutes old.
        totalMinutes = totalSeconds // 60
        if hours > 1 or totalMinutes > expireMinutes:
            self.archiveFile(pathFileName)
            return False
        return True
    except Exception:
        self.writeLog(traceback.format_exc())
        return False
|
||||
|
||||
def archiveFile(self, pathFileName):
|
||||
if not os.path.isfile(pathFileName):
|
||||
return(False)
|
||||
@@ -288,64 +329,198 @@ class Sections:
|
||||
def __init__(self):
|
||||
self.dummy=None
|
||||
|
||||
# def getItemsInSection(self, strInput, sectionName, searchIndex):
|
||||
# video=None
|
||||
# startSection='<'+sectionName
|
||||
# endSection='</'+sectionName
|
||||
|
||||
# startIndex=strInput.find(startSection,searchIndex)
|
||||
# if -1 == startIndex:
|
||||
# searchIndex=-1
|
||||
# return video, searchIndex
|
||||
|
||||
# endIndex=strInput.find(endSection,startIndex)
|
||||
# if -1 == endIndex:
|
||||
# searchIndex=-1
|
||||
# return video, searchIndex
|
||||
|
||||
# searchIndex=endIndex+len(endSection)
|
||||
# strContainingString=strInput[startIndex:endIndex+1+len(endSection)]
|
||||
|
||||
# if not strContainingString or strContainingString=="":
|
||||
# return video, searchIndex
|
||||
|
||||
# indexPreview=strContainingString.find("preview=\"")
|
||||
# if -1 == indexPreview:
|
||||
# return video, searchIndex
|
||||
# previewUrl=strContainingString[indexPreview:]
|
||||
# previewUrl=self.betweenString(previewUrl,'"','"')
|
||||
# if "tokenvod" in previewUrl:
|
||||
# return video, searchIndex
|
||||
|
||||
# # Handle video description
|
||||
# indexDescription=strContainingString.find("alt=\"")
|
||||
# if -1 == indexDescription:
|
||||
# return video, searchIndex
|
||||
# description=strContainingString[indexDescription:]
|
||||
# description=self.betweenString(description,'"','"')
|
||||
# description=self.removeHtml(description)
|
||||
# description=description.replace("- Fox News","")
|
||||
# if "vod.foxbusiness" in description:
|
||||
# return video, searchIndex
|
||||
|
||||
# # Handle video duration
|
||||
# indexDuration=strContainingString.find("<div class=\"duration\">")
|
||||
# if -1 != indexDuration:
|
||||
# strDuration=strContainingString[indexDuration:]
|
||||
# strDuration=self.betweenString(strDuration,">","<")
|
||||
# description=description+" - "+strDuration
|
||||
|
||||
# # Handle video publication
|
||||
# strPublication = ""
|
||||
# indexPublication=strContainingString.find("<div class=\"pub-date\">")
|
||||
# if -1 != indexPublication:
|
||||
# strPublication=strContainingString[indexPublication:]
|
||||
# strPublication=self.betweenString(strPublication,"<time>","</time>")
|
||||
# description=description+" ("+strPublication+")"
|
||||
|
||||
# # Handle the icon
|
||||
# icon=None
|
||||
# indexIcon=strContainingString.find("srcset=")
|
||||
# if -1 != indexIcon:
|
||||
# icon=strContainingString[indexIcon:]
|
||||
# icon=self.betweenString(icon,"\"","\"")
|
||||
# splits=icon.split(',')
|
||||
# icon=self.betweenString(splits[len(splits)-1],None,'?')
|
||||
# icon=icon.strip()
|
||||
# description = description.strip()
|
||||
# video=Video(description,previewUrl,icon)
|
||||
# video.feedTimeOffset=strPublication
|
||||
# return video, searchIndex
|
||||
|
||||
def getItemsInSection(self, strInput, sectionName, searchIndex):
    """Parse the next ``<sectionName ...> ... </sectionName>`` section into a Video.

    Scanning starts at ``searchIndex`` in ``strInput``.

    Returns a ``(video, nextIndex)`` tuple:
      * ``(None, -1)`` when no further complete section exists;
      * ``(None, nextIndex)`` when a section was found but skipped (no
        ``preview="..."`` or ``alt="..."`` attribute, a preview URL
        containing "tokenvod", or a description containing
        "vod.foxbusiness");
      * ``(video, nextIndex)`` otherwise, where ``nextIndex`` is the
        position just past the section's closing tag.

    The video description is the ``alt`` text (HTML-cleaned, with
    "- Fox News" removed) followed by " - <duration>" and
    " (<publication>)" when those elements are present.
    ``video.feedTimeOffset`` is set to the publication text, or "" when
    there is none.
    """
    startTag = '<' + sectionName
    endTag = '</' + sectionName + '>'

    # Locate the section boundaries.
    startIndex = strInput.find(startTag, searchIndex)
    if startIndex == -1:
        return None, -1
    endIndex = strInput.find(endTag, startIndex)
    if endIndex == -1:
        return None, -1
    searchIndex = endIndex + len(endTag)

    # Slice the section once; every lookup below works on this slice only.
    block = strInput[startIndex:endIndex]

    # Preview URL
    previewStart = block.find('preview="')
    if previewStart == -1:
        return None, searchIndex
    previewStart += len('preview="')
    previewEnd = block.find('"', previewStart)
    if previewEnd == -1:
        return None, searchIndex
    previewUrl = block[previewStart:previewEnd]
    if "tokenvod" in previewUrl:
        return None, searchIndex

    # Description
    descStart = block.find('alt="')
    if descStart == -1:
        return None, searchIndex
    descStart += len('alt="')
    descEnd = block.find('"', descStart)
    if descEnd == -1:
        return None, searchIndex
    description = block[descStart:descEnd]
    description = self.removeHtml(description)
    description = description.replace("- Fox News", "")
    if "vod.foxbusiness" in description:
        return None, searchIndex

    # Duration (optional)
    durTag = '<div class="duration">'
    durStart = block.find(durTag)
    if durStart != -1:
        durStart += len(durTag)
        durEnd = block.find('</div>', durStart)
        if durEnd != -1:
            description += " - " + block[durStart:durEnd].strip()

    # Publication (optional): text of the first <time ...> element after the pub-date div.
    strPublication = ""
    pubStart = block.find('<div class="pub-date">')
    if pubStart != -1:
        timeOpen = block.find('<time', pubStart)
        if timeOpen != -1:
            timeCloseStart = block.find('>', timeOpen)
            if timeCloseStart != -1:
                timeCloseEnd = block.find('</time>', timeCloseStart)
                if timeCloseEnd != -1:
                    strPublication = block[timeCloseStart + 1:timeCloseEnd].strip()
    if strPublication:
        description += " (" + strPublication + ")"

    # Icon (optional): last srcset candidate with any query string removed.
    icon = None
    iconStart = block.find('srcset=')
    if iconStart != -1:
        quoteStart = block.find('"', iconStart + len('srcset='))
        if quoteStart != -1:
            quoteStart += 1
            quoteEnd = block.find('"', quoteStart)
            if quoteEnd != -1:
                last = block[quoteStart:quoteEnd].split(',')[-1].strip()
                q = last.find('?')
                icon = last if q == -1 else last[:q]

    video = Video(description.strip(), previewUrl, icon)
    video.feedTimeOffset = strPublication
    return video, searchIndex
|
||||
|
||||
|
||||
def getVideoIdInSection(self, strInput, sectionName, searchIndex):
|
||||
video=None
|
||||
@@ -422,12 +597,14 @@ class Sections:
|
||||
for code in codes:
|
||||
strItem=strItem.replace(code,"'")
|
||||
strItem=strItem.replace("&","&")
|
||||
strItem=strItem.replace("‘","'")
|
||||
strItem=strItem.replace("’","'")
|
||||
strItem=strItem.replace("‘","‘")
|
||||
strItem=strItem.replace("’","’")
|
||||
strItem=strItem.replace("—","-")
|
||||
strItem=strItem.replace("'","'")
|
||||
strItem=strItem.replace("???","'")
|
||||
strItem=strItem.replace(""","\"")
|
||||
return strItem
|
||||
|
||||
|
||||
def pad(str,filler,length):
|
||||
stringLength=len(str)
|
||||
sb=""
|
||||
@@ -447,6 +624,19 @@ def parseDuration(strDuration):
|
||||
|
||||
|
||||
# DON'T LEAVE ANYTHING OPEN BELOW THIS LINE BECAUSE THIS FILE IS IMPORTED BY OTHER MODULES AND ANY CODE NOT IN A CLASS WILL BE RUN
|
||||
# strdate = "January 1, 2026"
|
||||
# if DateTimeHelper.canstrptimeex(strdate):
|
||||
# theDate = DateTimeHelper.strptimeex(strdate)
|
||||
# if(not isinstance(theDate,datetime)):
|
||||
# raise Exception('Invalid type for parameter')
|
||||
|
||||
|
||||
|
||||
# feedTimeOffset = "January 13, 2025"
|
||||
# currentTime = datetime.now()
|
||||
# for i in range(1,100):
|
||||
# relativeTime = DateTimeHelper.applyRelativeTime(currentTime,feedTimeOffset)
|
||||
# print(relativeTime)
|
||||
|
||||
#print(FOX_NEWS_URL)
|
||||
# pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt'
|
||||
@@ -468,10 +658,10 @@ def parseDuration(strDuration):
|
||||
# newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/')
|
||||
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt')
|
||||
# newsFeed=NewsFeed(PATH_VIDEO_DATABASE, myLog())
|
||||
# newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/', myLog())
|
||||
# newsFeed=NewsFeed('C:/Python/NewsFeed/Archive', myLog())
|
||||
# videos=newsFeed.getItemsInFeed(FOX_NEWS_URL)
|
||||
|
||||
# for video in videos:
|
||||
# if(video.description.startswith("Martha")):
|
||||
# print(f"Description={video.description}")
|
||||
# print(f"Url={video.url}")
|
||||
# print(f"getTimestamp={video.getTimestamp().toStringMonthDay()}")
|
||||
@@ -479,6 +669,7 @@ def parseDuration(strDuration):
|
||||
# print(f"getFeedTime={video.getFeedTime()}")
|
||||
# print(f"daysOld={(datetime.now()-video.getFeedTime()).days}")
|
||||
# print(' ')
|
||||
# print(f"Got {len(videos)} videos")
|
||||
|
||||
# Pull the time out of the description and subtract it from the time at which we scanned the feed.
|
||||
# The result is the time of the article; use it as the sort key.
|
||||
|
||||
156
utility.py
156
utility.py
@@ -36,29 +36,62 @@ class StringHelper:
|
||||
def betweenString(strItem, strBegin, strEnd):
    """Return the part of *strItem* that lies between *strBegin* and *strEnd*.

    Args:
        strItem: Text to search. ``None`` yields ``None``.
        strBegin: Marker the result starts after, or ``None`` to start at the
            beginning of *strItem*. A marker written as ``<name>`` is matched
            loosely: the search looks for ``<name`` and then skips forward to
            the next ``>``, so an opening tag that carries attributes is
            still found.
        strEnd: Marker the result stops before, or ``None`` to return
            everything up to the end of *strItem*. It is only searched for
            at or after the start position.

    Returns:
        The extracted substring, or ``None`` when *strItem* is ``None`` or a
        requested marker cannot be found.
    """
    if strItem is None:
        return None

    if strBegin is None:
        start = 0
    else:
        # str.index raises ValueError when the marker is missing (it never
        # returns -1), so "not found" is handled here and not by comparing
        # against -1.
        try:
            if strBegin.startswith("<") and strBegin.endswith(">"):
                tag_name = strBegin[1:-1]
                # NOTE: this is a prefix match, so "<a>" also matches the
                # opening of a longer tag name such as "<abbr>".
                start = strItem.index("<" + tag_name)
                start = strItem.index(">", start) + 1
            else:
                start = strItem.index(strBegin) + len(strBegin)
        except ValueError:
            return None

    if strEnd is None:
        return strItem[start:]

    try:
        end = strItem.index(strEnd, start)
    except ValueError:
        return None

    return strItem[start:end]
|
||||
|
||||
# @staticmethod
|
||||
# def betweenString(strItem, strBegin, strEnd):
|
||||
# if strItem is None:
|
||||
# return None
|
||||
# index = -1
|
||||
# if strBegin is None:
|
||||
# index = 0
|
||||
# else:
|
||||
# try:
|
||||
# if strBegin.startswith("<") and strBegin.endswith(">"):
|
||||
# tag_name = strBegin[1:-1] # e.g. "time"
|
||||
# index = strItem.index("<" + tag_name)
|
||||
# index = strItem.index(">", index) + 1
|
||||
# else:
|
||||
# index = strItem.index(strBegin) + len(strBegin)
|
||||
# except ValueError:
|
||||
# return None
|
||||
# if index == -1:
|
||||
# return None
|
||||
# str = strItem[index:] if strBegin is not None else strItem
|
||||
# if strEnd is None:
|
||||
# return str
|
||||
# try:
|
||||
# index = str.index(strEnd)
|
||||
# except ValueError:
|
||||
# return None
|
||||
# sb = ""
|
||||
# for strIndex in range(0, len(str) - 1):
|
||||
# if index == strIndex:
|
||||
# break
|
||||
# sb = sb + str[strIndex]
|
||||
# return sb
|
||||
|
||||
class HttpNetRequest:
|
||||
def __init__(self):
|
||||
@@ -117,66 +150,97 @@ class DateTimeHelper:
|
||||
def getCurrentDateTime():
|
||||
return datetime.now()
|
||||
|
||||
# January 1, 2026
|
||||
@staticmethod
|
||||
def strptime(date_string, theFormat=None):
    """Parse a date such as ``'January 1, 2026'`` into a ``datetime``.

    With one argument, the text is split by hand into a full English month
    name, a day and a year; commas are ignored. ``datetime.strptime`` is not
    used on this path.

    Args:
        date_string: Text to parse, e.g. ``'January 1, 2026'``.
        theFormat: Optional ``strptime`` format string. It is accepted so
            that call sites using the two-argument form keep working; when
            given, parsing is done with that format.

    Returns:
        A ``datetime`` at midnight of the parsed day (one-argument form), or
        whatever the format yields (two-argument form).

    Raises:
        ValueError: If the text does not have three parts, the month name is
            unknown, day/year are not integers, or the date does not exist.
    """
    if theFormat is not None:
        try:
            return datetime.strptime(date_string, theFormat)
        except Exception:
            # Same fallback as the two-argument version, but no longer
            # swallowing KeyboardInterrupt/SystemExit with a bare except.
            # NOTE(review): presumably a workaround for environments where
            # datetime.strptime itself fails - confirm.
            import time
            return datetime(*(time.strptime(date_string, theFormat)[0:6]))

    month_map = {
        'January': 1, 'February': 2, 'March': 3, 'April': 4,
        'May': 5, 'June': 6, 'July': 7, 'August': 8,
        'September': 9, 'October': 10, 'November': 11, 'December': 12,
    }

    parts = date_string.replace(',', '').split()
    if len(parts) != 3:
        raise ValueError("Date string format is incorrect")

    month_str, day_str, year_str = parts
    month = month_map.get(month_str)
    if month is None:
        raise ValueError("Invalid month name in date string")

    # int() and datetime() both raise ValueError for bad day/year values.
    return datetime(int(year_str), month, int(day_str))
|
||||
|
||||
# January 1, 2026
|
||||
@staticmethod
|
||||
def canstrptime(date_string):
    """Report whether *date_string* is a valid ``'Month D, YYYY'`` date.

    The text must consist of a full English month name, a day and a year
    (commas are ignored), and the three together must form a date that
    really exists.

    Args:
        date_string: Candidate text, e.g. ``'January 1, 2026'``.

    Returns:
        ``True`` when the text can be turned into a date, ``False`` otherwise.
        This predicate never raises for malformed text.
    """
    month_map = {
        'January': 1, 'February': 2, 'March': 3, 'April': 4,
        'May': 5, 'June': 6, 'July': 7, 'August': 8,
        'September': 9, 'October': 10, 'November': 11, 'December': 12,
    }

    if not isinstance(date_string, str):
        return False

    parts = date_string.replace(',', '').split()
    if len(parts) != 3:
        return False

    month_str, day_str, year_str = parts
    month = month_map.get(month_str)
    if month is None:
        return False

    # Building the datetime validates everything at once: non-numeric
    # day/year (ValueError from int), impossible dates such as February 30
    # (ValueError from datetime) and absurdly large numbers (OverflowError).
    try:
        datetime(int(year_str), month, int(day_str))
    except (ValueError, OverflowError):
        return False
    return True
|
||||
|
||||
# returns a datetime
|
||||
@staticmethod
|
||||
def applyRelativeTime(sometime,relativetime):
    """Resolve a feed time description against a reference time.

    Recognised forms of *relativetime*:
      * a full date accepted by ``DateTimeHelper.canstrptime``
        (e.g. ``'January 1, 2026'``) - that date is returned;
      * ``'just now'`` / ``'just in'`` - *sometime* is returned unchanged;
      * ``'<n> <unit> ...'`` where the unit is minute(s), mins, hour(s) or
        day(s), e.g. ``'16 mins ago'`` or ``'2 hours ago'`` - *sometime*
        minus that amount is returned;
      * exactly two words that are not a count and a unit, e.g.
        ``'January 13'`` - parsed as a date in the current year, or in the
        previous year when that date would lie in the future.
    Anything else leaves *sometime* unchanged.

    Args:
        sometime: Reference ``datetime`` the offset is applied to.
        relativetime: Text describing the time, in one of the forms above.

    Returns:
        A ``datetime``.

    Raises:
        Exception: If *sometime* is not a ``datetime`` or *relativetime* is
            not a ``str``.
        ValueError: If the count is not an integer or a two-word value is
            not a parseable month/day.
    """
    if not isinstance(sometime, datetime):
        raise Exception('Invalid type for parameter')
    if not isinstance(relativetime, str):
        raise Exception('Invalid type for parameter')

    # Absolute date such as 'January 1, 2026'.
    if DateTimeHelper.canstrptime(relativetime):
        return DateTimeHelper.strptime(relativetime)

    if relativetime in ('just now', 'just in'):
        return sometime

    relativetimesplit = relativetime.split()
    if len(relativetimesplit) < 2:
        # Nothing to index as a unit; previously this raised IndexError.
        return sometime

    # Map every accepted unit spelling onto the timedelta keyword it means.
    unit_keywords = {
        'minute': 'minutes', 'minutes': 'minutes', 'mins': 'minutes',
        'hour': 'hours', 'hours': 'hours',
        'day': 'days', 'days': 'days',
    }
    keyword = unit_keywords.get(relativetimesplit[1])
    if keyword is not None:
        amount = int(relativetimesplit[0])
        return sometime - timedelta(**{keyword: amount})

    if len(relativetimesplit) == 2:
        # 'Month D' without a year: assume the current year, and fall back
        # to last year when that would put the article in the future.
        now = datetime.now()
        relativeDate = DateTimeHelper.strptime(relativetime + ', ' + str(now.year))
        if relativeDate > now:
            relativeDate = DateTimeHelper.strptime(relativetime + ', ' + str(now.year - 1))
        return relativeDate

    return sometime
|
||||
|
||||
class DateTime:
|
||||
def __init__(self):
|
||||
|
||||
7
video.py
7
video.py
@@ -68,7 +68,12 @@ class Video:
|
||||
description=splits[0].strip()
|
||||
url=splits[1].strip()
|
||||
icon=splits[2].strip()
|
||||
timestamp=DateTime(splits[3].strip())
|
||||
datePart = splits[3].strip()
|
||||
timestamp = DateTime()
|
||||
try :
|
||||
timestamp=DateTime(datePart)
|
||||
except Exception as exception:
|
||||
print(f"Encountered invalid date '{datePart}'")
|
||||
return(Video(description,url,icon,timestamp))
|
||||
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user